From 154ef67c8ed1caf4ba9077f9830de5f0d4214fb8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 25 Mar 2024 16:31:16 -0400 Subject: [PATCH 001/190] Prepare to offer (dynamic) `.msg.Codec` overrides By simply allowing an input `codec: tuple` of funcs for now to the `MsgpackTCPStream` transport but, ideally wrapping this in a `Codec` type with an API for dynamic extension of the interchange lib's msg processing settings. Right now we're tied to `msgspec.msgpack` for this transport but with the right design this can likely extend to other libs in the future. Relates to starting feature work toward #36, #196, #365. --- tractor/_ipc.py | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index f57d3bd8..2b5df698 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -30,6 +30,7 @@ import struct import typing from typing import ( Any, + Callable, runtime_checkable, Protocol, Type, @@ -123,6 +124,16 @@ class MsgpackTCPStream(MsgTransport): stream: trio.SocketStream, prefix_size: int = 4, + # XXX optionally provided codec pair for `msgspec`: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + # + # TODO: define this as a `Codec` struct which can be + # overriden dynamically by the application/runtime. + codec: tuple[ + Callable[[Any], Any]|None, # coder + Callable[[type, Any], Any]|None, # decoder + ]|None = None, + ) -> None: self.stream = stream @@ -138,12 +149,18 @@ class MsgpackTCPStream(MsgTransport): # public i guess? self.drained: list[dict] = [] - self.recv_stream = BufferedReceiveStream(transport_stream=stream) + self.recv_stream = BufferedReceiveStream( + transport_stream=stream + ) self.prefix_size = prefix_size # TODO: struct aware messaging coders - self.encode = msgspec.msgpack.Encoder().encode - self.decode = msgspec.msgpack.Decoder().decode # dict[str, Any]) + self.encode = msgspec.msgpack.Encoder( + enc_hook=codec[0] if codec else None, + ).encode + self.decode = msgspec.msgpack.Decoder( + dec_hook=codec[1] if codec else None, + ).decode async def _iter_packets(self) -> AsyncGenerator[dict, None]: '''Yield packets from the underlying stream. @@ -349,9 +366,25 @@ class Channel: stream: trio.SocketStream, type_key: tuple[str, str]|None = None, + # XXX optionally provided codec pair for `msgspec`: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + codec: tuple[ + Callable[[Any], Any], # coder + Callable[[type, Any], Any], # decoder + ]|None = None, + ) -> MsgTransport: - type_key = type_key or self._transport_key - self._transport = get_msg_transport(type_key)(stream) + type_key = ( + type_key + or + self._transport_key + ) + self._transport = get_msg_transport( + type_key + )( + stream, + codec=codec, + ) return self._transport def __repr__(self) -> str: -- 2.34.1 From 213e083dc604a2d1cd032ddfcceca431748fdcbc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Mar 2024 15:50:47 -0400 Subject: [PATCH 002/190] Proto `MsgCodec`, an interchange fmt modify API Fitting in line with the issues outstanding: - #36: (msg)spec-ing out our SCIPP (structured-con-inter-proc-prot). (https://github.com/goodboy/tractor/issues/36) - #196: adding strictly typed IPC msg dialog schemas, more or less better described as "dialog/transaction scoped message specs" using `msgspec`'s tagged unions and custom codecs. (https://github.com/goodboy/tractor/issues/196) - #365: using modern static type-annots to drive capability based messaging and RPC. 
(https://github.com/goodboy/tractor/issues/365)

This is a first draft of a new API for dynamically overriding IPC msg
codecs for a given interchange lib from any task in the runtime. Right
now we obviously only support `msgspec` but ideally this API remains
general enough to be used for other backends eventually (like
`capnproto`, and apache arrow).

Impl is in a new `tractor.msg._codec` with:
- a new `MsgCodec` type for encapsulating
  `msgspec.msgpack.Encoder/Decoder` pairs and configuring any custom
  enc/dec_hooks or typed decoding.
- factory `mk_codec()` for creating new codecs ad-hoc from a task.
- `contextvars` support for a new `trio.Task` scoped
  `_ctxvar_MsgCodec: ContextVar[MsgCodec]` named 'msgspec_codec'.
- `apply_codec()` for temporarily modifying the above per task as
  needed around `.open_context()` / `.open_stream()` operations.

A new test (suite) in `test_caps_msging.py`:
- verify a parent and its child can enable the same custom codec (in
  this case to transmit `NamespacePath`s) with tons of pedantic
  ctx-vars checks.
- ToDo: still need to implement #36 msg types in order to be able to
  get decodes working (as in `MsgStream.receive()` will deliver an
  already created `NamespacePath` obj) since currently all msgs come
  packed in `dict`-msg wrapper packets..
  -> use the proto from PR #35 to get nested `msgspec.Raw` processing
  up and running Bo
---
 tests/test_caps_msging.py | 198 +++++++++++++++++++++++++++++
 tractor/_ipc.py           |  74 ++++++++---
 tractor/msg/__init__.py   |  10 ++
 tractor/msg/_codec.py     | 253 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 519 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_caps_msging.py
 create mode 100644 tractor/msg/_codec.py

diff --git a/tests/test_caps_msging.py b/tests/test_caps_msging.py
new file mode 100644
index 00000000..f659cb13
--- /dev/null
+++ b/tests/test_caps_msging.py
@@ -0,0 +1,198 @@
+'''
+Functional audits for our "capability based messaging (schema)" feats.
+
+B~)
+
+'''
+from typing import (
+    Any,
+    Type,
+)
+from contextvars import (
+    Context,
+)
+
+import tractor
+from tractor.msg import (
+    _def_msgspec_codec,
+    _ctxvar_MsgCodec,
+
+    NamespacePath,
+    MsgCodec,
+    mk_codec,
+    apply_codec,
+    current_msgspec_codec,
+)
+import trio
+
+# TODO: wrap these into `._codec` such that user can just pass
+# a type table of some sort?
+def enc_hook(obj: Any) -> Any:
+    if isinstance(obj, NamespacePath):
+        return str(obj)
+    else:
+        raise NotImplementedError(
+            f'Objects of type {type(obj)} are not supported'
+        )
+
+
+def dec_hook(type: Type, obj: Any) -> Any:
+    print(f'type is: {type}')
+    if type is NamespacePath:
+        return NamespacePath(obj)
+    else:
+        raise NotImplementedError(
+            f'Objects of type {type(obj)} are not supported'
+        )
+
+
+def ex_func(*args):
+    print(f'ex_func({args})')
+
+
+def mk_custom_codec() -> MsgCodec:
+    # apply custom hooks and set a `Decoder` which only
+    # loads `NamespacePath` types.
+    nsp_codec: MsgCodec = mk_codec(
+        dec_types=NamespacePath,
+        enc_hook=enc_hook,
+        dec_hook=dec_hook,
+    )
+
+    # TODO: validate `MsgCodec` interface/semantics?
+    # -[ ] simple field tests to ensure caching + reset is workin?
+    # -[ ] custom / changing `.decoder()` calls?
+    #
+    # dec = nsp_codec.decoder(
+    #     types=NamespacePath,
+    # )
+    # assert nsp_codec.dec is dec
+    return nsp_codec
+
+
+@tractor.context
+async def send_back_nsp(
+    ctx: tractor.Context,
+
+) -> None:
+    '''
+    Set up a custom codec to load instances of `NamespacePath`
+    and ensure we can round trip a func ref with our parent.
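+
+    Rough sketch of the flow below (all names referenced are
+    defined in this test module):
+
+        nsp_codec = mk_custom_codec()
+        with apply_codec(nsp_codec):
+            await ctx.started(NamespacePath.from_ref(ex_func))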
+ + ''' + task: trio.Task = trio.lowlevel.current_task() + task_ctx: Context = task.context + assert _ctxvar_MsgCodec not in task_ctx + + nsp_codec: MsgCodec = mk_custom_codec() + with apply_codec(nsp_codec) as codec: + chk_codec_applied( + custom_codec=nsp_codec, + enter_value=codec, + ) + + nsp = NamespacePath.from_ref(ex_func) + await ctx.started(nsp) + + async with ctx.open_stream() as ipc: + async for msg in ipc: + + assert msg == f'{__name__}:ex_func' + + # TODO: as per below + # assert isinstance(msg, NamespacePath) + assert isinstance(msg, str) + + +def chk_codec_applied( + custom_codec: MsgCodec, + enter_value: MsgCodec, +) -> MsgCodec: + + task: trio.Task = trio.lowlevel.current_task() + task_ctx: Context = task.context + + assert _ctxvar_MsgCodec in task_ctx + curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + + assert ( + # returned from `mk_codec()` + custom_codec is + + # yielded value from `apply_codec()` + enter_value is + + # read from current task's `contextvars.Context` + curr_codec is + + # public API for all of the above + current_msgspec_codec() + + # the default `msgspec` settings + is not _def_msgspec_codec + ) + + +def test_codec_hooks_mod(): + ''' + Audit the `.msg.MsgCodec` override apis details given our impl + uses `contextvars` to accomplish per `trio` task codec + application around an inter-proc-task-comms context. + + ''' + async def main(): + task: trio.Task = trio.lowlevel.current_task() + task_ctx: Context = task.context + assert _ctxvar_MsgCodec not in task_ctx + + async with tractor.open_nursery() as an: + p: tractor.Portal = await an.start_actor( + 'sub', + enable_modules=[__name__], + ) + + # TODO: 2 cases: + # - codec not modified -> decode nsp as `str` + # - codec modified with hooks -> decode nsp as + # `NamespacePath` + nsp_codec: MsgCodec = mk_custom_codec() + with apply_codec(nsp_codec) as codec: + chk_codec_applied( + custom_codec=nsp_codec, + enter_value=codec, + ) + + async with ( + p.open_context( + send_back_nsp, + ) as (ctx, first), + ctx.open_stream() as ipc, + ): + # ensure codec is still applied across + # `tractor.Context` + its embedded nursery. 
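+                    # (the ctxvar set by `apply_codec()` above is
+                    # still in scope since we're in the same task
+                    # and that `with` block hasn't exited.)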
+ chk_codec_applied( + custom_codec=nsp_codec, + enter_value=codec, + ) + + assert first == f'{__name__}:ex_func' + # TODO: actually get the decoder loading + # to native once we spec our SCIPP msgspec + # (structurred-conc-inter-proc-protocol) + # implemented as per, + # https://github.com/goodboy/tractor/issues/36 + # + # assert isinstance(first, NamespacePath) + assert isinstance(first, str) + await ipc.send(first) + + with trio.move_on_after(1): + async for msg in ipc: + + # TODO: as per above + # assert isinstance(msg, NamespacePath) + assert isinstance(msg, str) + + await p.cancel_actor() + + trio.run(main) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 2b5df698..5aafda3f 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -23,7 +23,10 @@ from collections.abc import ( AsyncGenerator, AsyncIterator, ) -from contextlib import asynccontextmanager as acm +from contextlib import ( + asynccontextmanager as acm, + contextmanager as cm, +) import platform from pprint import pformat import struct @@ -37,12 +40,15 @@ from typing import ( TypeVar, ) -import msgspec from tricycle import BufferedReceiveStream import trio from tractor.log import get_logger from tractor._exceptions import TransportClosed +from tractor.msg import ( + _ctxvar_MsgCodec, + MsgCodec, +) log = get_logger(__name__) @@ -154,13 +160,9 @@ class MsgpackTCPStream(MsgTransport): ) self.prefix_size = prefix_size - # TODO: struct aware messaging coders - self.encode = msgspec.msgpack.Encoder( - enc_hook=codec[0] if codec else None, - ).encode - self.decode = msgspec.msgpack.Decoder( - dec_hook=codec[1] if codec else None, - ).decode + # allow for custom IPC msg interchange format + # dynamic override Bo + self.codec: MsgCodec = codec or MsgCodec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: '''Yield packets from the underlying stream. @@ -199,7 +201,23 @@ class MsgpackTCPStream(MsgTransport): log.transport(f"received {msg_bytes}") # type: ignore try: - yield self.decode(msg_bytes) + # NOTE: lookup the `trio.Task.context`'s var for + # the current `MsgCodec`. + yield _ctxvar_MsgCodec.get().decode(msg_bytes) + + # TODO: remove, was only for orig draft impl + # testing. + # + # curr_codec: MsgCodec = _ctxvar_MsgCodec.get() + # obj = curr_codec.decode(msg_bytes) + # if ( + # curr_codec is not + # _codec._def_msgspec_codec + # ): + # print(f'OBJ: {obj}\n') + # + # yield obj + except ( msgspec.DecodeError, UnicodeDecodeError, @@ -235,7 +253,10 @@ class MsgpackTCPStream(MsgTransport): # __tracebackhide__: bool = hide_tb async with self._send_lock: - bytes_data: bytes = self.encode(msg) + # NOTE: lookup the `trio.Task.context`'s var for + # the current `MsgCodec`. 
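+            # (when no task-local override has been applied via
+            # `apply_codec()` this just returns the default
+            # `_def_msgspec_codec` instance.)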
+ bytes_data: bytes = _ctxvar_MsgCodec.get().encode(msg) + # bytes_data: bytes = self.codec.encode(msg) # supposedly the fastest says, # https://stackoverflow.com/a/54027962 @@ -335,7 +356,9 @@ class Channel: @property def msgstream(self) -> MsgTransport: - log.info('`Channel.msgstream` is an old name, use `._transport`') + log.info( + '`Channel.msgstream` is an old name, use `._transport`' + ) return self._transport @property @@ -368,10 +391,7 @@ class Channel: # XXX optionally provided codec pair for `msgspec`: # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - codec: tuple[ - Callable[[Any], Any], # coder - Callable[[type, Any], Any], # decoder - ]|None = None, + codec: MsgCodec|None = None, ) -> MsgTransport: type_key = ( @@ -379,14 +399,36 @@ class Channel: or self._transport_key ) + # get transport type, then self._transport = get_msg_transport( type_key + # instantiate an instance of the msg-transport )( stream, codec=codec, ) return self._transport + # TODO: something simliar at the IPC-`Context` + # level so as to support + @cm + def apply_codec( + self, + codec: MsgCodec, + + ) -> None: + ''' + Temporarily override the underlying IPC msg codec for + dynamic enforcement of messaging schema. + + ''' + orig: MsgCodec = self._transport.codec + try: + self._transport.codec = codec + yield + finally: + self._transport.codec = orig + def __repr__(self) -> str: if not self._transport: return '' diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 906627cf..e2296788 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -24,3 +24,13 @@ from .ptr import ( from .types import ( Struct as Struct, ) +from ._codec import ( + + _def_msgspec_codec as _def_msgspec_codec, + _ctxvar_MsgCodec as _ctxvar_MsgCodec, + + apply_codec as apply_codec, + mk_codec as mk_codec, + MsgCodec as MsgCodec, + current_msgspec_codec as current_msgspec_codec, +) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py new file mode 100644 index 00000000..0da454ad --- /dev/null +++ b/tractor/msg/_codec.py @@ -0,0 +1,253 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +IPC msg interchange codec management. + +Supported backend libs: +- `msgspec.msgpack` + +ToDo: backends we prolly should offer: + +- see project/lib list throughout GH issue discussion comments: + https://github.com/goodboy/tractor/issues/196 + +- `capnproto`: https://capnproto.org/rpc.html + - https://capnproto.org/language.html#language-reference + +''' +from contextvars import ( + ContextVar, + Token, +) +from contextlib import ( + contextmanager as cm, +) +from typing import ( + Any, + Callable, + Type, + Union, +) +from types import ModuleType + +import msgspec +from msgspec import msgpack + +from .types import Struct + + +# TODO: API changes towards being interchange lib agnostic! 
# -[ ] capnproto has pre-compiled schema for eg..
+#  * https://capnproto.org/language.html
+#  * http://capnproto.github.io/pycapnp/quickstart.html
+#  * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp
+class MsgCodec(Struct):
+    '''
+    An IPC msg interchange format lib's encoder + decoder pair.
+
+    '''
+
+    lib: ModuleType = msgspec
+
+    # ad-hoc type extensions
+    # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
+    enc_hook: Callable[[Any], Any]|None = None  # coder
+    dec_hook: Callable[[type, Any], Any]|None = None  # decoder
+
+    # struct type unions
+    # https://jcristharif.com/msgspec/structs.html#tagged-unions
+    types: Union[Type[Struct]]|Any = Any
+
+    # post-configure cached props
+    _enc: msgpack.Encoder|None = None
+    _dec: msgpack.Decoder|None = None
+
+
+    # TODO: use `functools.cached_property` for these?
+    # https://docs.python.org/3/library/functools.html#functools.cached_property
+    @property
+    def enc(self) -> msgpack.Encoder:
+        return self._enc or self.encoder()
+
+    def encoder(
+        self,
+        enc_hook: Callable|None = None,
+        reset: bool = False,
+
+        # TODO: what's the default for this?
+        # write_buffer_size: int
+        **kwargs,
+
+    ) -> msgpack.Encoder:
+        '''
+        Set or get the maybe-cached `msgspec.msgpack.Encoder`
+        instance configured for this codec.
+
+        When `reset=True` any previously configured encoder will
+        be recreated and then cached with the new settings passed
+        as input.
+
+        '''
+        if (
+            self._enc is None
+            or reset
+        ):
+            self._enc = self.lib.msgpack.Encoder(
+                enc_hook=enc_hook or self.enc_hook,
+                # write_buffer_size=write_buffer_size,
+            )
+
+        return self._enc
+
+    def encode(
+        self,
+        py_obj: Any,
+
+    ) -> bytes:
+        '''
+        Encode input python objects to `msgpack` bytes for transfer
+        on a transport protocol connection.
+
+        '''
+        return self.enc.encode(py_obj)
+
+    @property
+    def dec(self) -> msgpack.Decoder:
+        return self._dec or self.decoder()
+
+    def decoder(
+        self,
+        types: Union[Type[Struct]]|None = None,
+        dec_hook: Callable|None = None,
+        reset: bool = False,
+        **kwargs,
+        # ext_hook: ext_hook_sig
+
+    ) -> msgpack.Decoder:
+        '''
+        Set or get the maybe-cached `msgspec.msgpack.Decoder`
+        instance configured for this codec.
+
+        When `reset=True` any previously configured decoder will
+        be recreated and then cached with the new settings passed
+        as input.
+
+        '''
+        if (
+            self._dec is None
+            or reset
+        ):
+            self._dec = self.lib.msgpack.Decoder(
+                types or self.types,
+                dec_hook=dec_hook or self.dec_hook,
+                **kwargs,
+            )
+
+        return self._dec
+
+    def decode(
+        self,
+        msg: bytes,
+    ) -> Any:
+        '''
+        Decode received `msgpack` bytes into a local python object
+        with special `msgspec.Struct` (or other type) handling
+        determined by this codec's configured `.types` and
+        `.dec_hook` settings.
+
+        '''
+
+        return self.dec.decode(msg)
+
+
+# TODO: struct aware messaging coders as per:
+# - https://github.com/goodboy/tractor/issues/36
+# - https://github.com/goodboy/tractor/issues/196
+# - https://github.com/goodboy/tractor/issues/365
+
+def mk_codec(
+    libname: str = 'msgspec',
+
+    # struct type unions set for `Decoder`
+    # https://jcristharif.com/msgspec/structs.html#tagged-unions
+    dec_types: Union[Type[Struct]]|Any = Any,
+
+    cache_now: bool = True,
+
+    # proxy to the `Struct.__init__()`
+    **kwargs,
+
+) -> MsgCodec:
+    '''
+    Convenience factory for creating codecs eventually meant
+    to be interchange lib agnostic (i.e. once we support more than just
+    `msgspec` ;).
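+
+    A usage sketch (the hook funcs are user-defined callables which
+    get proxied through `**kwargs` to the `MsgCodec` struct):
+
+        codec: MsgCodec = mk_codec(
+            dec_types=NamespacePath,
+            enc_hook=my_enc_hook,
+            dec_hook=my_dec_hook,
+        )
+        wire: bytes = codec.encode(py_obj)
+        obj = codec.decode(wire)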
+ + ''' + codec = MsgCodec( + types=dec_types, + **kwargs, + ) + assert codec.lib.__name__ == libname + + # by default config and cache the codec pair for given + # input settings. + if cache_now: + assert codec.enc + assert codec.dec + + return codec + + +# instance of the default `msgspec.msgpack` codec settings, i.e. +# no custom structs, hooks or other special types. +_def_msgspec_codec: MsgCodec = mk_codec() + +# NOTE: provides for per-`trio.Task` specificity of the +# IPC msging codec used by the transport layer when doing +# `Channel.send()/.recv()` of wire data. +_ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( + 'msgspec_codec', + default=_def_msgspec_codec, +) + + +@cm +def apply_codec( + codec: MsgCodec, + +) -> MsgCodec: + ''' + Dynamically apply a `MsgCodec` to the current task's + runtime context such that all IPC msgs are processed + with it for that task. + + ''' + token: Token = _ctxvar_MsgCodec.set(codec) + try: + yield _ctxvar_MsgCodec.get() + finally: + _ctxvar_MsgCodec.reset(token) + + +def current_msgspec_codec() -> MsgCodec: + ''' + Return the current `trio.Task.context`'s value + for `msgspec_codec` used by `Channel.send/.recv()` + for wire serialization. + + ''' + return _ctxvar_MsgCodec.get() -- 2.34.1 From b06754db3a676c7520917193cce7958f3b0b94d1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 6 Jul 2022 17:35:09 -0400 Subject: [PATCH 003/190] WIP tagged union message type API XXX NOTE XXX: this is a heavily modified commit from the original (ec226463) which was super out of date when rebased onto the current branch. I went through a manual conflict rework and removed all the legacy segments as well as rename-moved this original mod `tractor.msg.py` -> `tractor.msg/_old_msg.py`. Further the `NamespacePath` type def was discarded from this mod since it was from a super old version which was already moved to a `.msg.ptr` submod. As per original questions and discussion with `msgspec` author: - https://github.com/jcrist/msgspec/issues/25 - https://github.com/jcrist/msgspec/issues/140 this prototypes a new (but very naive) `msgspec.Struct` codec implementation which will be more filled out in the next commit. --- tractor/_ipc.py | 4 +- tractor/msg/_old_msg.py | 121 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 tractor/msg/_old_msg.py diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 5aafda3f..b1c2ccd2 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -165,7 +165,9 @@ class MsgpackTCPStream(MsgTransport): self.codec: MsgCodec = codec or MsgCodec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: - '''Yield packets from the underlying stream. + ''' + Yield `bytes`-blob decoded packets from the underlying TCP + stream using the current task's `MsgCodec`. ''' import msgspec # noqa diff --git a/tractor/msg/_old_msg.py b/tractor/msg/_old_msg.py new file mode 100644 index 00000000..823228a3 --- /dev/null +++ b/tractor/msg/_old_msg.py @@ -0,0 +1,121 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Capability-based messaging specifications: or colloquially as "msgspecs". + +Includes our SCIPP (structured-con-inter-process-protocol) message type defs +and APIs for applying custom msgspec-sets for implementing un-protocol state machines. + +''' + +# TODO: integration with our ``enable_modules: list[str]`` caps sys. + +# ``pkgutil.resolve_name()`` internally uses +# ``importlib.import_module()`` which can be filtered by inserting +# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before +# entering the ``Actor._process_messages()`` loop). +# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 +# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules +# - https://stackoverflow.com/a/63320902 +# - https://docs.python.org/3/library/sys.html#sys.meta_path + +# the new "Implicit Namespace Packages" might be relevant? +# - https://www.python.org/dev/peps/pep-0420/ + +# add implicit serialized message type support so that paths can be +# handed directly to IPC primitives such as streams and `Portal.run()` +# calls: +# - via ``msgspec``: +# - https://jcristharif.com/msgspec/api.html#struct +# - https://jcristharif.com/msgspec/extending.html +# via ``msgpack-python``: +# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type + +from __future__ import annotations +from contextlib import contextmanager as cm +from typing import ( + Union, + Any, +) + +from msgspec import Struct +from msgspec.msgpack import ( + Encoder, + Decoder, +) + + +# LIFO codec stack that is appended when the user opens the +# ``configure_native_msgs()`` cm below to configure a new codec set +# which will be applied to all new (msgspec relevant) IPC transports +# that are spawned **after** the configure call is made. +_lifo_codecs: list[ + tuple[ + Encoder, + Decoder, + ], +] = [(Encoder(), Decoder())] + + +def get_msg_codecs() -> tuple[ + Encoder, + Decoder, +]: + ''' + Return the currently configured ``msgspec`` codec set. + + The defaults are defined above. + + ''' + global _lifo_codecs + return _lifo_codecs[-1] + + +@cm +def configure_native_msgs( + tagged_structs: list[Struct], +): + ''' + Push a codec set that will natively decode + tagged structs provied in ``tagged_structs`` + in all IPC transports and pop the codec on exit. + + ''' + global _lifo_codecs + + # See "tagged unions" docs: + # https://jcristharif.com/msgspec/structs.html#tagged-unions + + # "The quickest way to enable tagged unions is to set tag=True when + # defining every struct type in the union. In this case tag_field + # defaults to "type", and tag defaults to the struct class name + # (e.g. "Get")." 
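+    # e.g. a `class Get(Struct, tag=True)` payload gets encoded with
+    # a `"type": "Get"` field which the tagged-union decoder then
+    # dispatches on.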
+ enc = Encoder() + + types_union = Union[tagged_structs[0]] | Any + for struct in tagged_structs[1:]: + types_union |= struct + + dec = Decoder(types_union) + + _lifo_codecs.append((enc, dec)) + try: + print("YOYOYOOYOYOYOY") + yield enc, dec + finally: + print("NONONONONON") + _lifo_codecs.pop() -- 2.34.1 From 8c23f838890489fbc12dbb1309e5991a5c439d6b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 7 Jul 2022 15:48:16 -0400 Subject: [PATCH 004/190] Re-think, `msgspec`-multi-typed msg dialogs The greasy details are strewn throughout a `msgspec` issue: https://github.com/jcrist/msgspec/issues/140 and specifically this code was mostly written as part of POC example in this comment: https://github.com/jcrist/msgspec/issues/140#issuecomment-1177850792 This work obviously pertains to our desire and prep for typed messaging and capabilities aware msg-oriented-protocols in #196. I added a "wants to have" method to `Context` showing how I think we could offer a pretty neat msg-type-set-as-capability-for-protocol system. XXX NOTE XXX: this commit was rewritten during a rebase from a very old version as per the prior commit. --- tractor/_streaming.py | 11 +++++ tractor/msg/_old_msg.py | 95 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index e0015fe4..90c33d31 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -533,6 +533,17 @@ class MsgStream(trio.abc.Channel): else: raise + # TODO: msg capability context api1 + # @acm + # async def enable_msg_caps( + # self, + # msg_subtypes: Union[ + # list[list[Struct]], + # Protocol, # hypothetical type that wraps a msg set + # ], + # ) -> tuple[Callable, Callable]: # payload enc, dec pair + # ... + def stream(func: Callable) -> Callable: ''' diff --git a/tractor/msg/_old_msg.py b/tractor/msg/_old_msg.py index 823228a3..240b2eca 100644 --- a/tractor/msg/_old_msg.py +++ b/tractor/msg/_old_msg.py @@ -48,11 +48,12 @@ and APIs for applying custom msgspec-sets for implementing un-protocol state mac from __future__ import annotations from contextlib import contextmanager as cm from typing import ( - Union, Any, + Optional, + Union, ) -from msgspec import Struct +from msgspec import Struct, Raw from msgspec.msgpack import ( Encoder, Decoder, @@ -95,8 +96,6 @@ def configure_native_msgs( in all IPC transports and pop the codec on exit. ''' - global _lifo_codecs - # See "tagged unions" docs: # https://jcristharif.com/msgspec/structs.html#tagged-unions @@ -119,3 +118,91 @@ def configure_native_msgs( finally: print("NONONONONON") _lifo_codecs.pop() + + +class Header(Struct, tag=True): + ''' + A msg header which defines payload properties + + ''' + uid: str + msgtype: Optional[str] = None + + +class Msg(Struct, tag=True): + ''' + The "god" msg type, a box for task level msg types. + + ''' + header: Header + payload: Raw + + +_root_dec = Decoder(Msg) +_root_enc = Encoder() + +# sub-decoders for retreiving embedded +# payload data and decoding to a sender +# side defined (struct) type. +_subdecs: dict[ + Optional[str], + Decoder] = { + None: Decoder(Any), +} + + +@cm +def enable_context( + msg_subtypes: list[list[Struct]] +) -> Decoder: + + for types in msg_subtypes: + first = types[0] + + # register using the default tag_field of "type" + # which seems to map to the class "name". 
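+        # (so the sub-decoder delivers instances of these struct
+        # types, not plain `dict`s, when decoding payloads.)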
+ tags = [first.__name__] + + # create a tagged union decoder for this type set + type_union = Union[first] + for typ in types[1:]: + type_union |= typ + tags.append(typ.__name__) + + dec = Decoder(type_union) + + # register all tags for this union sub-decoder + for tag in tags: + _subdecs[tag] = dec + try: + yield dec + finally: + for tag in tags: + _subdecs.pop(tag) + + +def decmsg(msg: Msg) -> Any: + msg = _root_dec.decode(msg) + tag_field = msg.header.msgtype + dec = _subdecs[tag_field] + return dec.decode(msg.payload) + + +def encmsg( + dialog_id: str | int, + payload: Any, +) -> Msg: + + tag_field = None + + plbytes = _root_enc.encode(payload) + if b'type' in plbytes: + assert isinstance(payload, Struct) + tag_field = type(payload).__name__ + payload = Raw(plbytes) + + msg = Msg( + Header(dialog_id, tag_field), + payload, + ) + return _root_enc.encode(msg) -- 2.34.1 From ae434ae8a40e62b687ce4a5736dfe337b5dad7f6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Mar 2024 17:47:55 -0400 Subject: [PATCH 005/190] Merge original content from PR #311 into `.msg.types` for now --- tractor/msg/_old_msg.py | 208 ---------------------------------------- tractor/msg/types.py | 185 ++++++++++++++++++++++++++++++++++- 2 files changed, 182 insertions(+), 211 deletions(-) delete mode 100644 tractor/msg/_old_msg.py diff --git a/tractor/msg/_old_msg.py b/tractor/msg/_old_msg.py deleted file mode 100644 index 240b2eca..00000000 --- a/tractor/msg/_old_msg.py +++ /dev/null @@ -1,208 +0,0 @@ -# tractor: structured concurrent "actors". -# Copyright 2018-eternity Tyler Goodlet. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -''' -Capability-based messaging specifications: or colloquially as "msgspecs". - -Includes our SCIPP (structured-con-inter-process-protocol) message type defs -and APIs for applying custom msgspec-sets for implementing un-protocol state machines. - -''' - -# TODO: integration with our ``enable_modules: list[str]`` caps sys. - -# ``pkgutil.resolve_name()`` internally uses -# ``importlib.import_module()`` which can be filtered by inserting -# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before -# entering the ``Actor._process_messages()`` loop). -# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 -# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules -# - https://stackoverflow.com/a/63320902 -# - https://docs.python.org/3/library/sys.html#sys.meta_path - -# the new "Implicit Namespace Packages" might be relevant? 
-# - https://www.python.org/dev/peps/pep-0420/ - -# add implicit serialized message type support so that paths can be -# handed directly to IPC primitives such as streams and `Portal.run()` -# calls: -# - via ``msgspec``: -# - https://jcristharif.com/msgspec/api.html#struct -# - https://jcristharif.com/msgspec/extending.html -# via ``msgpack-python``: -# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type - -from __future__ import annotations -from contextlib import contextmanager as cm -from typing import ( - Any, - Optional, - Union, -) - -from msgspec import Struct, Raw -from msgspec.msgpack import ( - Encoder, - Decoder, -) - - -# LIFO codec stack that is appended when the user opens the -# ``configure_native_msgs()`` cm below to configure a new codec set -# which will be applied to all new (msgspec relevant) IPC transports -# that are spawned **after** the configure call is made. -_lifo_codecs: list[ - tuple[ - Encoder, - Decoder, - ], -] = [(Encoder(), Decoder())] - - -def get_msg_codecs() -> tuple[ - Encoder, - Decoder, -]: - ''' - Return the currently configured ``msgspec`` codec set. - - The defaults are defined above. - - ''' - global _lifo_codecs - return _lifo_codecs[-1] - - -@cm -def configure_native_msgs( - tagged_structs: list[Struct], -): - ''' - Push a codec set that will natively decode - tagged structs provied in ``tagged_structs`` - in all IPC transports and pop the codec on exit. - - ''' - # See "tagged unions" docs: - # https://jcristharif.com/msgspec/structs.html#tagged-unions - - # "The quickest way to enable tagged unions is to set tag=True when - # defining every struct type in the union. In this case tag_field - # defaults to "type", and tag defaults to the struct class name - # (e.g. "Get")." - enc = Encoder() - - types_union = Union[tagged_structs[0]] | Any - for struct in tagged_structs[1:]: - types_union |= struct - - dec = Decoder(types_union) - - _lifo_codecs.append((enc, dec)) - try: - print("YOYOYOOYOYOYOY") - yield enc, dec - finally: - print("NONONONONON") - _lifo_codecs.pop() - - -class Header(Struct, tag=True): - ''' - A msg header which defines payload properties - - ''' - uid: str - msgtype: Optional[str] = None - - -class Msg(Struct, tag=True): - ''' - The "god" msg type, a box for task level msg types. - - ''' - header: Header - payload: Raw - - -_root_dec = Decoder(Msg) -_root_enc = Encoder() - -# sub-decoders for retreiving embedded -# payload data and decoding to a sender -# side defined (struct) type. -_subdecs: dict[ - Optional[str], - Decoder] = { - None: Decoder(Any), -} - - -@cm -def enable_context( - msg_subtypes: list[list[Struct]] -) -> Decoder: - - for types in msg_subtypes: - first = types[0] - - # register using the default tag_field of "type" - # which seems to map to the class "name". 
- tags = [first.__name__] - - # create a tagged union decoder for this type set - type_union = Union[first] - for typ in types[1:]: - type_union |= typ - tags.append(typ.__name__) - - dec = Decoder(type_union) - - # register all tags for this union sub-decoder - for tag in tags: - _subdecs[tag] = dec - try: - yield dec - finally: - for tag in tags: - _subdecs.pop(tag) - - -def decmsg(msg: Msg) -> Any: - msg = _root_dec.decode(msg) - tag_field = msg.header.msgtype - dec = _subdecs[tag_field] - return dec.decode(msg.payload) - - -def encmsg( - dialog_id: str | int, - payload: Any, -) -> Msg: - - tag_field = None - - plbytes = _root_enc.encode(payload) - if b'type' in plbytes: - assert isinstance(payload, Struct) - tag_field = type(payload).__name__ - payload = Raw(plbytes) - - msg = Msg( - Header(dialog_id, tag_field), - payload, - ) - return _root_enc.encode(msg) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 3ceff845..e457370e 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -21,19 +21,27 @@ types. ''' from __future__ import annotations from collections import UserList -from pprint import ( - saferepr, -) +from contextlib import contextmanager as cm from typing import ( Any, Iterator, + Optional, + Union, ) from msgspec import ( msgpack, + Raw, Struct as _Struct, structs, ) +from msgspec.msgpack import ( + Encoder, + Decoder, +) +from pprint import ( + saferepr, +) # TODO: auto-gen type sig for input func both for # type-msgs and logging of RPC tasks? @@ -268,3 +276,174 @@ class Struct( )) return diffs + +# ------ - ------ +# +# TODO: integration with our ``enable_modules: list[str]`` caps sys. +# +# ``pkgutil.resolve_name()`` internally uses +# ``importlib.import_module()`` which can be filtered by inserting +# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before +# entering the ``Actor._process_messages()`` loop). +# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 +# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules +# - https://stackoverflow.com/a/63320902 +# - https://docs.python.org/3/library/sys.html#sys.meta_path + +# the new "Implicit Namespace Packages" might be relevant? +# - https://www.python.org/dev/peps/pep-0420/ + +# add implicit serialized message type support so that paths can be +# handed directly to IPC primitives such as streams and `Portal.run()` +# calls: +# - via ``msgspec``: +# - https://jcristharif.com/msgspec/api.html#struct +# - https://jcristharif.com/msgspec/extending.html +# via ``msgpack-python``: +# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type +# LIFO codec stack that is appended when the user opens the +# ``configure_native_msgs()`` cm below to configure a new codec set +# which will be applied to all new (msgspec relevant) IPC transports +# that are spawned **after** the configure call is made. +_lifo_codecs: list[ + tuple[ + Encoder, + Decoder, + ], +] = [(Encoder(), Decoder())] + + +def get_msg_codecs() -> tuple[ + Encoder, + Decoder, +]: + ''' + Return the currently configured ``msgspec`` codec set. + + The defaults are defined above. + + ''' + global _lifo_codecs + return _lifo_codecs[-1] + + +@cm +def configure_native_msgs( + tagged_structs: list[_Struct], +): + ''' + Push a codec set that will natively decode + tagged structs provied in ``tagged_structs`` + in all IPC transports and pop the codec on exit. 
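+
+    A usage sketch (`Get`/`Put` being hypothetical app-defined,
+    tagged `Struct` types):
+
+        with configure_native_msgs([Get, Put]) as (enc, dec):
+            wire: bytes = enc.encode(Get(key='some/key'))
+            obj = dec.decode(wire)  # ideally a `Get` instance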
+ + ''' + # See "tagged unions" docs: + # https://jcristharif.com/msgspec/structs.html#tagged-unions + + # "The quickest way to enable tagged unions is to set tag=True when + # defining every struct type in the union. In this case tag_field + # defaults to "type", and tag defaults to the struct class name + # (e.g. "Get")." + enc = Encoder() + + types_union = Union[tagged_structs[0]] | Any + for struct in tagged_structs[1:]: + types_union |= struct + + dec = Decoder(types_union) + + _lifo_codecs.append((enc, dec)) + try: + print("YOYOYOOYOYOYOY") + yield enc, dec + finally: + print("NONONONONON") + _lifo_codecs.pop() + + +class Header(_Struct, tag=True): + ''' + A msg header which defines payload properties + + ''' + uid: str + msgtype: Optional[str] = None + + +class Msg(_Struct, tag=True): + ''' + The "god" msg type, a box for task level msg types. + + ''' + header: Header + payload: Raw + + +_root_dec = Decoder(Msg) +_root_enc = Encoder() + +# sub-decoders for retreiving embedded +# payload data and decoding to a sender +# side defined (struct) type. +_subdecs: dict[ + Optional[str], + Decoder] = { + None: Decoder(Any), +} + + +@cm +def enable_context( + msg_subtypes: list[list[_Struct]] +) -> Decoder: + + for types in msg_subtypes: + first = types[0] + + # register using the default tag_field of "type" + # which seems to map to the class "name". + tags = [first.__name__] + + # create a tagged union decoder for this type set + type_union = Union[first] + for typ in types[1:]: + type_union |= typ + tags.append(typ.__name__) + + dec = Decoder(type_union) + + # register all tags for this union sub-decoder + for tag in tags: + _subdecs[tag] = dec + try: + yield dec + finally: + for tag in tags: + _subdecs.pop(tag) + + +def decmsg(msg: Msg) -> Any: + msg = _root_dec.decode(msg) + tag_field = msg.header.msgtype + dec = _subdecs[tag_field] + return dec.decode(msg.payload) + + +def encmsg( + dialog_id: str | int, + payload: Any, +) -> Msg: + + tag_field = None + + plbytes = _root_enc.encode(payload) + if b'type' in plbytes: + assert isinstance(payload, _Struct) + tag_field = type(payload).__name__ + payload = Raw(plbytes) + + msg = Msg( + Header(dialog_id, tag_field), + payload, + ) + return _root_enc.encode(msg) -- 2.34.1 From e77333eb73eb0302ef2762e18504bcc3cdb95ca1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Mar 2024 18:27:55 -0400 Subject: [PATCH 006/190] Move the pretty-`Struct` stuff to a `.pretty_struct` Leave all the proto native struct-msg stuff in `.types` since i'm thinking it's the right name for the mod that will hold all the built-in SCIPP msgspecs longer run. Obvi the naive codec stack stuff needs to be cleaned out/up and anything useful moved into `._codec` ;) --- tractor/msg/__init__.py | 3 +- tractor/msg/_codec.py | 2 +- tractor/msg/pretty_struct.py | 269 ++++++++++++++++++++++++++++++++++ tractor/msg/types.py | 277 ++--------------------------------- 4 files changed, 286 insertions(+), 265 deletions(-) create mode 100644 tractor/msg/pretty_struct.py diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index e2296788..b5c261cc 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -21,11 +21,10 @@ Built-in messaging patterns, types, APIs and helpers. 
from .ptr import ( NamespacePath as NamespacePath, ) -from .types import ( +from .pretty_struct import ( Struct as Struct, ) from ._codec import ( - _def_msgspec_codec as _def_msgspec_codec, _ctxvar_MsgCodec as _ctxvar_MsgCodec, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 0da454ad..c26de8d4 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -47,7 +47,7 @@ from types import ModuleType import msgspec from msgspec import msgpack -from .types import Struct +from .pretty_struct import Struct # TODO: API changes towards being interchange lib agnostic! diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py new file mode 100644 index 00000000..143fc7a4 --- /dev/null +++ b/tractor/msg/pretty_struct.py @@ -0,0 +1,269 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Prettified version of `msgspec.Struct` for easier console grokin. + +''' +from __future__ import annotations +from collections import UserList +from typing import ( + Any, + Iterator, +) + +from msgspec import ( + msgpack, + Struct as _Struct, + structs, +) +from pprint import ( + saferepr, +) + +# TODO: auto-gen type sig for input func both for +# type-msgs and logging of RPC tasks? +# taken and modified from: +# https://stackoverflow.com/a/57110117 +# import inspect +# from typing import List + +# def my_function(input_1: str, input_2: int) -> list[int]: +# pass + +# def types_of(func): +# specs = inspect.getfullargspec(func) +# return_type = specs.annotations['return'] +# input_types = [t.__name__ for s, t in specs.annotations.items() if s != 'return'] +# return f'{func.__name__}({": ".join(input_types)}) -> {return_type}' + +# types_of(my_function) + + +class DiffDump(UserList): + ''' + Very simple list delegator that repr() dumps (presumed) tuple + elements of the form `tuple[str, Any, Any]` in a nice + multi-line readable form for analyzing `Struct` diffs. + + ''' + def __repr__(self) -> str: + if not len(self): + return super().__repr__() + + # format by displaying item pair's ``repr()`` on multiple, + # indented lines such that they are more easily visually + # comparable when printed to console when printed to + # console. + repstr: str = '[\n' + for k, left, right in self: + repstr += ( + f'({k},\n' + f'\t{repr(left)},\n' + f'\t{repr(right)},\n' + ')\n' + ) + repstr += ']\n' + return repstr + + +class Struct( + _Struct, + + # https://jcristharif.com/msgspec/structs.html#tagged-unions + # tag='pikerstruct', + # tag=True, +): + ''' + A "human friendlier" (aka repl buddy) struct subtype. + + ''' + def _sin_props(self) -> Iterator[ + tuple[ + structs.FieldIinfo, + str, + Any, + ] + ]: + ''' + Iterate over all non-@property fields of this struct. 
+ + ''' + fi: structs.FieldInfo + for fi in structs.fields(self): + key: str = fi.name + val: Any = getattr(self, key) + yield fi, key, val + + def to_dict( + self, + include_non_members: bool = True, + + ) -> dict: + ''' + Like it sounds.. direct delegation to: + https://jcristharif.com/msgspec/api.html#msgspec.structs.asdict + + BUT, by default we pop all non-member (aka not defined as + struct fields) fields by default. + + ''' + asdict: dict = structs.asdict(self) + if include_non_members: + return asdict + + # only return a dict of the struct members + # which were provided as input, NOT anything + # added as type-defined `@property` methods! + sin_props: dict = {} + fi: structs.FieldInfo + for fi, k, v in self._sin_props(): + sin_props[k] = asdict[k] + + return sin_props + + def pformat( + self, + field_indent: int = 2, + indent: int = 0, + + ) -> str: + ''' + Recursion-safe `pprint.pformat()` style formatting of + a `msgspec.Struct` for sane reading by a human using a REPL. + + ''' + # global whitespace indent + ws: str = ' '*indent + + # field whitespace indent + field_ws: str = ' '*(field_indent + indent) + + # qtn: str = ws + self.__class__.__qualname__ + qtn: str = self.__class__.__qualname__ + + obj_str: str = '' # accumulator + fi: structs.FieldInfo + k: str + v: Any + for fi, k, v in self._sin_props(): + + # TODO: how can we prefer `Literal['option1', 'option2, + # ..]` over .__name__ == `Literal` but still get only the + # latter for simple types like `str | int | None` etc..? + ft: type = fi.type + typ_name: str = getattr(ft, '__name__', str(ft)) + + # recurse to get sub-struct's `.pformat()` output Bo + if isinstance(v, Struct): + val_str: str = v.pformat( + indent=field_indent + indent, + field_indent=indent + field_indent, + ) + + else: # the `pprint` recursion-safe format: + # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr + val_str: str = saferepr(v) + + # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! + obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') + + return ( + f'{qtn}(\n' + f'{obj_str}' + f'{ws})' + ) + + # TODO: use a pprint.PrettyPrinter instance around ONLY rendering + # inside a known tty? + # def __repr__(self) -> str: + # ... + + # __str__ = __repr__ = pformat + __repr__ = pformat + + def copy( + self, + update: dict | None = None, + + ) -> Struct: + ''' + Validate-typecast all self defined fields, return a copy of + us with all such fields. + + NOTE: This is kinda like the default behaviour in + `pydantic.BaseModel` except a copy of the object is + returned making it compat with `frozen=True`. + + ''' + if update: + for k, v in update.items(): + setattr(self, k, v) + + # NOTE: roundtrip serialize to validate + # - enode to msgpack binary format, + # - decode that back to a struct. + return msgpack.Decoder(type=type(self)).decode( + msgpack.Encoder().encode(self) + ) + + def typecast( + self, + + # TODO: allow only casting a named subset? + # fields: set[str] | None = None, + + ) -> None: + ''' + Cast all fields using their declared type annotations + (kinda like what `pydantic` does by default). + + NOTE: this of course won't work on frozen types, use + ``.copy()`` above in such cases. 
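+
+        Sketch, assuming a struct with a single `count: int` field
+        (`msgspec` does no validation on `__init__()`):
+
+            s = MyStruct(count='3')
+            s.typecast()
+            assert s.count == 3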
+ + ''' + # https://jcristharif.com/msgspec/api.html#msgspec.structs.fields + fi: structs.FieldInfo + for fi in structs.fields(self): + setattr( + self, + fi.name, + fi.type(getattr(self, fi.name)), + ) + + def __sub__( + self, + other: Struct, + + ) -> DiffDump[tuple[str, Any, Any]]: + ''' + Compare fields/items key-wise and return a ``DiffDump`` + for easy visual REPL comparison B) + + ''' + diffs: DiffDump[tuple[str, Any, Any]] = DiffDump() + for fi in structs.fields(self): + attr_name: str = fi.name + ours: Any = getattr(self, attr_name) + theirs: Any = getattr(other, attr_name) + if ours != theirs: + diffs.append(( + attr_name, + ours, + theirs, + )) + + return diffs diff --git a/tractor/msg/types.py b/tractor/msg/types.py index e457370e..d2fb0877 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -20,12 +20,9 @@ types. ''' from __future__ import annotations -from collections import UserList from contextlib import contextmanager as cm from typing import ( Any, - Iterator, - Optional, Union, ) @@ -33,252 +30,8 @@ from msgspec import ( msgpack, Raw, Struct as _Struct, - structs, -) -from msgspec.msgpack import ( - Encoder, - Decoder, -) -from pprint import ( - saferepr, ) -# TODO: auto-gen type sig for input func both for -# type-msgs and logging of RPC tasks? -# taken and modified from: -# https://stackoverflow.com/a/57110117 -# import inspect -# from typing import List - -# def my_function(input_1: str, input_2: int) -> list[int]: -# pass - -# def types_of(func): -# specs = inspect.getfullargspec(func) -# return_type = specs.annotations['return'] -# input_types = [t.__name__ for s, t in specs.annotations.items() if s != 'return'] -# return f'{func.__name__}({": ".join(input_types)}) -> {return_type}' - -# types_of(my_function) - - -class DiffDump(UserList): - ''' - Very simple list delegator that repr() dumps (presumed) tuple - elements of the form `tuple[str, Any, Any]` in a nice - multi-line readable form for analyzing `Struct` diffs. - - ''' - def __repr__(self) -> str: - if not len(self): - return super().__repr__() - - # format by displaying item pair's ``repr()`` on multiple, - # indented lines such that they are more easily visually - # comparable when printed to console when printed to - # console. - repstr: str = '[\n' - for k, left, right in self: - repstr += ( - f'({k},\n' - f'\t{repr(left)},\n' - f'\t{repr(right)},\n' - ')\n' - ) - repstr += ']\n' - return repstr - - -class Struct( - _Struct, - - # https://jcristharif.com/msgspec/structs.html#tagged-unions - # tag='pikerstruct', - # tag=True, -): - ''' - A "human friendlier" (aka repl buddy) struct subtype. - - ''' - def _sin_props(self) -> Iterator[ - tuple[ - structs.FieldIinfo, - str, - Any, - ] - ]: - ''' - Iterate over all non-@property fields of this struct. - - ''' - fi: structs.FieldInfo - for fi in structs.fields(self): - key: str = fi.name - val: Any = getattr(self, key) - yield fi, key, val - - def to_dict( - self, - include_non_members: bool = True, - - ) -> dict: - ''' - Like it sounds.. direct delegation to: - https://jcristharif.com/msgspec/api.html#msgspec.structs.asdict - - BUT, by default we pop all non-member (aka not defined as - struct fields) fields by default. - - ''' - asdict: dict = structs.asdict(self) - if include_non_members: - return asdict - - # only return a dict of the struct members - # which were provided as input, NOT anything - # added as type-defined `@property` methods! 
- sin_props: dict = {} - fi: structs.FieldInfo - for fi, k, v in self._sin_props(): - sin_props[k] = asdict[k] - - return sin_props - - def pformat( - self, - field_indent: int = 2, - indent: int = 0, - - ) -> str: - ''' - Recursion-safe `pprint.pformat()` style formatting of - a `msgspec.Struct` for sane reading by a human using a REPL. - - ''' - # global whitespace indent - ws: str = ' '*indent - - # field whitespace indent - field_ws: str = ' '*(field_indent + indent) - - # qtn: str = ws + self.__class__.__qualname__ - qtn: str = self.__class__.__qualname__ - - obj_str: str = '' # accumulator - fi: structs.FieldInfo - k: str - v: Any - for fi, k, v in self._sin_props(): - - # TODO: how can we prefer `Literal['option1', 'option2, - # ..]` over .__name__ == `Literal` but still get only the - # latter for simple types like `str | int | None` etc..? - ft: type = fi.type - typ_name: str = getattr(ft, '__name__', str(ft)) - - # recurse to get sub-struct's `.pformat()` output Bo - if isinstance(v, Struct): - val_str: str = v.pformat( - indent=field_indent + indent, - field_indent=indent + field_indent, - ) - - else: # the `pprint` recursion-safe format: - # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - val_str: str = saferepr(v) - - # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! - obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') - - return ( - f'{qtn}(\n' - f'{obj_str}' - f'{ws})' - ) - - # TODO: use a pprint.PrettyPrinter instance around ONLY rendering - # inside a known tty? - # def __repr__(self) -> str: - # ... - - # __str__ = __repr__ = pformat - __repr__ = pformat - - def copy( - self, - update: dict | None = None, - - ) -> Struct: - ''' - Validate-typecast all self defined fields, return a copy of - us with all such fields. - - NOTE: This is kinda like the default behaviour in - `pydantic.BaseModel` except a copy of the object is - returned making it compat with `frozen=True`. - - ''' - if update: - for k, v in update.items(): - setattr(self, k, v) - - # NOTE: roundtrip serialize to validate - # - enode to msgpack binary format, - # - decode that back to a struct. - return msgpack.Decoder(type=type(self)).decode( - msgpack.Encoder().encode(self) - ) - - def typecast( - self, - - # TODO: allow only casting a named subset? - # fields: set[str] | None = None, - - ) -> None: - ''' - Cast all fields using their declared type annotations - (kinda like what `pydantic` does by default). - - NOTE: this of course won't work on frozen types, use - ``.copy()`` above in such cases. - - ''' - # https://jcristharif.com/msgspec/api.html#msgspec.structs.fields - fi: structs.FieldInfo - for fi in structs.fields(self): - setattr( - self, - fi.name, - fi.type(getattr(self, fi.name)), - ) - - def __sub__( - self, - other: Struct, - - ) -> DiffDump[tuple[str, Any, Any]]: - ''' - Compare fields/items key-wise and return a ``DiffDump`` - for easy visual REPL comparison B) - - ''' - diffs: DiffDump[tuple[str, Any, Any]] = DiffDump() - for fi in structs.fields(self): - attr_name: str = fi.name - ours: Any = getattr(self, attr_name) - theirs: Any = getattr(other, attr_name) - if ours != theirs: - diffs.append(( - attr_name, - ours, - theirs, - )) - - return diffs - -# ------ - ------ -# # TODO: integration with our ``enable_modules: list[str]`` caps sys. # # ``pkgutil.resolve_name()`` internally uses @@ -307,15 +60,15 @@ class Struct( # that are spawned **after** the configure call is made. 
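 # NOTE: being a LIFO, nested `configure_native_msgs()` entries
 # stack: the most recently pushed codec pair is returned by
 # `get_msg_codecs()` until its cm block exits.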
_lifo_codecs: list[ tuple[ - Encoder, - Decoder, + msgpack.Encoder, + msgpack.Decoder, ], -] = [(Encoder(), Decoder())] +] = [(msgpack.Encoder(), msgpack.Decoder())] def get_msg_codecs() -> tuple[ - Encoder, - Decoder, + msgpack.Encoder, + msgpack.Decoder, ]: ''' Return the currently configured ``msgspec`` codec set. @@ -344,13 +97,13 @@ def configure_native_msgs( # defining every struct type in the union. In this case tag_field # defaults to "type", and tag defaults to the struct class name # (e.g. "Get")." - enc = Encoder() + enc = msgpack.Encoder() types_union = Union[tagged_structs[0]] | Any for struct in tagged_structs[1:]: types_union |= struct - dec = Decoder(types_union) + dec = msgpack.Decoder(types_union) _lifo_codecs.append((enc, dec)) try: @@ -367,7 +120,7 @@ class Header(_Struct, tag=True): ''' uid: str - msgtype: Optional[str] = None + msgtype: str|None = None class Msg(_Struct, tag=True): @@ -379,23 +132,23 @@ class Msg(_Struct, tag=True): payload: Raw -_root_dec = Decoder(Msg) -_root_enc = Encoder() +_root_dec = msgpack.Decoder(Msg) +_root_enc = msgpack.Encoder() # sub-decoders for retreiving embedded # payload data and decoding to a sender # side defined (struct) type. _subdecs: dict[ - Optional[str], - Decoder] = { - None: Decoder(Any), + str|None, + msgpack.Decoder] = { + None: msgpack.Decoder(Any), } @cm def enable_context( msg_subtypes: list[list[_Struct]] -) -> Decoder: +) -> msgpack.Decoder: for types in msg_subtypes: first = types[0] @@ -410,7 +163,7 @@ def enable_context( type_union |= typ tags.append(typ.__name__) - dec = Decoder(type_union) + dec = msgpack.Decoder(type_union) # register all tags for this union sub-decoder for tag in tags: -- 2.34.1 From c5985169ccfd66b6e43dfe70a1a5c692a8dd871d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Mar 2024 10:45:01 -0400 Subject: [PATCH 007/190] Init def of "SC shuttle prot" with "msg-spec-limiting" As per the long outstanding GH issue this starts our rigorous journey into an attempt at a type-safe, cross-actor SC, IPC protocol Bo boop -> https://github.com/goodboy/tractor/issues/36 The idea is to "formally" define our SC "shuttle (dialog) protocol" by specifying a new `.msg.types.Msg` subtype-set which can fully encapsulate all IPC msg schemas needed in order to accomplish cross-process SC! The msg set deviated a little in terms of (type) names from the existing `dict`-msgs currently used in the runtime impl but, I think the name changes are much better in terms of explicitly representing the internal semantics of the actor runtime machinery/subsystems and the IPC-msg-dialog required for SC enforced RPC. ------ - ------ In cursory, the new formal msgs-spec includes the following msg-subtypes of a new top-level `Msg` boxing type (that holds the base field schema for all msgs): - `Start` to request RPC task scheduling by passing a `FuncSpec` payload (to replace the currently used `{'cmd': ... }` dict msg impl) - `StartAck` to allow the RPC task callee-side to report a `IpcCtxSpec` payload immediately back to the caller (currently responded naively via a `{'functype': ... }` msg) - `Started` to deliver the first value from `Context.started()` (instead of the existing `{'started': ... }`) - `Yield` to shuttle `MsgStream.send()`-ed values (instead of our `{'yield': ... }`) - `Stop` to terminate a `Context.open_stream()` session/block (over `{'stop': True }`) - `Return` to deliver the final value from the `Actor.start_remote_task()` (which is a `{'return': ... 
}`)
- `Error` to box `RemoteActorError` exceptions via a `.pld: ErrorData`
  payload, planned to replace/extend the current
  `RemoteActorError.msgdata` mechanism internal to
  `._exceptions.pack/unpack_error()`

The new `tractor.msg.types` includes all the above msg defs as well as
an API for rendering a "payload type specification" using a
`payload_type_spec: Union[Type]` that can be passed to
`msgspec.msgpack.Decoder(type=payload_type_spec)`. This ensures that
(for a subset of the above msg set) `Msg.pld: PayloadT` data is
type-parameterized using `msgspec`'s new `Generic[PayloadT]` field
support and thus enables an API where IPC `Context` dialogs can
strictly define the allowed payload-datatype-set via type union! Iow,
this is the foundation for supporting `Channel`/`Context`/`MsgStream`
IPC primitives which are type checked/safe as desired in GH issue:
- https://github.com/goodboy/tractor/issues/365

Misc notes on current impl(s) status:
------ - ------
- add a `.msg.types.mk_msg_spec()` which uses the new `msgspec` support
  for `class MyStruct(Struct, Generic[T])` parameterize-able fields and
  delivers our boxing SC-msg-(sub)set with the desired `payload_types`
  applied to `.pld`:
  - https://jcristharif.com/msgspec/supported-types.html#generic-types
  - as a note this impl seems to need to use `types.new_class()`
    dynamic subtype generation, though i don't really get *why* still..
    but without that the `msgspec.msgpack.Decoder` doesn't seem to
    reject `.pld`-limited `Msg` subtypes as demonstrated in the new
    test.
- around this ^ add a `.msg._codec.limit_msg_spec()` cm which exposes
  this payload-type-limiting API such that it can be applied per task
  via a `MsgCodec` in app code.
- the orig approach in https://github.com/goodboy/tractor/pull/311 was
  the idea of making payload fields `.pld: Raw` wherein we could have
  per-field/sub-msg decoders dynamically loaded depending on the
  particular application-layer schema in use. I don't want to lose the
  idea of this since I think it might be useful for an idea I have
  about capability-based-fields(-sharing, maybe using field-subset
  encryption?), and as such i've kept the (ostensibly) working impls in
  TODO-comments in `.msg._codec` wherein maybe we can add
  a `MsgCodec._payload_decs: dict` table for this later on.
  |_ also left in the `.msg.types.enc/decmsg()` impls but renamed as
     `enc/dec_payload()` (and reworked to not rely on the lifo codec
     stack tables; now removed) such that we can prolly move them to
     `MsgCodec` methods in the future.
- add an unused `._codec.mk_tagged_union_dec()` helper which was
  originally factored out of the #311 proto-code but didn't end up
  working as desired with the new parameterized-generic-fields approach
  (now in `msg.types.mk_msg_spec()`)

Testing/deps work:
------ - ------
- new `test_limit_msgspec()` which ensures all the `.types` content is
  correct but without using the wrapping APIs in `._codec`; i.e. using
  an in-line `Decoder` instead of a `MsgCodec`.
- pin us to `msgspec>=0.18.5` which has the needed generic-types
  support (which took me way too long yesterday to figure out when
  implementing all this XD)!
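
Illustrative sketch of the `.pld`-limiting idea:
------ - ------
(a minimal, self-contained example using made-up `Boxed`/`Point` types
and only `msgspec`'s documented generics API; read it as the concept,
not this patch's actual impl)

    from typing import Generic, TypeVar

    import msgspec
    from msgspec import msgpack

    PayloadT = TypeVar('PayloadT')


    class Boxed(msgspec.Struct, Generic[PayloadT], tag=True):
        # stand-in for the "god" `Msg` boxing type
        cid: str
        pld: PayloadT


    class Point(msgspec.Struct):
        x: int
        y: int


    # parameterizing `.pld` constrains what the decoder will accept
    enc = msgpack.Encoder()
    dec = msgpack.Decoder(type=Boxed[Point])

    ok = dec.decode(enc.encode(Boxed(cid='1', pld=Point(x=1, y=2))))
    assert ok.pld == Point(x=1, y=2)

    try:
        dec.decode(enc.encode(Boxed(cid='2', pld='not-a-point')))
    except msgspec.ValidationError:
        print('mis-typed `.pld` rejected B)')

The new `mk_msg_spec()` applies this same parameterization across the
whole boxing `Msg` subtype set at once.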
--- tests/test_caps_msging.py | 181 ++++++++++++++- tractor/msg/_codec.py | 144 +++++++++++- tractor/msg/types.py | 474 ++++++++++++++++++++++++++------------ 3 files changed, 644 insertions(+), 155 deletions(-) diff --git a/tests/test_caps_msging.py b/tests/test_caps_msging.py index f659cb13..b101c1e0 100644 --- a/tests/test_caps_msging.py +++ b/tests/test_caps_msging.py @@ -6,12 +6,22 @@ B~) ''' from typing import ( Any, + _GenericAlias, Type, + Union, ) from contextvars import ( Context, ) +# from inspect import Parameter +from msgspec import ( + structs, + msgpack, + # defstruct, + Struct, + ValidationError, +) import tractor from tractor.msg import ( _def_msgspec_codec, @@ -23,6 +33,12 @@ from tractor.msg import ( apply_codec, current_msgspec_codec, ) +from tractor.msg.types import ( + PayloadT, + Msg, + # Started, + mk_msg_spec, +) import trio # TODO: wrap these into `._codec` such that user can just pass @@ -54,7 +70,7 @@ def mk_custom_codec() -> MsgCodec: # apply custom hooks and set a `Decoder` which only # loads `NamespacePath` types. nsp_codec: MsgCodec = mk_codec( - dec_types=NamespacePath, + ipc_msg_spec=NamespacePath, enc_hook=enc_hook, dec_hook=dec_hook, ) @@ -196,3 +212,166 @@ def test_codec_hooks_mod(): await p.cancel_actor() trio.run(main) + + +def chk_pld_type( + generic: Msg|_GenericAlias, + payload_type: Type[Struct]|Any, + pld: Any, + +) -> bool: + + roundtrip: bool = False + pld_val_type: Type = type(pld) + + # gen_paramed: _GenericAlias = generic[payload_type] + # TODO: verify that the overridden subtypes + # DO NOT have modified type-annots from original! + # 'Start', .pld: FuncSpec + # 'StartAck', .pld: IpcCtxSpec + # 'Stop', .pld: UNSEt + # 'Error', .pld: ErrorData + # for typedef in ( + # [gen_paramed] + # + + + # # type-var should always be set for these sub-types + # # as well! + # Msg.__subclasses__() + # ): + # if typedef.__name__ not in [ + # 'Msg', + # 'Started', + # 'Yield', + # 'Return', + # ]: + # continue + # payload_type: Type[Struct] = CustomPayload + + # TODO: can remove all this right!? + # + # when parameterized (like `Msg[Any]`) then + # we expect an alias as input. + # if isinstance(generic, _GenericAlias): + # assert payload_type in generic.__args__ + # else: + # assert PayloadType in generic.__parameters__ + # pld_param: Parameter = generic.__signature__.parameters['pld'] + # assert pld_param.annotation is PayloadType + + type_spec: Union[Type[Struct]] + msg_types: list[Msg[payload_type]] + ( + type_spec, + msg_types, + ) = mk_msg_spec( + payload_type=payload_type, + ) + enc = msgpack.Encoder() + dec = msgpack.Decoder( + type=type_spec, # like `Msg[Any]` + ) + + # verify the boxed-type for all variable payload-type msgs. + for typedef in msg_types: + + pld_field = structs.fields(typedef)[1] + assert pld_field.type in {payload_type, PayloadT} + # TODO: does this need to work to get all subtypes to + # adhere? + assert pld_field.type is payload_type + + kwargs: dict[str, Any] = { + 'cid': '666', + 'pld': pld, + } + enc_msg = typedef(**kwargs) + + wire_bytes: bytes = enc.encode(enc_msg) + + try: + dec_msg = dec.decode(wire_bytes) + assert dec_msg.pld == pld + assert (roundtrip := (dec_msg == enc_msg)) + + except ValidationError as ve: + # breakpoint() + if pld_val_type is payload_type: + raise ValueError( + 'Got `ValidationError` despite type-var match!?\n' + f'pld_val_type: {pld_val_type}\n' + f'payload_type: {payload_type}\n' + ) from ve + + else: + # ow we good cuz the pld spec mismatched. 
+ print( + 'Got expected `ValidationError` since,\n' + f'{pld_val_type} is not {payload_type}\n' + ) + else: + if ( + pld_val_type is not payload_type + and payload_type is not Any + ): + raise ValueError( + 'DID NOT `ValidationError` despite expected type match!?\n' + f'pld_val_type: {pld_val_type}\n' + f'payload_type: {payload_type}\n' + ) + + return roundtrip + + + +def test_limit_msgspec(): + + async def main(): + async with tractor.open_root_actor( + debug_mode=True + ): + + # ensure we can round-trip a boxing `Msg` + assert chk_pld_type( + Msg, + Any, + None, + ) + + # TODO: don't need this any more right since + # `msgspec>=0.15` has the nice generics stuff yah?? + # + # manually override the type annot of the payload + # field and ensure it propagates to all msg-subtypes. + # Msg.__annotations__['pld'] = Any + + # verify that a mis-typed payload value won't decode + assert not chk_pld_type( + Msg, + int, + pld='doggy', + ) + + # parametrize the boxed `.pld` type as a custom-struct + # and ensure that parametrization propagates + # to all payload-msg-spec-able subtypes! + class CustomPayload(Struct): + name: str + value: Any + + assert not chk_pld_type( + Msg, + CustomPayload, + pld='doggy', + ) + + assert chk_pld_type( + Msg, + CustomPayload, + pld=CustomPayload(name='doggy', value='urmom') + ) + + # uhh bc we can `.pause_from_sync()` now! :surfer: + # breakpoint() + + trio.run(main) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index c26de8d4..5ce02055 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -47,20 +47,25 @@ from types import ModuleType import msgspec from msgspec import msgpack -from .pretty_struct import Struct +from tractor.msg.pretty_struct import Struct +from tractor.msg.types import ( + mk_msg_spec, + Msg, +) # TODO: API changes towards being interchange lib agnostic! +# # -[ ] capnproto has pre-compiled schema for eg.. # * https://capnproto.org/language.html # * http://capnproto.github.io/pycapnp/quickstart.html # * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +# class MsgCodec(Struct): ''' A IPC msg interchange format lib's encoder + decoder pair. ''' - lib: ModuleType = msgspec # ad-hoc type extensions @@ -70,12 +75,22 @@ class MsgCodec(Struct): # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions - types: Union[Type[Struct]]|Any = Any + ipc_msg_spec: Union[Type[Struct]]|Any = Any + payload_msg_spec: Union[Type[Struct]] = Any # post-configure cached props _enc: msgpack.Encoder|None = None _dec: msgpack.Decoder|None = None + # TODO: a sub-decoder system as well? + # see related comments in `.msg.types` + # _payload_decs: ( + # dict[ + # str, + # msgpack.Decoder, + # ] + # |None + # ) = None # TODO: use `functools.cached_property` for these ? # https://docs.python.org/3/library/functools.html#functools.cached_property @@ -88,8 +103,9 @@ class MsgCodec(Struct): enc_hook: Callable|None = None, reset: bool = False, - # TODO: what's the default for this? + # TODO: what's the default for this, and do we care? 
# write_buffer_size: int + # **kwargs, ) -> msgpack.Encoder: @@ -131,7 +147,7 @@ class MsgCodec(Struct): def decoder( self, - types: Union[Type[Struct]]|None = None, + ipc_msg_spec: Union[Type[Struct]]|None = None, dec_hook: Callable|None = None, reset: bool = False, **kwargs, @@ -152,7 +168,7 @@ class MsgCodec(Struct): or reset ): self._dec = self.lib.msgpack.Decoder( - types or self.types, + type=ipc_msg_spec or self.ipc_msg_spec, dec_hook=dec_hook or self.dec_hook, **kwargs, ) @@ -169,10 +185,39 @@ class MsgCodec(Struct): determined by the ''' - return self.dec.decode(msg) +def mk_tagged_union_dec( + tagged_structs: list[Struct], + +) -> tuple[ + list[str], + msgpack.Decoder, +]: + # See "tagged unions" docs: + # https://jcristharif.com/msgspec/structs.html#tagged-unions + + # "The quickest way to enable tagged unions is to set tag=True when + # defining every struct type in the union. In this case tag_field + # defaults to "type", and tag defaults to the struct class name + # (e.g. "Get")." + first: Struct = tagged_structs[0] + types_union: Union[Type[Struct]] = Union[ + first + ]|Any + tags: list[str] = [first.__name__] + + for struct in tagged_structs[1:]: + types_union |= struct + tags.append(struct.__name__) + + dec = msgpack.Decoder(types_union) + return ( + tags, + dec, + ) + # TODO: struct aware messaging coders as per: # - https://github.com/goodboy/tractor/issues/36 # - https://github.com/goodboy/tractor/issues/196 @@ -181,13 +226,18 @@ class MsgCodec(Struct): def mk_codec( libname: str = 'msgspec', + # for codec-ing boxed `Msg`-with-payload msgs + payload_types: Union[Type[Struct]]|None = None, + + # TODO: do we want to allow NOT/using a diff `Msg`-set? + # # struct type unions set for `Decoder` # https://jcristharif.com/msgspec/structs.html#tagged-unions - dec_types: Union[Type[Struct]]|Any = Any, + ipc_msg_spec: Union[Type[Struct]]|Any = Any, cache_now: bool = True, - # proxy to the `Struct.__init__()` + # proxy as `Struct(**kwargs)` **kwargs, ) -> MsgCodec: @@ -197,14 +247,59 @@ def mk_codec( `msgspec` ;). ''' + # (manually) generate a msg-payload-spec for all relevant + # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # for the decoder such that all sub-type msgs in our SCIPP + # will automatically decode to a type-"limited" payload (`Struct`) + # object (set). + payload_type_spec: Union[Type[Msg]]|None = None + if payload_types: + ( + payload_type_spec, + msg_types, + ) = mk_msg_spec( + payload_type=payload_types, + ) + assert len(payload_type_spec.__args__) == len(msg_types) + + # TODO: sub-decode `.pld: Raw`? + # see similar notes inside `.msg.types`.. + # + # not sure we'll end up wanting/needing this + # though it might have unforeseen advantages in terms + # of enabling encrypted appliciation layer (only) + # payloads? + # + # register sub-payload decoders to load `.pld: Raw` + # decoded `Msg`-packets using a dynamic lookup (table) + # instead of a pre-defined msg-spec via `Generic` + # parameterization. + # + # ( + # tags, + # payload_dec, + # ) = mk_tagged_union_dec( + # tagged_structs=list(payload_types.__args__), + # ) + # _payload_decs: ( + # dict[str, msgpack.Decoder]|None + # ) = { + # # pre-seed decoders for std-py-type-set for use when + # # `Msg.pld == None|Any`. 
+ # None: msgpack.Decoder(Any), + # Any: msgpack.Decoder(Any), + # } + # for name in tags: + # _payload_decs[name] = payload_dec + codec = MsgCodec( - types=dec_types, + ipc_msg_spec=ipc_msg_spec, + payload_msg_spec=payload_type_spec, **kwargs, ) assert codec.lib.__name__ == libname - # by default config and cache the codec pair for given - # input settings. + # by default, config-n-cache the codec pair from input settings. if cache_now: assert codec.enc assert codec.dec @@ -251,3 +346,28 @@ def current_msgspec_codec() -> MsgCodec: ''' return _ctxvar_MsgCodec.get() + + +@cm +def limit_msg_spec( + payload_types: Union[Type[Struct]], + + # TODO: don't need this approach right? + # + # tagged_structs: list[Struct]|None = None, + + **codec_kwargs, +): + ''' + Apply a `MsgCodec` that will natively decode the SC-msg set's + `Msg.pld: Union[Type[Struct]]` payload fields using + tagged-unions of `msgspec.Struct`s from the `payload_types` + for all IPC contexts in use by the current `trio.Task`. + + ''' + msgspec_codec: MsgCodec = mk_codec( + payload_types=payload_types, + **codec_kwargs, + ) + with apply_codec(msgspec_codec): + yield msgspec_codec diff --git a/tractor/msg/types.py b/tractor/msg/types.py index d2fb0877..732a0f5d 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -15,23 +15,315 @@ # along with this program. If not, see . ''' -Extensions to built-in or (heavily used but 3rd party) friend-lib -types. +Define our strictly typed IPC message spec for the SCIPP: + +that is, + +the "Structurred-Concurrency-Inter-Process-(dialog)-(un)Protocol". ''' + from __future__ import annotations -from contextlib import contextmanager as cm +# from contextlib import contextmanager as cm +import types from typing import ( Any, + Generic, + Literal, + Type, + TypeVar, Union, ) from msgspec import ( msgpack, Raw, - Struct as _Struct, + Struct, + UNSET, ) + +# TODO: can also remove yah? +# +# class Header(Struct, tag=True): +# ''' +# A msg header which defines payload properties + +# ''' +# payload_tag: str|None = None + +# type variable for the boxed payload field `.pld` +PayloadT = TypeVar('PayloadT') + + +class Msg( + Struct, + Generic[PayloadT], + tag=True, + tag_field='msg_type', +): + ''' + The "god" boxing msg type. + + Boxes user data-msgs in a `.pld` and uses `msgspec`'s tagged + unions support to enable a spec from a common msg inheritance + tree. + + ''' + # header: Header + # TODO: use UNSET here? + cid: str|None # call/context-id + + # The msgs "payload" (spelled without vowels): + # https://en.wikipedia.org/wiki/Payload_(computing) + # + # NOTE: inherited from any `Msg` (and maybe overriden + # by use of `limit_msg_spec()`), but by default is + # parameterized to be `Any`. + # + # XXX this `Union` must strictly NOT contain `Any` if + # a limited msg-type-spec is intended, such that when + # creating and applying a new `MsgCodec` its + # `.decoder: Decoder` is configured with a `Union[Type[Struct]]` which + # restricts the allowed payload content (this `.pld` field) + # by type system defined loading constraints B) + # + # TODO: could also be set to `msgspec.Raw` if the sub-decoders + # approach is preferred over the generic parameterization + # approach as take by `mk_msg_spec()` below. + pld: PayloadT + + +# TODO: better name, like `Call/TaskInput`? +class FuncSpec(Struct): + # TODO: can we combine these 2 into a `NamespacePath` field? 
+ ns: str + func: str + + kwargs: dict + uid: str # (calling) actor-id + + +class Start( + Msg, +): + ''' + Initial request to remotely schedule an RPC `trio.Task` via + `Actor.start_remote_task()`. + + It is called by all the following public APIs: + + - `ActorNursery.run_in_actor()` + + - `Portal.run()` + `|_.run_from_ns()` + `|_.open_stream_from()` + `|_._submit_for_result()` + + - `Context.open_context()` + + ''' + pld: FuncSpec + + +FuncType: Literal[ + 'asyncfunc', + 'asyncgen', + 'context', # TODO: the only one eventually? +] = 'context' + + +class IpcCtxSpec(Struct): + ''' + An inter-actor-`trio.Task`-comms `Context` spec. + + ''' + functype: FuncType + + # TODO: as part of the reponse we should report our allowed + # msg spec which should be generated from the type-annots as + # desired in # https://github.com/goodboy/tractor/issues/365 + # When this does not match what the starter/caller side + # expects we of course raise a `TypeError` just like if + # a function had been called using an invalid signature. + # + # msgspec: MsgSpec + + +class StartAck( + Msg, + Generic[PayloadT], +): + ''' + Init response to a `Cmd` request indicating the far + end's RPC callable "type". + + ''' + pld: IpcCtxSpec + + +class Started( + Msg, + Generic[PayloadT], +): + ''' + Packet to shuttle the "first value" delivered by + `Context.started(value: Any)` from a `@tractor.context` + decorated IPC endpoint. + + ''' + + +# TODO: instead of using our existing `Start` +# for this (as we did with the original `{'cmd': ..}` style) +# class Cancel(Msg): +# cid: str + + +class Yield( + Msg, + Generic[PayloadT], +): + ''' + Per IPC transmission of a value from `await MsgStream.send()`. + + ''' + + +class Stop(Msg): + ''' + Stream termination signal much like an IPC version + of `StopAsyncIteration`. + + ''' + pld: UNSET + + +class Return( + Msg, + Generic[PayloadT], +): + ''' + Final `return ` from a remotely scheduled + func-as-`trio.Task`. + + ''' + + +class ErrorData(Struct): + ''' + Remote actor error meta-data as needed originally by + `RemoteActorError.msgdata: dict`. + + ''' + src_uid: str + src_type_str: str + boxed_type_str: str + + relay_path: list[str] + tb_str: str + + # `ContextCancelled` + canceller: str|None = None + + # `StreamOverrun` + sender: str|None = None + + +class Error(Msg): + ''' + A pkt that wraps `RemoteActorError`s for relay. + + ''' + pld: ErrorData + + +# TODO: should be make a msg version of `ContextCancelled?` +# and/or with a scope field or a full `ActorCancelled`? +# class Cancelled(Msg): +# cid: str + +# TODO what about overruns? +# class Overrun(Msg): +# cid: str + + +def mk_msg_spec( + payload_type: Union[Type] = Any, + boxing_msg_set: set[Msg] = { + Started, + Yield, + Return, + }, + +) -> tuple[ + Union[Type[Msg]], + list[Type[Msg]], +]: + ''' + Generate a payload-type-parameterized `Msg` specification such + that IPC msgs which can be `Msg.pld` (payload) type + limited/filterd are specified given an input `payload_type: + Union[Type]`. + + ''' + submsg_types: list[Type[Msg]] = Msg.__subclasses__() + + # TODO: see below as well, + # => union building approach with `.__class_getitem__()` + # doesn't seem to work..? + # + # payload_type_spec: Union[Type[Msg]] + # + msg_types: list[Msg] = [] + for msgtype in boxing_msg_set: + + # check inheritance sanity + assert msgtype in submsg_types + + # TODO: wait why do we need the dynamic version here? + # -[ ] paraming the `PayloadT` values via `Generic[T]` + # doesn't seem to work at all? 
+ # -[ ] is there a way to get it to work at module level + # just using inheritance or maybe a metaclass? + # + # index_paramed_msg_type: Msg = msgtype[payload_type] + + # TODO: WHY do we need to dynamically generate the + # subtype-msgs here to ensure the `.pld` parameterization + # propagates as well as works at all in terms of the + # `msgpack.Decoder()`..? + # + # dynamically create the payload type-spec-limited msg set. + manual_paramed_msg_subtype: Type = types.new_class( + msgtype.__name__, + ( + # XXX NOTE XXX this seems to be THE ONLY + # way to get this to work correctly!?! + Msg[payload_type], + Generic[PayloadT], + ), + {}, + ) + + # TODO: grok the diff here better.. + # assert index_paramed_msg_type == manual_paramed_msg_subtype + + # XXX TODO: why does the manual method work but not the + # `.__class_getitem__()` one!?! + paramed_msg_type = manual_paramed_msg_subtype + + # payload_type_spec |= paramed_msg_type + msg_types.append(paramed_msg_type) + + + payload_type_spec: Union[Type[Msg]] = Union[*msg_types] + return ( + payload_type_spec, + msg_types, + ) + + # TODO: integration with our ``enable_modules: list[str]`` caps sys. # # ``pkgutil.resolve_name()`` internally uses @@ -43,160 +335,58 @@ from msgspec import ( # - https://stackoverflow.com/a/63320902 # - https://docs.python.org/3/library/sys.html#sys.meta_path -# the new "Implicit Namespace Packages" might be relevant? -# - https://www.python.org/dev/peps/pep-0420/ - -# add implicit serialized message type support so that paths can be -# handed directly to IPC primitives such as streams and `Portal.run()` -# calls: -# - via ``msgspec``: -# - https://jcristharif.com/msgspec/api.html#struct -# - https://jcristharif.com/msgspec/extending.html -# via ``msgpack-python``: -# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type -# LIFO codec stack that is appended when the user opens the -# ``configure_native_msgs()`` cm below to configure a new codec set -# which will be applied to all new (msgspec relevant) IPC transports -# that are spawned **after** the configure call is made. -_lifo_codecs: list[ - tuple[ - msgpack.Encoder, - msgpack.Decoder, - ], -] = [(msgpack.Encoder(), msgpack.Decoder())] - - -def get_msg_codecs() -> tuple[ - msgpack.Encoder, - msgpack.Decoder, -]: - ''' - Return the currently configured ``msgspec`` codec set. - - The defaults are defined above. - - ''' - global _lifo_codecs - return _lifo_codecs[-1] - - -@cm -def configure_native_msgs( - tagged_structs: list[_Struct], -): - ''' - Push a codec set that will natively decode - tagged structs provied in ``tagged_structs`` - in all IPC transports and pop the codec on exit. - - ''' - # See "tagged unions" docs: - # https://jcristharif.com/msgspec/structs.html#tagged-unions - - # "The quickest way to enable tagged unions is to set tag=True when - # defining every struct type in the union. In this case tag_field - # defaults to "type", and tag defaults to the struct class name - # (e.g. "Get")." 
- enc = msgpack.Encoder() - - types_union = Union[tagged_structs[0]] | Any - for struct in tagged_structs[1:]: - types_union |= struct - - dec = msgpack.Decoder(types_union) - - _lifo_codecs.append((enc, dec)) - try: - print("YOYOYOOYOYOYOY") - yield enc, dec - finally: - print("NONONONONON") - _lifo_codecs.pop() - - -class Header(_Struct, tag=True): - ''' - A msg header which defines payload properties - - ''' - uid: str - msgtype: str|None = None - - -class Msg(_Struct, tag=True): - ''' - The "god" msg type, a box for task level msg types. - - ''' - header: Header - payload: Raw - - -_root_dec = msgpack.Decoder(Msg) -_root_enc = msgpack.Encoder() - +# TODO: do we still want to try and support the sub-decoder with +# `Raw` technique in the case that the `Generic` approach gives +# future grief? +# # sub-decoders for retreiving embedded # payload data and decoding to a sender # side defined (struct) type. -_subdecs: dict[ +_payload_decs: dict[ str|None, - msgpack.Decoder] = { + msgpack.Decoder, +] = { + # default decoder is used when `Header.payload_tag == None` None: msgpack.Decoder(Any), } -@cm -def enable_context( - msg_subtypes: list[list[_Struct]] -) -> msgpack.Decoder: +def dec_payload( + msg: Msg, + msg_dec: msgpack.Decoder = msgpack.Decoder( + type=Msg[Any] + ), - for types in msg_subtypes: - first = types[0] +) -> Any|Struct: - # register using the default tag_field of "type" - # which seems to map to the class "name". - tags = [first.__name__] - - # create a tagged union decoder for this type set - type_union = Union[first] - for typ in types[1:]: - type_union |= typ - tags.append(typ.__name__) - - dec = msgpack.Decoder(type_union) - - # register all tags for this union sub-decoder - for tag in tags: - _subdecs[tag] = dec - try: - yield dec - finally: - for tag in tags: - _subdecs.pop(tag) + msg: Msg = msg_dec.decode(msg) + payload_tag: str = msg.header.payload_tag + payload_dec: msgpack.Decoder = _payload_decs[payload_tag] + return payload_dec.decode(msg.pld) -def decmsg(msg: Msg) -> Any: - msg = _root_dec.decode(msg) - tag_field = msg.header.msgtype - dec = _subdecs[tag_field] - return dec.decode(msg.payload) - - -def encmsg( - dialog_id: str | int, +def enc_payload( + enc: msgpack.Encoder, payload: Any, -) -> Msg: + cid: str, - tag_field = None +) -> bytes: - plbytes = _root_enc.encode(payload) - if b'type' in plbytes: - assert isinstance(payload, _Struct) - tag_field = type(payload).__name__ + # tag_field: str|None = None + + plbytes = enc.encode(payload) + if b'msg_type' in plbytes: + assert isinstance(payload, Struct) + + # tag_field: str = type(payload).__name__ payload = Raw(plbytes) msg = Msg( - Header(dialog_id, tag_field), - payload, + cid=cid, + pld=payload, + # Header( + # payload_tag=tag_field, + # # dialog_id, + # ), ) - return _root_enc.encode(msg) + return enc.encode(msg) -- 2.34.1 From e1e87c95c5b8580bece658131a7398aa47b3d65c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Mar 2024 13:07:03 -0400 Subject: [PATCH 008/190] Tweak msg-spec test suite mod name --- tests/{test_caps_msging.py => test_caps_based_msging.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_caps_msging.py => test_caps_based_msging.py} (100%) diff --git a/tests/test_caps_msging.py b/tests/test_caps_based_msging.py similarity index 100% rename from tests/test_caps_msging.py rename to tests/test_caps_based_msging.py -- 2.34.1 From 89bc5ab8c4fbf000687192ae40e09110df301b9d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Mar 2024 13:08:18 -0400 
Subject: [PATCH 009/190] Change to multi-line-static-`dict` style msgs Re-arranging such that element-orders are line-arranged to our new IPC `.msg.types.Msg` fields spec in prep for replacing the current `dict`-as-msg impls with the `msgspec.Struct` native versions! --- tractor/_exceptions.py | 4 +++- tractor/_rpc.py | 35 +++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 0e1d6d10..b1a8ee63 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -536,7 +536,9 @@ def pack_error( # content's `.msgdata`). error_msg['tb_str'] = tb_str - pkt: dict = {'error': error_msg} + pkt: dict = { + 'error': error_msg, + } if cid: pkt['cid'] = cid diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b108fdda..ef6cbe00 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -84,7 +84,10 @@ async def _invoke_non_context( # TODO: can we unify this with the `context=True` impl below? if inspect.isasyncgen(coro): - await chan.send({'functype': 'asyncgen', 'cid': cid}) + await chan.send({ + 'cid': cid, + 'functype': 'asyncgen', + }) # XXX: massive gotcha! If the containing scope # is cancelled and we execute the below line, # any ``ActorNursery.__aexit__()`` WON'T be @@ -104,18 +107,27 @@ async def _invoke_non_context( # to_send = await chan.recv_nowait() # if to_send is not None: # to_yield = await coro.asend(to_send) - await chan.send({'yield': item, 'cid': cid}) + await chan.send({ + 'yield': item, + 'cid': cid, + }) log.runtime(f"Finished iterating {coro}") # TODO: we should really support a proper # `StopAsyncIteration` system here for returning a final # value if desired - await chan.send({'stop': True, 'cid': cid}) + await chan.send({ + 'stop': True, + 'cid': cid, + }) # one way @stream func that gets treated like an async gen # TODO: can we unify this with the `context=True` impl below? elif treat_as_gen: - await chan.send({'functype': 'asyncgen', 'cid': cid}) + await chan.send({ + 'cid': cid, + 'functype': 'asyncgen', + }) # XXX: the async-func may spawn further tasks which push # back values like an async-generator would but must # manualy construct the response dict-packet-responses as @@ -128,7 +140,10 @@ async def _invoke_non_context( if not cs.cancelled_caught: # task was not cancelled so we can instruct the # far end async gen to tear down - await chan.send({'stop': True, 'cid': cid}) + await chan.send({ + 'stop': True, + 'cid': cid + }) else: # regular async function/method # XXX: possibly just a scheduled `Actor._cancel_task()` @@ -177,10 +192,10 @@ async def _invoke_non_context( and chan.connected() ): try: - await chan.send( - {'return': result, - 'cid': cid} - ) + await chan.send({ + 'return': result, + 'cid': cid, + }) except ( BrokenPipeError, trio.BrokenResourceError, @@ -474,8 +489,8 @@ async def _invoke( # "least sugary" type of RPC ep with support for # bi-dir streaming B) await chan.send({ + 'cid': cid, 'functype': 'context', - 'cid': cid }) # TODO: should we also use an `.open_context()` equiv -- 2.34.1 From 4251ee4c518cd839c435d7e8711bafb5daaad066 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 12:46:59 -0400 Subject: [PATCH 010/190] Drop `MsgCodec.decoder()/.encoder()` design Instead just instantiate `msgpack.Encoder/Decoder` instances inside `mk_codec()` and assign them directly as `._enc/._dec` fields. Explicitly take in named-args to both and proxy to the coder/decoder instantiation calls directly. 
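
In rough strokes (an illustrative reduction with simplified fields and
arg names; the diff below is the authoritative impl) the resulting
pattern is just:

    from typing import Any, Callable

    from msgspec import Struct, msgpack


    class MsgCodec(Struct):
        # eagerly-built pair; no lazy `.encoder()/.decoder()` methods
        _enc: msgpack.Encoder
        _dec: msgpack.Decoder

        def encode(self, obj: Any) -> bytes:
            return self._enc.encode(obj)

        def decode(self, wire: bytes) -> Any:
            return self._dec.decode(wire)


    def mk_codec(
        ipc_msg_spec: Any = Any,
        enc_hook: Callable|None = None,
        dec_hook: Callable|None = None,
    ) -> MsgCodec:
        # proxy named-args straight through to the interchange
        # lib's ctors and assign the instances directly
        return MsgCodec(
            _enc=msgpack.Encoder(enc_hook=enc_hook),
            _dec=msgpack.Decoder(
                type=ipc_msg_spec,
                dec_hook=dec_hook,
            ),
        )

I.e. a plain, eagerly-built codec pair with thin `.encode()/.decode()`
proxies instead of reset-able lazy accessors.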
Shuffling some codec internals: - rename `mk_codec()` inputs as `ipc_msg_spec` and `ipc_pld_spec`, make them mutex such that a payload type spec can't be passed if the built-in msg-spec isn't used. => expose `MsgCodec.ipc_pld_spec` directly from `._dec.type` => presume input `ipc_msg_spec` is `Any` by default when no `ipc_pld_spec` is passed since we have no way atm to enable a similar type-restricted-payload feature without a wrapping "shuttle protocol" ;) - move all the payload-sub-decoders stuff prototyped in GH#311 (inside `.types`) to `._codec` as commented-for-later-maybe `MsgCodec` methods including: - `.mk_pld_subdec()` for registering - `.enc/dec_payload()` for sub-codec field loading. - also comment out `._codec.mk_tagged_union_dec()` as the orig tag-to-decoder table factory, now mostly superseded by `.types.mk_msg_spec()` which takes the generic parameterizing approach instead. - change naming to `types.mk_msg_spec(payload_type_union)` input, making it more explicit that it expects a `Union[Type]`. Oh right, and start exposing all the `.types.Msg` subtypes in the `.msg` subpkg in prep for usage throughout the runtime B) --- tractor/msg/__init__.py | 37 ++++ tractor/msg/_codec.py | 394 +++++++++++++++++++++++----------------- tractor/msg/types.py | 113 +++--------- 3 files changed, 297 insertions(+), 247 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index b5c261cc..a93fa888 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -33,3 +33,40 @@ from ._codec import ( MsgCodec as MsgCodec, current_msgspec_codec as current_msgspec_codec, ) + +from .types import ( + Msg as Msg, + + Start, # with pld + FuncSpec as FuncSpec, + + StartAck, # with pld + IpcCtxSpec as IpcCtxSpec, + + Started, + Yield, + Stop, + Return, + + Error, # with pld + ErrorData as ErrorData +) + + +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. +__spec__: list[Msg] = [ + + # inter-actor RPC initiation + Start, + StartAck, + + # no-outcome-yet IAC (inter-actor-communication) + Started, + Yield, + Stop, + + # termination outcomes + Return, + Error, +] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 5ce02055..e6cb4f1f 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -29,6 +29,7 @@ ToDo: backends we prolly should offer: - https://capnproto.org/language.html#language-reference ''' +from __future__ import annotations from contextvars import ( ContextVar, Token, @@ -54,18 +55,36 @@ from tractor.msg.types import ( ) -# TODO: API changes towards being interchange lib agnostic! +# TODO: overall IPC msg-spec features (i.e. in this mod)! # -# -[ ] capnproto has pre-compiled schema for eg.. -# * https://capnproto.org/language.html -# * http://capnproto.github.io/pycapnp/quickstart.html -# * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +# -[ ] API changes towards being interchange lib agnostic! +# -[ ] capnproto has pre-compiled schema for eg.. +# * https://capnproto.org/language.html +# * http://capnproto.github.io/pycapnp/quickstart.html +# * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +# +# -[ ] struct aware messaging coders as per: +# -[x] https://github.com/goodboy/tractor/issues/36 +# -[ ] https://github.com/goodboy/tractor/issues/196 +# -[ ] https://github.com/goodboy/tractor/issues/365 # class MsgCodec(Struct): ''' A IPC msg interchange format lib's encoder + decoder pair. 
''' + # post-configure-cached when prop-accessed (see `mk_codec()` + # OR can be passed directly as, + # `MsgCodec(_enc=, _dec=)` + _enc: msgpack.Encoder|None = None + _dec: msgpack.Decoder|None = None + + # struct type unions + # https://jcristharif.com/msgspec/structs.html#tagged-unions + @property + def ipc_pld_spec(self) -> Union[Type[Struct]]: + return self._dec.type + lib: ModuleType = msgspec # ad-hoc type extensions @@ -73,16 +92,8 @@ class MsgCodec(Struct): enc_hook: Callable[[Any], Any]|None = None # coder dec_hook: Callable[[type, Any], Any]|None = None # decoder - # struct type unions - # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_msg_spec: Union[Type[Struct]]|Any = Any - payload_msg_spec: Union[Type[Struct]] = Any - - # post-configure cached props - _enc: msgpack.Encoder|None = None - _dec: msgpack.Decoder|None = None - # TODO: a sub-decoder system as well? + # payload_msg_specs: Union[Type[Struct]] = Any # see related comments in `.msg.types` # _payload_decs: ( # dict[ @@ -91,42 +102,18 @@ class MsgCodec(Struct): # ] # |None # ) = None + # OR + # ) = { + # # pre-seed decoders for std-py-type-set for use when + # # `Msg.pld == None|Any`. + # None: msgpack.Decoder(Any), + # Any: msgpack.Decoder(Any), + # } # TODO: use `functools.cached_property` for these ? # https://docs.python.org/3/library/functools.html#functools.cached_property @property def enc(self) -> msgpack.Encoder: - return self._enc or self.encoder() - - def encoder( - self, - enc_hook: Callable|None = None, - reset: bool = False, - - # TODO: what's the default for this, and do we care? - # write_buffer_size: int - # - **kwargs, - - ) -> msgpack.Encoder: - ''' - Set or get the maybe-cached `msgspec.msgpack.Encoder` - instance configured for this codec. - - When `reset=True` any previously configured encoder will - be recreated and then cached with the new settings passed - as input. - - ''' - if ( - self._enc is None - or reset - ): - self._enc = self.lib.msgpack.Encoder( - enc_hook=enc_hook or self.enc_hook, - # write_buffer_size=write_buffer_size, - ) - return self._enc def encode( @@ -139,40 +126,10 @@ class MsgCodec(Struct): on a tranport protocol connection. ''' - return self.enc.encode(py_obj) + return self._enc.encode(py_obj) @property def dec(self) -> msgpack.Decoder: - return self._dec or self.decoder() - - def decoder( - self, - ipc_msg_spec: Union[Type[Struct]]|None = None, - dec_hook: Callable|None = None, - reset: bool = False, - **kwargs, - # ext_hook: ext_hook_sig - - ) -> msgpack.Decoder: - ''' - Set or get the maybe-cached `msgspec.msgpack.Decoder` - instance configured for this codec. - - When `reset=True` any previously configured decoder will - be recreated and then cached with the new settings passed - as input. - - ''' - if ( - self._dec is None - or reset - ): - self._dec = self.lib.msgpack.Decoder( - type=ipc_msg_spec or self.ipc_msg_spec, - dec_hook=dec_hook or self.dec_hook, - **kwargs, - ) - return self._dec def decode( @@ -185,60 +142,165 @@ class MsgCodec(Struct): determined by the ''' - return self.dec.decode(msg) + return self._dec.decode(msg) + + # TODO: do we still want to try and support the sub-decoder with + # `.Raw` technique in the case that the `Generic` approach gives + # future grief? + # + # -[ ] + # + #def mk_pld_subdec( + # self, + # payload_types: Union[Type[Struct]], + + #) -> msgpack.Decoder: + # # TODO: sub-decoder suppor for `.pld: Raw`? + # # => see similar notes inside `.msg.types`.. 
+ # # + # # not sure we'll end up needing this though it might have + # # unforeseen advantages in terms of enabling encrypted + # # appliciation layer (only) payloads? + # # + # # register sub-payload decoders to load `.pld: Raw` + # # decoded `Msg`-packets using a dynamic lookup (table) + # # instead of a pre-defined msg-spec via `Generic` + # # parameterization. + # # + # ( + # tags, + # payload_dec, + # ) = mk_tagged_union_dec( + # tagged_structs=list(payload_types.__args__), + # ) + # # register sub-decoders by tag + # subdecs: dict[str, msgpack.Decoder]|None = self._payload_decs + # for name in tags: + # subdecs.setdefault( + # name, + # payload_dec, + # ) + + # return payload_dec + + # sub-decoders for retreiving embedded + # payload data and decoding to a sender + # side defined (struct) type. + # def dec_payload( + # codec: MsgCodec, + # msg: Msg, + + # ) -> Any|Struct: + + # msg: Msg = codec.dec.decode(msg) + # payload_tag: str = msg.header.payload_tag + # payload_dec: msgpack.Decoder = codec._payload_decs[payload_tag] + # return payload_dec.decode(msg.pld) + + # def enc_payload( + # codec: MsgCodec, + # payload: Any, + # cid: str, + + # ) -> bytes: + + # # tag_field: str|None = None + + # plbytes = codec.enc.encode(payload) + # if b'msg_type' in plbytes: + # assert isinstance(payload, Struct) + + # # tag_field: str = type(payload).__name__ + # payload = msgspec.Raw(plbytes) + + # msg = Msg( + # cid=cid, + # pld=payload, + # # Header( + # # payload_tag=tag_field, + # # # dialog_id, + # # ), + # ) + # return codec.enc.encode(msg) -def mk_tagged_union_dec( - tagged_structs: list[Struct], + #def mk_tagged_union_dec( + # tagged_structs: list[Struct], -) -> tuple[ - list[str], - msgpack.Decoder, -]: - # See "tagged unions" docs: - # https://jcristharif.com/msgspec/structs.html#tagged-unions + #) -> tuple[ + # list[str], + # msgpack.Decoder, + #]: + # ''' + # Create a `msgpack.Decoder` for an input `list[msgspec.Struct]` + # and return a `list[str]` of each struct's `tag_field: str` value + # which can be used to "map to" the initialized dec. - # "The quickest way to enable tagged unions is to set tag=True when - # defining every struct type in the union. In this case tag_field - # defaults to "type", and tag defaults to the struct class name - # (e.g. "Get")." - first: Struct = tagged_structs[0] - types_union: Union[Type[Struct]] = Union[ - first - ]|Any - tags: list[str] = [first.__name__] + # ''' + # # See "tagged unions" docs: + # # https://jcristharif.com/msgspec/structs.html#tagged-unions - for struct in tagged_structs[1:]: - types_union |= struct - tags.append(struct.__name__) + # # "The quickest way to enable tagged unions is to set tag=True when + # # defining every struct type in the union. In this case tag_field + # # defaults to "type", and tag defaults to the struct class name + # # (e.g. "Get")." 
+ # first: Struct = tagged_structs[0] + # types_union: Union[Type[Struct]] = Union[ + # first + # ]|Any + # tags: list[str] = [first.__name__] - dec = msgpack.Decoder(types_union) - return ( - tags, - dec, - ) + # for struct in tagged_structs[1:]: + # types_union |= struct + # tags.append( + # getattr( + # struct, + # struct.__struct_config__.tag_field, + # struct.__name__, + # ) + # ) + + # dec = msgpack.Decoder(types_union) + # return ( + # tags, + # dec, + # ) -# TODO: struct aware messaging coders as per: -# - https://github.com/goodboy/tractor/issues/36 -# - https://github.com/goodboy/tractor/issues/196 -# - https://github.com/goodboy/tractor/issues/365 def mk_codec( - libname: str = 'msgspec', - - # for codec-ing boxed `Msg`-with-payload msgs - payload_types: Union[Type[Struct]]|None = None, - - # TODO: do we want to allow NOT/using a diff `Msg`-set? + ipc_msg_spec: Union[Type[Struct]]|Any|None = None, # + # ^TODO^: in the long run, do we want to allow using a diff IPC `Msg`-set? + # it would break the runtime, but maybe say if you wanted + # to add some kinda field-specific or wholesale `.pld` ecryption? + # struct type unions set for `Decoder` # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_msg_spec: Union[Type[Struct]]|Any = Any, + ipc_pld_spec: Union[Type[Struct]]|Any|None = None, - cache_now: bool = True, + # TODO: offering a per-msg(-field) type-spec such that + # the fields can be dynamically NOT decoded and left as `Raw` + # values which are later loaded by a sub-decoder specified + # by `tag_field: str` value key? + # payload_msg_specs: dict[ + # str, # tag_field value as sub-decoder key + # Union[Type[Struct]] # `Msg.pld` type spec + # ]|None = None, + + libname: str = 'msgspec', # proxy as `Struct(**kwargs)` + # ------ - ------ + dec_hook: Callable|None = None, + enc_hook: Callable|None = None, + # ------ - ------ **kwargs, + # + # Encoder: + # write_buffer_size=write_buffer_size, + # + # Decoder: + # ext_hook: ext_hook_sig ) -> MsgCodec: ''' @@ -247,75 +309,81 @@ def mk_codec( `msgspec` ;). ''' - # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` - # for the decoder such that all sub-type msgs in our SCIPP - # will automatically decode to a type-"limited" payload (`Struct`) - # object (set). - payload_type_spec: Union[Type[Msg]]|None = None - if payload_types: + if ( + ipc_msg_spec is not None + and ipc_pld_spec + ): + raise RuntimeError( + f'If a payload spec is provided,\n' + "the builtin SC-shuttle-protocol's msg set\n" + f'(i.e. `{Msg}`) MUST be used!\n\n' + f'However both values were passed as => mk_codec(\n' + f' ipc_msg_spec={ipc_msg_spec}`\n' + f' ipc_pld_spec={ipc_pld_spec}`\n)\n' + ) + + elif ( + ipc_pld_spec + and + + # XXX required for now (or maybe forever?) until + # we can dream up a way to allow parameterizing and/or + # custom overrides to the `Msg`-spec protocol itself? + ipc_msg_spec is None + ): + # (manually) generate a msg-payload-spec for all relevant + # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # for the decoder such that all sub-type msgs in our SCIPP + # will automatically decode to a type-"limited" payload (`Struct`) + # object (set). 
( - payload_type_spec, + ipc_msg_spec, msg_types, ) = mk_msg_spec( - payload_type=payload_types, + payload_type_union=ipc_pld_spec, ) - assert len(payload_type_spec.__args__) == len(msg_types) + assert len(ipc_msg_spec.__args__) == len(msg_types) + assert ipc_msg_spec - # TODO: sub-decode `.pld: Raw`? - # see similar notes inside `.msg.types`.. - # - # not sure we'll end up wanting/needing this - # though it might have unforeseen advantages in terms - # of enabling encrypted appliciation layer (only) - # payloads? - # - # register sub-payload decoders to load `.pld: Raw` - # decoded `Msg`-packets using a dynamic lookup (table) - # instead of a pre-defined msg-spec via `Generic` - # parameterization. - # - # ( - # tags, - # payload_dec, - # ) = mk_tagged_union_dec( - # tagged_structs=list(payload_types.__args__), - # ) - # _payload_decs: ( - # dict[str, msgpack.Decoder]|None - # ) = { - # # pre-seed decoders for std-py-type-set for use when - # # `Msg.pld == None|Any`. - # None: msgpack.Decoder(Any), - # Any: msgpack.Decoder(Any), - # } - # for name in tags: - # _payload_decs[name] = payload_dec + dec = msgpack.Decoder( + type=ipc_msg_spec, # like `Msg[Any]` + ) + + else: + ipc_msg_spec = ipc_msg_spec or Any + + enc = msgpack.Encoder( + enc_hook=enc_hook, + ) + dec = msgpack.Decoder( + type=ipc_msg_spec, # like `Msg[Any]` + dec_hook=dec_hook, + ) codec = MsgCodec( - ipc_msg_spec=ipc_msg_spec, - payload_msg_spec=payload_type_spec, - **kwargs, + _enc=enc, + _dec=dec, + # payload_msg_specs=payload_msg_specs, + # **kwargs, ) - assert codec.lib.__name__ == libname - # by default, config-n-cache the codec pair from input settings. - if cache_now: - assert codec.enc - assert codec.dec + # sanity on expected backend support + assert codec.lib.__name__ == libname return codec # instance of the default `msgspec.msgpack` codec settings, i.e. # no custom structs, hooks or other special types. -_def_msgspec_codec: MsgCodec = mk_codec() +_def_msgspec_codec: MsgCodec = mk_codec(ipc_msg_spec=Any) # NOTE: provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing # `Channel.send()/.recv()` of wire data. _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( 'msgspec_codec', + + # TODO: move this to our new `Msg`-spec! default=_def_msgspec_codec, ) @@ -353,7 +421,7 @@ def limit_msg_spec( payload_types: Union[Type[Struct]], # TODO: don't need this approach right? - # + # -> related to the `MsgCodec._payload_decs` stuff above.. # tagged_structs: list[Struct]|None = None, **codec_kwargs, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 732a0f5d..7d64e766 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -22,9 +22,7 @@ that is, the "Structurred-Concurrency-Inter-Process-(dialog)-(un)Protocol". ''' - from __future__ import annotations -# from contextlib import contextmanager as cm import types from typing import ( Any, @@ -36,14 +34,12 @@ from typing import ( ) from msgspec import ( - msgpack, - Raw, Struct, UNSET, ) - -# TODO: can also remove yah? +# TODO: sub-decoded `Raw` fields? +# -[ ] see `MsgCodec._payload_decs` notes # # class Header(Struct, tag=True): # ''' @@ -70,7 +66,6 @@ class Msg( tree. ''' - # header: Header # TODO: use UNSET here? cid: str|None # call/context-id @@ -94,9 +89,24 @@ class Msg( pld: PayloadT -# TODO: better name, like `Call/TaskInput`? +# TODO: caps based RPC support in the payload? +# +# -[ ] integration with our ``enable_modules: list[str]`` caps sys. 
+# ``pkgutil.resolve_name()`` internally uses +# ``importlib.import_module()`` which can be filtered by +# inserting a ``MetaPathFinder`` into ``sys.meta_path`` (which +# we could do before entering the ``Actor._process_messages()`` +# loop)? +# - https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 +# - https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules +# - https://stackoverflow.com/a/63320902 +# - https://docs.python.org/3/library/sys.html#sys.meta_path +# +# -[ ] can we combine .ns + .func into a native `NamespacePath` field? +# +# -[ ]better name, like `Call/TaskInput`? +# class FuncSpec(Struct): - # TODO: can we combine these 2 into a `NamespacePath` field? ns: str func: str @@ -249,7 +259,7 @@ class Error(Msg): def mk_msg_spec( - payload_type: Union[Type] = Any, + payload_type_union: Union[Type] = Any, boxing_msg_set: set[Msg] = { Started, Yield, @@ -261,10 +271,13 @@ def mk_msg_spec( list[Type[Msg]], ]: ''' - Generate a payload-type-parameterized `Msg` specification such - that IPC msgs which can be `Msg.pld` (payload) type - limited/filterd are specified given an input `payload_type: - Union[Type]`. + Create a payload-(data-)type-parameterized IPC message specification. + + Allows generating IPC msg types from the above builtin set + with a payload (field) restricted data-type via the `Msg.pld: + PayloadT` type var. This allows runtime-task contexts to use + the python type system to limit/filter payload values as + determined by the input `payload_type_union: Union[Type]`. ''' submsg_types: list[Type[Msg]] = Msg.__subclasses__() @@ -287,7 +300,7 @@ def mk_msg_spec( # -[ ] is there a way to get it to work at module level # just using inheritance or maybe a metaclass? # - # index_paramed_msg_type: Msg = msgtype[payload_type] + # index_paramed_msg_type: Msg = msgtype[payload_type_union] # TODO: WHY do we need to dynamically generate the # subtype-msgs here to ensure the `.pld` parameterization @@ -300,7 +313,7 @@ def mk_msg_spec( ( # XXX NOTE XXX this seems to be THE ONLY # way to get this to work correctly!?! - Msg[payload_type], + Msg[payload_type_union], Generic[PayloadT], ), {}, @@ -322,71 +335,3 @@ def mk_msg_spec( payload_type_spec, msg_types, ) - - -# TODO: integration with our ``enable_modules: list[str]`` caps sys. -# -# ``pkgutil.resolve_name()`` internally uses -# ``importlib.import_module()`` which can be filtered by inserting -# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before -# entering the ``Actor._process_messages()`` loop). -# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 -# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules -# - https://stackoverflow.com/a/63320902 -# - https://docs.python.org/3/library/sys.html#sys.meta_path - -# TODO: do we still want to try and support the sub-decoder with -# `Raw` technique in the case that the `Generic` approach gives -# future grief? -# -# sub-decoders for retreiving embedded -# payload data and decoding to a sender -# side defined (struct) type. 
-_payload_decs: dict[ - str|None, - msgpack.Decoder, -] = { - # default decoder is used when `Header.payload_tag == None` - None: msgpack.Decoder(Any), -} - - -def dec_payload( - msg: Msg, - msg_dec: msgpack.Decoder = msgpack.Decoder( - type=Msg[Any] - ), - -) -> Any|Struct: - - msg: Msg = msg_dec.decode(msg) - payload_tag: str = msg.header.payload_tag - payload_dec: msgpack.Decoder = _payload_decs[payload_tag] - return payload_dec.decode(msg.pld) - - -def enc_payload( - enc: msgpack.Encoder, - payload: Any, - cid: str, - -) -> bytes: - - # tag_field: str|None = None - - plbytes = enc.encode(payload) - if b'msg_type' in plbytes: - assert isinstance(payload, Struct) - - # tag_field: str = type(payload).__name__ - payload = Raw(plbytes) - - msg = Msg( - cid=cid, - pld=payload, - # Header( - # payload_tag=tag_field, - # # dialog_id, - # ), - ) - return enc.encode(msg) -- 2.34.1 From 48606b6c7765812b2aca63c8a14b6e6fb021e070 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 13:48:08 -0400 Subject: [PATCH 011/190] Tweak msging tests to match codec api changes Mostly adjusting input args/logic to various spec/codec signatures and new runtime semantics: - `test_msg_spec_xor_pld_spec()` to verify that a shuttle prot spec and payload spec are necessarily mutex and that `mk_codec()` enforces it. - switch to `ipc_msg_spec` input in `mk_custom_codec()` helper. - drop buncha commented cruft from `test_limit_msgspec()` including no longer needed type union instance checks in dunder attributes. --- tests/test_caps_based_msging.py | 100 +++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 34 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index b101c1e0..98ab7fa3 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -1,5 +1,6 @@ ''' -Functional audits for our "capability based messaging (schema)" feats. +Low-level functional audits for our +"capability based messaging"-spec feats. B~) @@ -22,6 +23,7 @@ from msgspec import ( Struct, ValidationError, ) +import pytest import tractor from tractor.msg import ( _def_msgspec_codec, @@ -34,13 +36,29 @@ from tractor.msg import ( current_msgspec_codec, ) from tractor.msg.types import ( - PayloadT, + # PayloadT, Msg, # Started, mk_msg_spec, ) import trio + +def test_msg_spec_xor_pld_spec(): + ''' + If the `.msg.types.Msg`-set is overridden, we + can't also support a `Msg.pld` spec. + + ''' + # apply custom hooks and set a `Decoder` which only + # loads `NamespacePath` types. + with pytest.raises(RuntimeError): + mk_codec( + ipc_msg_spec=Any, + ipc_pld_spec=NamespacePath, + ) + + # TODO: wrap these into `._codec` such that user can just pass # a type table of some sort? def enc_hook(obj: Any) -> Any: @@ -66,11 +84,13 @@ def ex_func(*args): print(f'ex_func({args})') -def mk_custom_codec() -> MsgCodec: +def mk_custom_codec( + ipc_msg_spec: Type[Any] = Any, +) -> MsgCodec: # apply custom hooks and set a `Decoder` which only # loads `NamespacePath` types. nsp_codec: MsgCodec = mk_codec( - ipc_msg_spec=NamespacePath, + ipc_msg_spec=ipc_msg_spec, enc_hook=enc_hook, dec_hook=dec_hook, ) @@ -225,16 +245,9 @@ def chk_pld_type( pld_val_type: Type = type(pld) # gen_paramed: _GenericAlias = generic[payload_type] - # TODO: verify that the overridden subtypes - # DO NOT have modified type-annots from original! 
- # 'Start', .pld: FuncSpec - # 'StartAck', .pld: IpcCtxSpec - # 'Stop', .pld: UNSEt - # 'Error', .pld: ErrorData # for typedef in ( # [gen_paramed] # + - # # type-var should always be set for these sub-types # # as well! # Msg.__subclasses__() @@ -246,56 +259,75 @@ def chk_pld_type( # 'Return', # ]: # continue - # payload_type: Type[Struct] = CustomPayload - # TODO: can remove all this right!? - # - # when parameterized (like `Msg[Any]`) then - # we expect an alias as input. - # if isinstance(generic, _GenericAlias): - # assert payload_type in generic.__args__ - # else: - # assert PayloadType in generic.__parameters__ - # pld_param: Parameter = generic.__signature__.parameters['pld'] - # assert pld_param.annotation is PayloadType + # TODO: verify that the overridden subtypes + # DO NOT have modified type-annots from original! + # 'Start', .pld: FuncSpec + # 'StartAck', .pld: IpcCtxSpec + # 'Stop', .pld: UNSEt + # 'Error', .pld: ErrorData - type_spec: Union[Type[Struct]] + + pld_type_spec: Union[Type[Struct]] msg_types: list[Msg[payload_type]] + + # make a one-off dec to compare with our `MsgCodec` instance + # which does the below `mk_msg_spec()` call internally ( - type_spec, + pld_type_spec, msg_types, ) = mk_msg_spec( - payload_type=payload_type, + payload_type_union=payload_type, ) enc = msgpack.Encoder() dec = msgpack.Decoder( - type=type_spec, # like `Msg[Any]` + type=pld_type_spec or Any, # like `Msg[Any]` + ) + + codec: MsgCodec = mk_codec( + # NOTE: this ONLY accepts `Msg.pld` fields of a specified + # type union. + ipc_pld_spec=payload_type, + ) + + # assert codec.dec == dec + # XXX-^ not sure why these aren't "equal" but when cast + # to `str` they seem to match ?? .. kk + assert ( + str(pld_type_spec) + == + str(codec.ipc_pld_spec) + == + str(dec.type) + == + str(codec.dec.type) ) # verify the boxed-type for all variable payload-type msgs. for typedef in msg_types: pld_field = structs.fields(typedef)[1] - assert pld_field.type in {payload_type, PayloadT} - # TODO: does this need to work to get all subtypes to - # adhere? assert pld_field.type is payload_type + # TODO-^ does this need to work to get all subtypes to adhere? kwargs: dict[str, Any] = { 'cid': '666', 'pld': pld, } - enc_msg = typedef(**kwargs) + enc_msg: Msg = typedef(**kwargs) - wire_bytes: bytes = enc.encode(enc_msg) + wire_bytes: bytes = codec.enc.encode(enc_msg) + _wire_bytes: bytes = enc.encode(enc_msg) try: - dec_msg = dec.decode(wire_bytes) + _dec_msg = dec.decode(wire_bytes) + dec_msg = codec.dec.decode(wire_bytes) + assert dec_msg.pld == pld - assert (roundtrip := (dec_msg == enc_msg)) + assert _dec_msg.pld == pld + assert (roundtrip := (_dec_msg == enc_msg)) except ValidationError as ve: - # breakpoint() if pld_val_type is payload_type: raise ValueError( 'Got `ValidationError` despite type-var match!?\n' -- 2.34.1 From a38ac07af513ada34cff0db87a95f5e73acd0788 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 18:46:37 -0400 Subject: [PATCH 012/190] Be mega pedantic with msg-spec building Turns out the generics based payload speccing API, as in https://jcristharif.com/msgspec/supported-types.html#generic-types, DOES WORK properly as long as we don't rely on inheritance from `Msg` a parent `Generic`.. So let's get real pedantic in the `mk_msg_spec()` internals as well as verification in the test suite! 
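
To make the gotcha concrete, a tiny sketch (hypothetical `Box` struct;
only stock `msgspec` generics behaviour from the docs link above is
assumed): indexing the generic parameterizes the payload field for the
decoder, while a plain subclass leaves the type-var unbound and thus
effectively `Any`:

    from typing import Generic, TypeVar

    import msgspec
    from msgspec import msgpack

    T = TypeVar('T')


    class Box(msgspec.Struct, Generic[T]):
        pld: T


    class SubBox(Box):
        # plain inheritance: `T` stays unbound => `.pld` is `Any`
        ...


    wire: bytes = msgpack.encode(Box(pld='not-an-int'))

    # the indexed generic rejects a mis-typed payload,
    try:
        msgpack.Decoder(type=Box[int]).decode(wire)
    except msgspec.ValidationError:
        print('rejected B)')

    # ..but the un-parameterized subclass happily loads it.
    assert msgpack.Decoder(type=SubBox).decode(wire).pld == 'not-an-int'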
Fixes in `.msg.types`: - implement (as part of tinker testing) multiple spec union building methods via a `spec_build_method: str` to `mk_msg_spec()` and leave a buncha notes around what did and didn't work: - 'indexed_generics' is the only method THAT WORKS and the one that you'd expect being closest to the `msgspec` docs (link above). - 'defstruct' using dynamically defined msgs => doesn't work! - 'types_new_class' using dynamically defined msgs but with `types.new_clas()` => ALSO doesn't work.. - explicitly separate the `.pld` type-constrainable by user code msg set into `types._payload_spec_msgs` putting the others in a `types._runtime_spec_msgs` and the full set defined as `.__spec__` (moving it out of the pkg-mod and back to `.types` as well). - for the `_payload_spec_msgs` msgs manually make them inherit `Generic[PayloadT]` and (redunantly) define a `.pld: PayloadT` field. - make `IpcCtxSpec.functype` an in line `Literal`. - toss in some TODO notes about choosing a better `Msg.cid` type. Fixes/tweaks around `.msg._codec`: - rename `MsgCodec.ipc/pld_msg_spec` -> `.msg/pld_spec` - make `._enc/._dec` non optional fields - wow, ^facepalm^ , make sure `._ipc.MsgpackTCPStream.__init__()` uses `mk_codec()` since `MsgCodec` can't be (easily) constructed directly. Get more detailed in testing: - inside the `chk_pld_type()` helper ensure `roundtrip` is always set to some value, `None` by default but a bool depending on legit outcome. - drop input `generic`; no longer used. - drop the masked `typedef` loop from `Msg.__subclasses__()`. - for add an `expect_roundtrip: bool` and use to jump into debugger when any expectation doesn't match the outcome. - use new `MsgCodec` field names (as per first section above). - ensure the encoded msg matches the decoded one from both the ad-hoc decoder and codec loaded values. - ensure the pld checking is only applied to msgs in the `types._payload_spec_msgs` set by `typef.__name__` filtering since `mk_msg_spec()` now returns the full `.types.Msg` set. --- tests/test_caps_based_msging.py | 150 +++++++++++++--------- tractor/_ipc.py | 3 +- tractor/msg/__init__.py | 38 ++---- tractor/msg/_codec.py | 32 +++-- tractor/msg/types.py | 220 ++++++++++++++++++++++++-------- 5 files changed, 289 insertions(+), 154 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 98ab7fa3..abdda0a5 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -35,6 +35,7 @@ from tractor.msg import ( apply_codec, current_msgspec_codec, ) +from tractor.msg import types from tractor.msg.types import ( # PayloadT, Msg, @@ -235,31 +236,15 @@ def test_codec_hooks_mod(): def chk_pld_type( - generic: Msg|_GenericAlias, - payload_type: Type[Struct]|Any, + payload_spec: Type[Struct]|Any, pld: Any, + expect_roundtrip: bool|None = None, + ) -> bool: - roundtrip: bool = False pld_val_type: Type = type(pld) - # gen_paramed: _GenericAlias = generic[payload_type] - # for typedef in ( - # [gen_paramed] - # + - # # type-var should always be set for these sub-types - # # as well! - # Msg.__subclasses__() - # ): - # if typedef.__name__ not in [ - # 'Msg', - # 'Started', - # 'Yield', - # 'Return', - # ]: - # continue - # TODO: verify that the overridden subtypes # DO NOT have modified type-annots from original! 
# 'Start', .pld: FuncSpec @@ -267,48 +252,64 @@ def chk_pld_type( # 'Stop', .pld: UNSEt # 'Error', .pld: ErrorData - - pld_type_spec: Union[Type[Struct]] - msg_types: list[Msg[payload_type]] - - # make a one-off dec to compare with our `MsgCodec` instance - # which does the below `mk_msg_spec()` call internally - ( - pld_type_spec, - msg_types, - ) = mk_msg_spec( - payload_type_union=payload_type, - ) - enc = msgpack.Encoder() - dec = msgpack.Decoder( - type=pld_type_spec or Any, # like `Msg[Any]` - ) - codec: MsgCodec = mk_codec( # NOTE: this ONLY accepts `Msg.pld` fields of a specified # type union. - ipc_pld_spec=payload_type, + ipc_pld_spec=payload_spec, + ) + + # make a one-off dec to compare with our `MsgCodec` instance + # which does the below `mk_msg_spec()` call internally + ipc_msg_spec: Union[Type[Struct]] + msg_types: list[Msg[payload_spec]] + ( + ipc_msg_spec, + msg_types, + ) = mk_msg_spec( + payload_type_union=payload_spec, + ) + _enc = msgpack.Encoder() + _dec = msgpack.Decoder( + type=ipc_msg_spec or Any, # like `Msg[Any]` + ) + + assert ( + payload_spec + == + codec.pld_spec ) # assert codec.dec == dec - # XXX-^ not sure why these aren't "equal" but when cast + # + # ^-XXX-^ not sure why these aren't "equal" but when cast # to `str` they seem to match ?? .. kk + assert ( - str(pld_type_spec) + str(ipc_msg_spec) == - str(codec.ipc_pld_spec) + str(codec.msg_spec) == - str(dec.type) + str(_dec.type) == str(codec.dec.type) ) # verify the boxed-type for all variable payload-type msgs. + if not msg_types: + breakpoint() + + roundtrip: bool|None = None + pld_spec_msg_names: list[str] = [ + td.__name__ for td in types._payload_spec_msgs + ] for typedef in msg_types: + skip_runtime_msg: bool = typedef.__name__ not in pld_spec_msg_names + if skip_runtime_msg: + continue + pld_field = structs.fields(typedef)[1] - assert pld_field.type is payload_type - # TODO-^ does this need to work to get all subtypes to adhere? + assert pld_field.type is payload_spec # TODO-^ does this need to work to get all subtypes to adhere? kwargs: dict[str, Any] = { 'cid': '666', @@ -316,44 +317,72 @@ def chk_pld_type( } enc_msg: Msg = typedef(**kwargs) + _wire_bytes: bytes = _enc.encode(enc_msg) wire_bytes: bytes = codec.enc.encode(enc_msg) - _wire_bytes: bytes = enc.encode(enc_msg) + assert _wire_bytes == wire_bytes + ve: ValidationError|None = None try: - _dec_msg = dec.decode(wire_bytes) dec_msg = codec.dec.decode(wire_bytes) + _dec_msg = _dec.decode(wire_bytes) - assert dec_msg.pld == pld - assert _dec_msg.pld == pld - assert (roundtrip := (_dec_msg == enc_msg)) + # decoded msg and thus payload should be exactly same! + assert (roundtrip := ( + _dec_msg + == + dec_msg + == + enc_msg + )) - except ValidationError as ve: - if pld_val_type is payload_type: + if ( + expect_roundtrip is not None + and expect_roundtrip != roundtrip + ): + breakpoint() + + assert ( + pld + == + dec_msg.pld + == + enc_msg.pld + ) + # assert (roundtrip := (_dec_msg == enc_msg)) + + except ValidationError as _ve: + ve = _ve + roundtrip: bool = False + if pld_val_type is payload_spec: raise ValueError( 'Got `ValidationError` despite type-var match!?\n' f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_type}\n' + f'payload_type: {payload_spec}\n' ) from ve else: # ow we good cuz the pld spec mismatched. 
print( 'Got expected `ValidationError` since,\n' - f'{pld_val_type} is not {payload_type}\n' + f'{pld_val_type} is not {payload_spec}\n' ) else: if ( - pld_val_type is not payload_type - and payload_type is not Any + payload_spec is not Any + and + pld_val_type is not payload_spec ): raise ValueError( 'DID NOT `ValidationError` despite expected type match!?\n' f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_type}\n' + f'payload_type: {payload_spec}\n' ) - return roundtrip + # full code decode should always be attempted! + if roundtrip is None: + breakpoint() + return roundtrip def test_limit_msgspec(): @@ -365,9 +394,10 @@ def test_limit_msgspec(): # ensure we can round-trip a boxing `Msg` assert chk_pld_type( - Msg, + # Msg, Any, None, + expect_roundtrip=True, ) # TODO: don't need this any more right since @@ -379,7 +409,7 @@ def test_limit_msgspec(): # verify that a mis-typed payload value won't decode assert not chk_pld_type( - Msg, + # Msg, int, pld='doggy', ) @@ -392,13 +422,13 @@ def test_limit_msgspec(): value: Any assert not chk_pld_type( - Msg, + # Msg, CustomPayload, pld='doggy', ) assert chk_pld_type( - Msg, + # Msg, CustomPayload, pld=CustomPayload(name='doggy', value='urmom') ) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index b1c2ccd2..5f71c38c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -48,6 +48,7 @@ from tractor._exceptions import TransportClosed from tractor.msg import ( _ctxvar_MsgCodec, MsgCodec, + mk_codec, ) log = get_logger(__name__) @@ -162,7 +163,7 @@ class MsgpackTCPStream(MsgTransport): # allow for custom IPC msg interchange format # dynamic override Bo - self.codec: MsgCodec = codec or MsgCodec() + self.codec: MsgCodec = codec or mk_codec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index a93fa888..0c8809a9 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -37,36 +37,20 @@ from ._codec import ( from .types import ( Msg as Msg, - Start, # with pld + Start as Start, # with pld FuncSpec as FuncSpec, - StartAck, # with pld + StartAck as StartAck, # with pld IpcCtxSpec as IpcCtxSpec, - Started, - Yield, - Stop, - Return, + Started as Started, + Yield as Yield, + Stop as Stop, + Return as Return, - Error, # with pld - ErrorData as ErrorData + Error as Error, # with pld + ErrorData as ErrorData, + + # full msg spec set + __spec__ as __spec__, ) - - -# built-in SC shuttle protocol msg type set in -# approx order of the IPC txn-state spaces. -__spec__: list[Msg] = [ - - # inter-actor RPC initiation - Start, - StartAck, - - # no-outcome-yet IAC (inter-actor-communication) - Started, - Yield, - Stop, - - # termination outcomes - Return, - Error, -] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e6cb4f1f..4477d393 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -73,16 +73,15 @@ class MsgCodec(Struct): A IPC msg interchange format lib's encoder + decoder pair. 
''' - # post-configure-cached when prop-accessed (see `mk_codec()` - # OR can be passed directly as, - # `MsgCodec(_enc=, _dec=)` - _enc: msgpack.Encoder|None = None - _dec: msgpack.Decoder|None = None + _enc: msgpack.Encoder + _dec: msgpack.Decoder + + pld_spec: Union[Type[Struct]]|None # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions @property - def ipc_pld_spec(self) -> Union[Type[Struct]]: + def msg_spec(self) -> Union[Type[Struct]]: return self._dec.type lib: ModuleType = msgspec @@ -142,6 +141,7 @@ class MsgCodec(Struct): determined by the ''' + # https://jcristharif.com/msgspec/usage.html#typed-decoding return self._dec.decode(msg) # TODO: do we still want to try and support the sub-decoder with @@ -149,6 +149,7 @@ class MsgCodec(Struct): # future grief? # # -[ ] + # -> https://jcristharif.com/msgspec/api.html#raw # #def mk_pld_subdec( # self, @@ -224,6 +225,20 @@ class MsgCodec(Struct): # return codec.enc.encode(msg) + +# TODO: sub-decoded `Raw` fields? +# -[ ] see `MsgCodec._payload_decs` notes +# +# XXX if we wanted something more complex then field name str-keys +# we might need a header field type to describe the lookup sys? +# class Header(Struct, tag=True): +# ''' +# A msg header which defines payload properties + +# ''' +# payload_tag: str|None = None + + #def mk_tagged_union_dec( # tagged_structs: list[Struct], @@ -345,10 +360,6 @@ def mk_codec( assert len(ipc_msg_spec.__args__) == len(msg_types) assert ipc_msg_spec - dec = msgpack.Decoder( - type=ipc_msg_spec, # like `Msg[Any]` - ) - else: ipc_msg_spec = ipc_msg_spec or Any @@ -363,6 +374,7 @@ def mk_codec( codec = MsgCodec( _enc=enc, _dec=dec, + pld_spec=ipc_pld_spec, # payload_msg_specs=payload_msg_specs, # **kwargs, ) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7d64e766..2411f0f9 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -34,20 +34,13 @@ from typing import ( ) from msgspec import ( + defstruct, + # field, Struct, UNSET, + UnsetType, ) -# TODO: sub-decoded `Raw` fields? -# -[ ] see `MsgCodec._payload_decs` notes -# -# class Header(Struct, tag=True): -# ''' -# A msg header which defines payload properties - -# ''' -# payload_tag: str|None = None - # type variable for the boxed payload field `.pld` PayloadT = TypeVar('PayloadT') @@ -57,6 +50,9 @@ class Msg( Generic[PayloadT], tag=True, tag_field='msg_type', + + # eq=True, + # order=True, ): ''' The "god" boxing msg type. @@ -66,8 +62,13 @@ class Msg( tree. ''' - # TODO: use UNSET here? cid: str|None # call/context-id + # ^-TODO-^: more explicit type? + # -[ ] use UNSET here? + # https://jcristharif.com/msgspec/supported-types.html#unset + # + # -[ ] `uuid.UUID` which has multi-protocol support + # https://jcristharif.com/msgspec/supported-types.html#uuid # The msgs "payload" (spelled without vowels): # https://en.wikipedia.org/wiki/Payload_(computing) @@ -136,19 +137,18 @@ class Start( pld: FuncSpec -FuncType: Literal[ - 'asyncfunc', - 'asyncgen', - 'context', # TODO: the only one eventually? -] = 'context' - - class IpcCtxSpec(Struct): ''' An inter-actor-`trio.Task`-comms `Context` spec. ''' - functype: FuncType + # TODO: maybe better names for all these? + # -[ ] obvi ^ would need sync with `._rpc` + functype: Literal[ + 'asyncfunc', + 'asyncgen', + 'context', # TODO: the only one eventually? + ] # TODO: as part of the reponse we should report our allowed # msg spec which should be generated from the type-annots as @@ -182,6 +182,7 @@ class Started( decorated IPC endpoint. 
''' + pld: PayloadT # TODO: instead of using our existing `Start` @@ -198,6 +199,7 @@ class Yield( Per IPC transmission of a value from `await MsgStream.send()`. ''' + pld: PayloadT class Stop(Msg): @@ -206,7 +208,7 @@ class Stop(Msg): of `StopAsyncIteration`. ''' - pld: UNSET + pld: UnsetType = UNSET class Return( @@ -218,6 +220,7 @@ class Return( func-as-`trio.Task`. ''' + pld: PayloadT class ErrorData(Struct): @@ -258,13 +261,47 @@ class Error(Msg): # cid: str +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. +__spec__: list[Msg] = [ + + # inter-actor RPC initiation + Start, + StartAck, + + # no-outcome-yet IAC (inter-actor-communication) + Started, + Yield, + Stop, + + # termination outcomes + Return, + Error, +] + +_runtime_spec_msgs: list[Msg] = [ + Start, + StartAck, + Stop, + Error, +] +_payload_spec_msgs: list[Msg] = [ + Started, + Yield, + Return, +] + + def mk_msg_spec( payload_type_union: Union[Type] = Any, - boxing_msg_set: set[Msg] = { - Started, - Yield, - Return, - }, + + # boxing_msg_set: list[Msg] = _payload_spec_msgs, + spec_build_method: Literal[ + 'indexed_generics', # works + 'defstruct', + 'types_new_class', + + ] = 'indexed_generics', ) -> tuple[ Union[Type[Msg]], @@ -281,26 +318,58 @@ def mk_msg_spec( ''' submsg_types: list[Type[Msg]] = Msg.__subclasses__() + bases: tuple = ( + # XXX NOTE XXX the below generic-parameterization seems to + # be THE ONLY way to get this to work correctly in terms + # of getting ValidationError on a roundtrip? + Msg[payload_type_union], + Generic[PayloadT], + ) + defstruct_bases: tuple = ( + Msg, # [payload_type_union], + # Generic[PayloadT], + # ^-XXX-^: not allowed? lul.. + ) + ipc_msg_types: list[Msg] = [] - # TODO: see below as well, - # => union building approach with `.__class_getitem__()` - # doesn't seem to work..? - # - # payload_type_spec: Union[Type[Msg]] - # - msg_types: list[Msg] = [] - for msgtype in boxing_msg_set: + idx_msg_types: list[Msg] = [] + defs_msg_types: list[Msg] = [] + nc_msg_types: list[Msg] = [] + + for msgtype in __spec__: + + # for the NON-payload (user api) type specify-able + # msgs types, we simply aggregate the def as is + # for inclusion in the output type `Union`. + if msgtype not in _payload_spec_msgs: + ipc_msg_types.append(msgtype) + continue # check inheritance sanity assert msgtype in submsg_types # TODO: wait why do we need the dynamic version here? - # -[ ] paraming the `PayloadT` values via `Generic[T]` - # doesn't seem to work at all? - # -[ ] is there a way to get it to work at module level - # just using inheritance or maybe a metaclass? + # XXX ANSWER XXX -> BC INHERITANCE.. don't work w generics.. # - # index_paramed_msg_type: Msg = msgtype[payload_type_union] + # NOTE previously bc msgtypes WERE NOT inheritting + # directly the `Generic[PayloadT]` type, the manual method + # of generic-paraming with `.__class_getitem__()` wasn't + # working.. + # + # XXX but bc i changed that to make every subtype inherit + # it, this manual "indexed parameterization" method seems + # to work? + # + # -[x] paraming the `PayloadT` values via `Generic[T]` + # does work it seems but WITHOUT inheritance of generics + # + # -[-] is there a way to get it to work at module level + # just using inheritance or maybe a metaclass? + # => thot that `defstruct` might work, but NOPE, see + # below.. 
+ # + idxed_msg_type: Msg = msgtype[payload_type_union] + idx_msg_types.append(idxed_msg_type) # TODO: WHY do we need to dynamically generate the # subtype-msgs here to ensure the `.pld` parameterization @@ -308,30 +377,69 @@ def mk_msg_spec( # `msgpack.Decoder()`..? # # dynamically create the payload type-spec-limited msg set. - manual_paramed_msg_subtype: Type = types.new_class( - msgtype.__name__, - ( - # XXX NOTE XXX this seems to be THE ONLY - # way to get this to work correctly!?! - Msg[payload_type_union], - Generic[PayloadT], - ), - {}, + newclass_msgtype: Type = types.new_class( + name=msgtype.__name__, + bases=bases, + kwds={}, + ) + nc_msg_types.append( + newclass_msgtype[payload_type_union] ) - # TODO: grok the diff here better.. + # with `msgspec.structs.defstruct` + # XXX ALSO DOESN'T WORK + defstruct_msgtype = defstruct( + name=msgtype.__name__, + fields=[ + ('cid', str), + + # XXX doesn't seem to work.. + # ('pld', PayloadT), + + ('pld', payload_type_union), + ], + bases=defstruct_bases, + ) + defs_msg_types.append(defstruct_msgtype) + # assert index_paramed_msg_type == manual_paramed_msg_subtype - # XXX TODO: why does the manual method work but not the - # `.__class_getitem__()` one!?! - paramed_msg_type = manual_paramed_msg_subtype + # paramed_msg_type = manual_paramed_msg_subtype - # payload_type_spec |= paramed_msg_type - msg_types.append(paramed_msg_type) + # ipc_payload_msgs_type_union |= index_paramed_msg_type + idx_spec: Union[Type[Msg]] = Union[*idx_msg_types] + def_spec: Union[Type[Msg]] = Union[*defs_msg_types] + nc_spec: Union[Type[Msg]] = Union[*nc_msg_types] + + specs: dict[str, Union[Type[Msg]]] = { + 'indexed_generics': idx_spec, + 'defstruct': def_spec, + 'types_new_class': nc_spec, + } + msgtypes_table: dict[str, list[Msg]] = { + 'indexed_generics': idx_msg_types, + 'defstruct': defs_msg_types, + 'types_new_class': nc_msg_types, + } + + # XXX lol apparently type unions can't ever + # be equal eh? + # TODO: grok the diff here better.. + # + # assert ( + # idx_spec + # == + # nc_spec + # == + # def_spec + # ) + # breakpoint() + + pld_spec: Union[Type] = specs[spec_build_method] + runtime_spec: Union[Type] = Union[*ipc_msg_types] - payload_type_spec: Union[Type[Msg]] = Union[*msg_types] return ( - payload_type_spec, - msg_types, + pld_spec | runtime_spec, + msgtypes_table[spec_build_method] + ipc_msg_types, ) -- 2.34.1 From 8468bcca3651b524d4982e9ef1ce731ae4b78bb2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Apr 2024 09:21:30 -0400 Subject: [PATCH 013/190] Get msg spec type limiting working with a `RunVar` Since `contextvars.ContextVar` seems to reset to the default in every new task, switching to using `trio.lowlevel.RunVar` kinda gets close to what we'd like where a child scope can override what's in the rent but ideally without modifying the rent's. I tried `tricycle.TreeVar` as well but it also seems to reset across (embedded) nurseries in our runtime; need to try it again bc apparently that's not how it's suppose to work? NOTE that for now i'm keeping the `.msg.types._ctxvar_MsgCodec` set to the `msgspec` default (`Any` types) so that the test suite will still pass until the runtime is ported to the new msg-spec + codec. 
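
For reference, a tiny standalone sketch (not this lib's code, names
made up) of the `RunVar` semantics being relied on here: the value is
scoped to the whole `trio.run()` so any task sees the latest `.set()`,
unlike a `ContextVar` which each new task gets its own snapshot of:

  import trio

  _codec = trio.lowlevel.RunVar('codec', default='any-codec')

  async def child():
      # run-scoped: the child sees the parent's override
      assert _codec.get() == 'nsp-codec'

  async def main():
      token = _codec.set('nsp-codec')
      try:
          async with trio.open_nursery() as tn:
              tn.start_soon(child)
      finally:
          _codec.reset(token)
      assert _codec.get() == 'any-codec'

  trio.run(main)
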
Surrounding and in support of all this the `Msg`-set impl deats
changed a bit as well as various stuff in `.msg` sub-mods:
- drop the `.pld` struct types for `Error`, `Start`, `StartAck` since
  we don't really need the `.pld` payload field in those cases;
  they're runtime control msgs for starting RPC tasks and handling
  remote errors, so we can just put the fields directly on each msg
  since the user will never want/need to override the `.pld` field
  type.
- add a couple new runtime msgs and include them in `msg.__spec__`,
  but make them NOT inherit from `Msg` since they are runtime-specific
  and thus have no need for `.pld` type constraints:
  - `Aid`, the actor-id identity handshake msg.
  - `SpawnSpec`: the spawn data passed from a parent actor down to
    a child in `Actor._from_parent()` for which we need a shuttle
    protocol msg, so might as well make it a pedantic one ;)
- fix some `Actor.uid` field types that were type-borked on `Error`.
- add notes about how we need built-in `debug_mode` msgs in order to
  avoid msg-type errors when the TTY lock machinery is used with
  a `.pld` spec other than the default `Any`..
  -> since `devx._debug.lock_tty_for_child()` and its client side
  `wait_for_parent_stdin_hijack()` use `Context.started('Locked')` and
  `MsgStream.send('pdb_unlock')` string values as their `.pld`
  contents, we'd need to either always do an `ipc_pld_spec | str` or
  pre-define some dedicated `Msg` types which get `Union`-ed in for
  this? (see the sketch after this list)
- break out `msg.pretty_struct.Struct._sin_props()` into a helper func
  `iter_fields()` since the impl doesn't require a struct instance.
- as mentioned above, since `ContextVar` didn't work as anticipated
  I next tried `tricycle.TreeVar` but that too didn't seem to keep the
  `apply_codec()` setting intact across
  `Portal.open_context()`/`Context.open_stream()` (it kept reverting
  to the default `.pld: Any` setting) so I finalized on
  a `trio.lowlevel.RunVar` for now despite it basically being
  a `global`..
  -> will probably come back to test this with `TreeVar` and some hot
  tips i picked up from @mikenerone in the `trio` gitter, which i put
  in comments surrounding proto-code.
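
Rough sketch of that debug-msg gotcha (standalone, made-up names;
`DebugBoxed` is NOT a real runtime type) showing why a non-`Any` pld
spec needs `str` unioned in:

  from typing import Generic, TypeVar, Union
  import msgspec

  PayloadT = TypeVar('PayloadT')

  class DebugBoxed(msgspec.Struct, Generic[PayloadT]):
      cid: str
      pld: PayloadT

  # as shipped by the TTY-lock dialog, eg. `Context.started('Locked')`
  wire: bytes = msgspec.msgpack.Encoder().encode(
      DebugBoxed(cid='0', pld='Locked')
  )

  app_only = msgspec.msgpack.Decoder(type=DebugBoxed[int])
  with_str = msgspec.msgpack.Decoder(type=DebugBoxed[Union[int, str]])

  try:
      app_only.decode(wire)  # rejected: `str` not in the pld spec
  except msgspec.ValidationError:
      print('debug-mode `str` pld fails without the `| str` union')

  assert with_str.decode(wire).pld == 'Locked'
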
--- tractor/msg/__init__.py | 13 +- tractor/msg/_codec.py | 95 ++++++++++-- tractor/msg/pretty_struct.py | 43 +++--- tractor/msg/types.py | 270 +++++++++++++++++++++++++++++------ 4 files changed, 337 insertions(+), 84 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 0c8809a9..d8f37477 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -31,25 +31,24 @@ from ._codec import ( apply_codec as apply_codec, mk_codec as mk_codec, MsgCodec as MsgCodec, - current_msgspec_codec as current_msgspec_codec, + current_codec as current_codec, ) from .types import ( Msg as Msg, - Start as Start, # with pld - FuncSpec as FuncSpec, + Aid as Aid, + SpawnSpec as SpawnSpec, - StartAck as StartAck, # with pld - IpcCtxSpec as IpcCtxSpec, + Start as Start, + StartAck as StartAck, Started as Started, Yield as Yield, Stop as Stop, Return as Return, - Error as Error, # with pld - ErrorData as ErrorData, + Error as Error, # full msg spec set __spec__ as __spec__, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 4477d393..32a58a56 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -30,13 +30,13 @@ ToDo: backends we prolly should offer: ''' from __future__ import annotations -from contextvars import ( - ContextVar, - Token, -) from contextlib import ( contextmanager as cm, ) +# from contextvars import ( +# ContextVar, +# Token, +# ) from typing import ( Any, Callable, @@ -47,6 +47,12 @@ from types import ModuleType import msgspec from msgspec import msgpack +from trio.lowlevel import ( + RunVar, + RunVarToken, +) +# TODO: see notes below from @mikenerone.. +# from tricycle import TreeVar from tractor.msg.pretty_struct import Struct from tractor.msg.types import ( @@ -72,6 +78,9 @@ class MsgCodec(Struct): ''' A IPC msg interchange format lib's encoder + decoder pair. + Pretty much nothing more then delegation to underlying + `msgspec..Encoder/Decoder`s for now. + ''' _enc: msgpack.Encoder _dec: msgpack.Decoder @@ -86,11 +95,6 @@ class MsgCodec(Struct): lib: ModuleType = msgspec - # ad-hoc type extensions - # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - enc_hook: Callable[[Any], Any]|None = None # coder - dec_hook: Callable[[type, Any], Any]|None = None # decoder - # TODO: a sub-decoder system as well? # payload_msg_specs: Union[Type[Struct]] = Any # see related comments in `.msg.types` @@ -304,7 +308,8 @@ def mk_codec( libname: str = 'msgspec', - # proxy as `Struct(**kwargs)` + # proxy as `Struct(**kwargs)` for ad-hoc type extensions + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types # ------ - ------ dec_hook: Callable|None = None, enc_hook: Callable|None = None, @@ -389,14 +394,52 @@ def mk_codec( # no custom structs, hooks or other special types. _def_msgspec_codec: MsgCodec = mk_codec(ipc_msg_spec=Any) -# NOTE: provides for per-`trio.Task` specificity of the +# The built-in IPC `Msg` spec. +# Our composing "shuttle" protocol which allows `tractor`-app code +# to use any `msgspec` supported type as the `Msg.pld` payload, +# https://jcristharif.com/msgspec/supported-types.html +# +_def_tractor_codec: MsgCodec = mk_codec( + ipc_pld_spec=Any, +) +# TODO: IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing # `Channel.send()/.recv()` of wire data. -_ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( + +# ContextVar-TODO: DIDN'T WORK, kept resetting in every new task to default!? 
+# _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( + +# TreeVar-TODO: DIDN'T WORK, kept resetting in every new embedded nursery +# even though it's supposed to inherit from a parent context ??? +# +# _ctxvar_MsgCodec: TreeVar[MsgCodec] = TreeVar( +# +# ^-NOTE-^: for this to work see the mods by @mikenerone from `trio` gitter: +# +# 22:02:54 even for regular contextvars, all you have to do is: +# `task: Task = trio.lowlevel.current_task()` +# `task.parent_nursery.parent_task.context.run(my_ctx_var.set, new_value)` +# +# From a comment in his prop code he couldn't share outright: +# 1. For every TreeVar set in the current task (which covers what +# we need from SynchronizerFacade), walk up the tree until the +# root or finding one where the TreeVar is already set, setting +# it in all of the contexts along the way. +# 2. For each of those, we also forcibly set the values that are +# pending for child nurseries that have not yet accessed the +# TreeVar. +# 3. We similarly set the pending values for the child nurseries +# of the *current* task. +# + +# TODO: STOP USING THIS, since it's basically a global and won't +# allow sub-IPC-ctxs to limit the msg-spec however desired.. +_ctxvar_MsgCodec: MsgCodec = RunVar( 'msgspec_codec', # TODO: move this to our new `Msg`-spec! default=_def_msgspec_codec, + # default=_def_tractor_codec, ) @@ -410,15 +453,36 @@ def apply_codec( runtime context such that all IPC msgs are processed with it for that task. + Uses a `tricycle.TreeVar` to ensure the scope of the codec + matches the `@cm` block and DOES NOT change to the original + (default) value in new tasks (as it does for `ContextVar`). + + See the docs: + - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py + ''' - token: Token = _ctxvar_MsgCodec.set(codec) + orig: MsgCodec = _ctxvar_MsgCodec.get() + assert orig is not codec + token: RunVarToken = _ctxvar_MsgCodec.set(codec) + + # TODO: for TreeVar approach, see docs for @cm `.being()` API: + # https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + # try: + # with _ctxvar_MsgCodec.being(codec): + # new = _ctxvar_MsgCodec.get() + # assert new is codec + # yield codec + try: yield _ctxvar_MsgCodec.get() finally: _ctxvar_MsgCodec.reset(token) + assert _ctxvar_MsgCodec.get() is orig -def current_msgspec_codec() -> MsgCodec: + +def current_codec() -> MsgCodec: ''' Return the current `trio.Task.context`'s value for `msgspec_codec` used by `Channel.send/.recv()` @@ -449,5 +513,6 @@ def limit_msg_spec( payload_types=payload_types, **codec_kwargs, ) - with apply_codec(msgspec_codec): + with apply_codec(msgspec_codec) as applied_codec: + assert applied_codec is msgspec_codec yield msgspec_codec diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index 143fc7a4..412b6ed6 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -80,6 +80,28 @@ class DiffDump(UserList): return repstr +def iter_fields(struct: Struct) -> Iterator[ + tuple[ + structs.FieldIinfo, + str, + Any, + ] +]: + ''' + Iterate over all non-@property fields of this struct. + + ''' + fi: structs.FieldInfo + for fi in structs.fields(struct): + key: str = fi.name + val: Any = getattr(struct, key) + yield ( + fi, + key, + val, + ) + + class Struct( _Struct, @@ -91,23 +113,6 @@ class Struct( A "human friendlier" (aka repl buddy) struct subtype. 
''' - def _sin_props(self) -> Iterator[ - tuple[ - structs.FieldIinfo, - str, - Any, - ] - ]: - ''' - Iterate over all non-@property fields of this struct. - - ''' - fi: structs.FieldInfo - for fi in structs.fields(self): - key: str = fi.name - val: Any = getattr(self, key) - yield fi, key, val - def to_dict( self, include_non_members: bool = True, @@ -130,7 +135,7 @@ class Struct( # added as type-defined `@property` methods! sin_props: dict = {} fi: structs.FieldInfo - for fi, k, v in self._sin_props(): + for fi, k, v in iter_fields(self): sin_props[k] = asdict[k] return sin_props @@ -159,7 +164,7 @@ class Struct( fi: structs.FieldInfo k: str v: Any - for fi, k, v in self._sin_props(): + for fi, k, v in iter_fields(self): # TODO: how can we prefer `Literal['option1', 'option2, # ..]` over .__name__ == `Literal` but still get only the diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 2411f0f9..a81473d7 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -26,6 +26,7 @@ from __future__ import annotations import types from typing import ( Any, + Callable, Generic, Literal, Type, @@ -37,8 +38,12 @@ from msgspec import ( defstruct, # field, Struct, - UNSET, - UnsetType, + # UNSET, + # UnsetType, +) + +from tractor.msg import ( + pretty_struct, ) # type variable for the boxed payload field `.pld` @@ -48,11 +53,19 @@ PayloadT = TypeVar('PayloadT') class Msg( Struct, Generic[PayloadT], + + # https://jcristharif.com/msgspec/structs.html#tagged-unions tag=True, tag_field='msg_type', - # eq=True, + # https://jcristharif.com/msgspec/structs.html#field-ordering + # kw_only=True, + + # https://jcristharif.com/msgspec/structs.html#equality-and-order # order=True, + + # https://jcristharif.com/msgspec/structs.html#encoding-decoding-as-arrays + # as_array=True, ): ''' The "god" boxing msg type. @@ -90,6 +103,53 @@ class Msg( pld: PayloadT +class Aid( + Struct, + tag=True, + tag_field='msg_type', +): + ''' + Actor-identity msg. + + Initial contact exchange enabling an actor "mailbox handshake" + delivering the peer identity (and maybe eventually contact) + info. + + Used by discovery protocol to register actors as well as + conduct the initial comms (capability) filtering. + + ''' + name: str + uuid: str + # TODO: use built-in support for UUIDs? + # -[ ] `uuid.UUID` which has multi-protocol support + # https://jcristharif.com/msgspec/supported-types.html#uuid + + +class SpawnSpec( + pretty_struct.Struct, + tag=True, + tag_field='msg_type', +): + ''' + Initial runtime spec handed down from a spawning parent to its + child subactor immediately following first contact via an + `Aid` msg. + + ''' + _parent_main_data: dict + _runtime_vars: dict[str, Any] + + # module import capability + enable_modules: dict[str, str] + + # TODO: not just sockaddr pairs? + # -[ ] abstract into a `TransportAddr` type? + reg_addrs: list[tuple[str, int]] + bind_addrs: list[tuple[str, int]] + + + # TODO: caps based RPC support in the payload? # # -[ ] integration with our ``enable_modules: list[str]`` caps sys. @@ -105,18 +165,31 @@ class Msg( # # -[ ] can we combine .ns + .func into a native `NamespacePath` field? # -# -[ ]better name, like `Call/TaskInput`? +# -[ ] better name, like `Call/TaskInput`? 
# -class FuncSpec(Struct): - ns: str - func: str - - kwargs: dict - uid: str # (calling) actor-id +# -[ ] XXX a debugger lock msg transaction with payloads like, +# child -> `.pld: DebugLock` -> root +# child <- `.pld: DebugLocked` <- root +# child -> `.pld: DebugRelease` -> root +# +# WHY => when a pld spec is provided it might not allow for +# debug mode msgs as they currently are (using plain old `pld. +# str` payloads) so we only when debug_mode=True we need to +# union in this debugger payload set? +# +# mk_msg_spec( +# MyPldSpec, +# debug_mode=True, +# ) -> ( +# Union[MyPldSpec] +# | Union[DebugLock, DebugLocked, DebugRelease] +# ) class Start( - Msg, + Struct, + tag=True, + tag_field='msg_type', ): ''' Initial request to remotely schedule an RPC `trio.Task` via @@ -134,14 +207,26 @@ class Start( - `Context.open_context()` ''' - pld: FuncSpec + cid: str + + ns: str + func: str + + kwargs: dict + uid: tuple[str, str] # (calling) actor-id -class IpcCtxSpec(Struct): +class StartAck( + Struct, + tag=True, + tag_field='msg_type', +): ''' - An inter-actor-`trio.Task`-comms `Context` spec. + Init response to a `Cmd` request indicating the far + end's RPC spec, namely its callable "type". ''' + cid: str # TODO: maybe better names for all these? # -[ ] obvi ^ would need sync with `._rpc` functype: Literal[ @@ -160,18 +245,6 @@ class IpcCtxSpec(Struct): # msgspec: MsgSpec -class StartAck( - Msg, - Generic[PayloadT], -): - ''' - Init response to a `Cmd` request indicating the far - end's RPC callable "type". - - ''' - pld: IpcCtxSpec - - class Started( Msg, Generic[PayloadT], @@ -202,13 +275,19 @@ class Yield( pld: PayloadT -class Stop(Msg): +class Stop( + Struct, + tag=True, + tag_field='msg_type', +): ''' Stream termination signal much like an IPC version of `StopAsyncIteration`. ''' - pld: UnsetType = UNSET + cid: str + # TODO: do we want to support a payload on stop? + # pld: UnsetType = UNSET class Return( @@ -223,32 +302,33 @@ class Return( pld: PayloadT -class ErrorData(Struct): +class Error( + Struct, + tag=True, + tag_field='msg_type', +): ''' - Remote actor error meta-data as needed originally by + A pkt that wraps `RemoteActorError`s for relay and raising. + + Fields are 1-to-1 meta-data as needed originally by `RemoteActorError.msgdata: dict`. ''' - src_uid: str + src_uid: tuple[str, str] src_type_str: str boxed_type_str: str - - relay_path: list[str] + relay_path: list[tuple[str, str]] tb_str: str + cid: str|None = None + + # TODO: use UNSET or don't include them via + # # `ContextCancelled` - canceller: str|None = None + canceller: tuple[str, str]|None = None # `StreamOverrun` - sender: str|None = None - - -class Error(Msg): - ''' - A pkt that wraps `RemoteActorError`s for relay. - - ''' - pld: ErrorData + sender: tuple[str, str]|None = None # TODO: should be make a msg version of `ContextCancelled?` @@ -265,6 +345,12 @@ class Error(Msg): # approx order of the IPC txn-state spaces. __spec__: list[Msg] = [ + # identity handshake + Aid, + + # spawn specification from parent + SpawnSpec, + # inter-actor RPC initiation Start, StartAck, @@ -280,6 +366,8 @@ __spec__: list[Msg] = [ ] _runtime_spec_msgs: list[Msg] = [ + Aid, + SpawnSpec, Start, StartAck, Stop, @@ -443,3 +531,99 @@ def mk_msg_spec( pld_spec | runtime_spec, msgtypes_table[spec_build_method] + ipc_msg_types, ) + + +# TODO: make something similar to this inside `._codec` such that +# user can just pass a type table of some sort? 
+# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]:
+#     '''
+#     Deliver an `enc_hook()`/`dec_hook()` pair which does
+#     manual conversion from our above native `Msg` set
+#     to `dict` equivalent (wire msgs) in order to keep legacy compat
+#     with the original runtime implementation.
+
+#     Note: this is/was primarily used while moving the core
+#     runtime over to using native `Msg`-struct types wherein we
+#     start with the send side emitting without loading
+#     a typed-decoder and then later flipping the switch over to
+#     load to the native struct types once all runtime usage has
+#     been adjusted appropriately.
+
+#     '''
+#     def enc_to_dict(msg: Any) -> Any:
+#         '''
+#         Encode `Msg`-structs to `dict` msgs instead
+#         of using `msgspec.msgpack.Decoder.type`-ed
+#         features.
+
+#         '''
+#         match msg:
+#             case Start():
+#                 dctmsg: dict = pretty_struct.Struct.to_dict(
+#                     msg
+#                 )['pld']
+
+#             case Error():
+#                 dctmsg: dict = pretty_struct.Struct.to_dict(
+#                     msg
+#                 )['pld']
+#                 return {'error': dctmsg}
+
+
+#     def dec_from_dict(
+#         type: Type,
+#         obj: Any,
+#     ) -> Any:
+#         '''
+#         Decode to `Msg`-structs from `dict` msgs instead
+#         of using `msgspec.msgpack.Decoder.type`-ed
+#         features.
+
+#         '''
+#         cid: str = obj.get('cid')
+#         match obj:
+#             case {'cmd': pld}:
+#                 return Start(
+#                     cid=cid,
+#                     pld=pld,
+#                 )
+#             case {'functype': pld}:
+#                 return StartAck(
+#                     cid=cid,
+#                     functype=pld,
+#                     # pld=IpcCtxSpec(
+#                     #     functype=pld,
+#                     # ),
+#                 )
+#             case {'started': pld}:
+#                 return Started(
+#                     cid=cid,
+#                     pld=pld,
+#                 )
+#             case {'yield': pld}:
+#                 return Yield(
+#                     cid=obj['cid'],
+#                     pld=pld,
+#                 )
+#             case {'stop': pld}:
+#                 return Stop(
+#                     cid=cid,
+#                 )
+#             case {'return': pld}:
+#                 return Return(
+#                     cid=cid,
+#                     pld=pld,
+#                 )
+
+#             case {'error': pld}:
+#                 return Error(
+#                     cid=cid,
+#                     pld=ErrorData(
+#                         **pld
+#                     ),
+#                 )
+
+#     return (
+#         # enc_to_dict,
+#         dec_from_dict,
+#     )
-- 
2.34.1

From bc87c51ff105c2d54a5933cfe60a00fa9b2149a7 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Apr 2024 11:14:43 -0400
Subject: [PATCH 014/190] Get `test_codec_hooks_mod` working with `Msg`s

Though the runtime hasn't been changed over in this patch (it was in
the local index at the time however), the test does now demonstrate
that, using a `Started` msg, a correctly typed `.pld` will codec
correctly when passed manually to `MsgCodec.encode/decode()`.

Despite not having the runtime ported to the new shuttle msg set
(meaning the mentioned test will fail without the runtime port patch),
I was able to get this first original test working which limits
payload packets to a `Msg.pld: NamespacePath`; as long as we spec
`enc/dec_hook()`s the `Msg.pld` will be processed correctly as per:
https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
in both the `Any` and `NamespacePath|None` spec cases.

^- turns out in this case -^ the codec hooks only get invoked on the
unknown fields, NOT the entire `Struct`-msg.

A further gotcha was merging a `|None` into the `pld_spec` since this
test spawns a subactor and opens a context via `send_back_nsp()` and
that func has no explicit `return` - so of course it delivers
a `Return(pld=None)` which will fail if we only spec `NamespacePath`.
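
For completeness, the hook pair boiled down to a standalone sketch
(`NspLike` here just stands in for `tractor.msg.NamespacePath`, and
the `|None` union mirrors the implicit-return gotcha above):

  from typing import Any, Type
  import msgspec

  class NspLike(str):
      ...

  def enc_hook(obj: Any) -> Any:
      if isinstance(obj, NspLike):
          return str(obj)
      raise NotImplementedError(type(obj))

  def dec_hook(type_: Type, obj: Any) -> Any:
      if type_ is NspLike:
          return NspLike(obj)
      raise NotImplementedError(type_)

  enc = msgspec.msgpack.Encoder(enc_hook=enc_hook)
  # NOTE the `|None`: an implicit ctx return ships `Return(pld=None)`
  # so `None` has to be in the spec too.
  dec = msgspec.msgpack.Decoder(type=NspLike|None, dec_hook=dec_hook)

  assert dec.decode(enc.encode(NspLike('mod:func'))) == 'mod:func'
  assert dec.decode(enc.encode(None)) is None
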
--- tests/test_caps_based_msging.py | 305 ++++++++++++++++++++++++-------- 1 file changed, 236 insertions(+), 69 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index abdda0a5..b42d9e35 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -7,7 +7,6 @@ B~) ''' from typing import ( Any, - _GenericAlias, Type, Union, ) @@ -26,20 +25,23 @@ from msgspec import ( import pytest import tractor from tractor.msg import ( - _def_msgspec_codec, + _codec, _ctxvar_MsgCodec, NamespacePath, MsgCodec, mk_codec, apply_codec, - current_msgspec_codec, + current_codec, ) -from tractor.msg import types +from tractor.msg import ( + types, +) +from tractor import _state from tractor.msg.types import ( # PayloadT, Msg, - # Started, + Started, mk_msg_spec, ) import trio @@ -60,56 +62,110 @@ def test_msg_spec_xor_pld_spec(): ) -# TODO: wrap these into `._codec` such that user can just pass -# a type table of some sort? -def enc_hook(obj: Any) -> Any: - if isinstance(obj, NamespacePath): - return str(obj) - else: - raise NotImplementedError( - f'Objects of type {type(obj)} are not supported' - ) - - -def dec_hook(type: Type, obj: Any) -> Any: - print(f'type is: {type}') - if type is NamespacePath: - return NamespacePath(obj) - else: - raise NotImplementedError( - f'Objects of type {type(obj)} are not supported' - ) - - def ex_func(*args): print(f'ex_func({args})') def mk_custom_codec( - ipc_msg_spec: Type[Any] = Any, -) -> MsgCodec: - # apply custom hooks and set a `Decoder` which only - # loads `NamespacePath` types. - nsp_codec: MsgCodec = mk_codec( - ipc_msg_spec=ipc_msg_spec, - enc_hook=enc_hook, - dec_hook=dec_hook, - ) + pld_spec: Union[Type]|Any, - # TODO: validate `MsgCodec` interface/semantics? - # -[ ] simple field tests to ensure caching + reset is workin? - # -[ ] custom / changing `.decoder()` calls? - # - # dec = nsp_codec.decoder( - # types=NamespacePath, - # ) - # assert nsp_codec.dec is dec +) -> MsgCodec: + ''' + Create custom `msgpack` enc/dec-hooks and set a `Decoder` + which only loads `NamespacePath` types. + + ''' + uid: tuple[str, str] = tractor.current_actor().uid + + # XXX NOTE XXX: despite defining `NamespacePath` as a type + # field on our `Msg.pld`, we still need a enc/dec_hook() pair + # to cast to/from that type on the wire. See the docs: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + + def enc_nsp(obj: Any) -> Any: + match obj: + case NamespacePath(): + print( + f'{uid}: `NamespacePath`-Only ENCODE?\n' + f'type: {type(obj)}\n' + f'obj: {obj}\n' + ) + + return str(obj) + + logmsg: str = ( + f'{uid}: Encoding `{obj}: <{type(obj)}>` not supported' + f'type: {type(obj)}\n' + f'obj: {obj}\n' + ) + print(logmsg) + raise NotImplementedError(logmsg) + + def dec_nsp( + type: Type, + obj: Any, + + ) -> Any: + print( + f'{uid}: CUSTOM DECODE\n' + f'input type: {type}\n' + f'obj: {obj}\n' + f'type(obj): `{type(obj).__class__}`\n' + ) + nsp = None + + # This never seems to hit? 
+ if isinstance(obj, Msg): + print(f'Msg type: {obj}') + + if ( + type is NamespacePath + and isinstance(obj, str) + and ':' in obj + ): + nsp = NamespacePath(obj) + + if nsp: + print(f'Returning NSP instance: {nsp}') + return nsp + + logmsg: str = ( + f'{uid}: Decoding `{obj}: <{type(obj)}>` not supported' + f'input type: {type(obj)}\n' + f'obj: {obj}\n' + f'type(obj): `{type(obj).__class__}`\n' + ) + print(logmsg) + raise NotImplementedError(logmsg) + + + nsp_codec: MsgCodec = mk_codec( + ipc_pld_spec=pld_spec, + + # NOTE XXX: the encode hook MUST be used no matter what since + # our `NamespacePath` is not any of a `Any` native type nor + # a `msgspec.Struct` subtype - so `msgspec` has no way to know + # how to encode it unless we provide the custom hook. + # + # AGAIN that is, regardless of whether we spec an + # `Any`-decoded-pld the enc has no knowledge (by default) + # how to enc `NamespacePath` (nsp), so we add a custom + # hook to do that ALWAYS. + enc_hook=enc_nsp, + + # XXX NOTE: pretty sure this is mutex with the `type=` to + # `Decoder`? so it won't work in tandem with the + # `ipc_pld_spec` passed above? + dec_hook=dec_nsp, + ) return nsp_codec @tractor.context async def send_back_nsp( - ctx: tractor.Context, + ctx: Context, + expect_debug: bool, + use_any_spec: bool, ) -> None: ''' @@ -117,28 +173,65 @@ async def send_back_nsp( and ensure we can round trip a func ref with our parent. ''' - task: trio.Task = trio.lowlevel.current_task() - task_ctx: Context = task.context - assert _ctxvar_MsgCodec not in task_ctx + # debug mode sanity check + assert expect_debug == _state.debug_mode() - nsp_codec: MsgCodec = mk_custom_codec() + # task: trio.Task = trio.lowlevel.current_task() + + # TreeVar + # curr_codec = _ctxvar_MsgCodec.get_in(task) + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec not in task_ctx + + curr_codec = _ctxvar_MsgCodec.get() + assert curr_codec is _codec._def_tractor_codec + + if use_any_spec: + pld_spec = Any + else: + # NOTE: don't need the |None here since + # the parent side will never send `None` like + # we do here in the implicit return at the end of this + # `@context` body. 
+ pld_spec = NamespacePath # |None + + nsp_codec: MsgCodec = mk_custom_codec( + pld_spec=pld_spec, + ) with apply_codec(nsp_codec) as codec: chk_codec_applied( custom_codec=nsp_codec, enter_value=codec, ) + # ensure roundtripping works locally nsp = NamespacePath.from_ref(ex_func) - await ctx.started(nsp) + wire_bytes: bytes = nsp_codec.encode( + Started( + cid=ctx.cid, + pld=nsp + ) + ) + msg: Started = nsp_codec.decode(wire_bytes) + pld = msg.pld + assert pld == nsp + await ctx.started(nsp) async with ctx.open_stream() as ipc: async for msg in ipc: - assert msg == f'{__name__}:ex_func' + if use_any_spec: + assert msg == f'{__name__}:ex_func' - # TODO: as per below - # assert isinstance(msg, NamespacePath) - assert isinstance(msg, str) + # TODO: as per below + # assert isinstance(msg, NamespacePath) + assert isinstance(msg, str) + else: + assert isinstance(msg, NamespacePath) + + await ipc.send(msg) def chk_codec_applied( @@ -146,11 +239,20 @@ def chk_codec_applied( enter_value: MsgCodec, ) -> MsgCodec: - task: trio.Task = trio.lowlevel.current_task() - task_ctx: Context = task.context + # task: trio.Task = trio.lowlevel.current_task() - assert _ctxvar_MsgCodec in task_ctx - curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + # TreeVar + # curr_codec = _ctxvar_MsgCodec.get_in(task) + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec in task_ctx + # curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + + # RunVar + curr_codec: MsgCodec = _ctxvar_MsgCodec.get() + last_read_codec = _ctxvar_MsgCodec.get() + assert curr_codec is last_read_codec assert ( # returned from `mk_codec()` @@ -163,14 +265,31 @@ def chk_codec_applied( curr_codec is # public API for all of the above - current_msgspec_codec() + current_codec() # the default `msgspec` settings - is not _def_msgspec_codec + is not _codec._def_msgspec_codec + is not _codec._def_tractor_codec ) -def test_codec_hooks_mod(): +@pytest.mark.parametrize( + 'ipc_pld_spec', + [ + # _codec._def_msgspec_codec, + Any, + # _codec._def_tractor_codec, + NamespacePath|None, + ], + ids=[ + 'any_type', + 'nsp_type', + ] +) +def test_codec_hooks_mod( + debug_mode: bool, + ipc_pld_spec: Union[Type]|Any, +): ''' Audit the `.msg.MsgCodec` override apis details given our impl uses `contextvars` to accomplish per `trio` task codec @@ -178,11 +297,21 @@ def test_codec_hooks_mod(): ''' async def main(): - task: trio.Task = trio.lowlevel.current_task() - task_ctx: Context = task.context - assert _ctxvar_MsgCodec not in task_ctx - async with tractor.open_nursery() as an: + # task: trio.Task = trio.lowlevel.current_task() + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec not in task_ctx + + # TreeVar + # def_codec: MsgCodec = _ctxvar_MsgCodec.get_in(task) + def_codec = _ctxvar_MsgCodec.get() + assert def_codec is _codec._def_tractor_codec + + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: p: tractor.Portal = await an.start_actor( 'sub', enable_modules=[__name__], @@ -192,7 +321,9 @@ def test_codec_hooks_mod(): # - codec not modified -> decode nsp as `str` # - codec modified with hooks -> decode nsp as # `NamespacePath` - nsp_codec: MsgCodec = mk_custom_codec() + nsp_codec: MsgCodec = mk_custom_codec( + pld_spec=ipc_pld_spec, + ) with apply_codec(nsp_codec) as codec: chk_codec_applied( custom_codec=nsp_codec, @@ -202,9 +333,22 @@ def test_codec_hooks_mod(): async with ( p.open_context( send_back_nsp, + # TODO: send the original nsp here and + # test with `limit_msg_spec()` above? 
+ expect_debug=debug_mode, + use_any_spec=(ipc_pld_spec==Any), + ) as (ctx, first), ctx.open_stream() as ipc, ): + if ipc_pld_spec is NamespacePath: + assert isinstance(first, NamespacePath) + + print( + 'root: ENTERING CONTEXT BLOCK\n' + f'type(first): {type(first)}\n' + f'first: {first}\n' + ) # ensure codec is still applied across # `tractor.Context` + its embedded nursery. chk_codec_applied( @@ -212,23 +356,46 @@ def test_codec_hooks_mod(): enter_value=codec, ) - assert first == f'{__name__}:ex_func' + first_nsp = NamespacePath(first) + + # ensure roundtripping works + wire_bytes: bytes = nsp_codec.encode( + Started( + cid=ctx.cid, + pld=first_nsp + ) + ) + msg: Started = nsp_codec.decode(wire_bytes) + pld = msg.pld + assert pld == first_nsp + + # try a manual decode of the started msg+pld + # TODO: actually get the decoder loading # to native once we spec our SCIPP msgspec # (structurred-conc-inter-proc-protocol) # implemented as per, # https://github.com/goodboy/tractor/issues/36 # - # assert isinstance(first, NamespacePath) - assert isinstance(first, str) + if ipc_pld_spec is NamespacePath: + assert isinstance(first, NamespacePath) + + # `Any`-payload-spec case + else: + assert isinstance(first, str) + assert first == f'{__name__}:ex_func' + await ipc.send(first) - with trio.move_on_after(1): + with trio.move_on_after(.6): async for msg in ipc: + print(msg) # TODO: as per above # assert isinstance(msg, NamespacePath) assert isinstance(msg, str) + await ipc.send(msg) + await trio.sleep(0.1) await p.cancel_actor() -- 2.34.1 From b56b3aa8907214a38d829f742038124bc766ee03 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Apr 2024 13:33:06 -0400 Subject: [PATCH 015/190] Add timeouts around some context test bodies Since with my in-index runtime-port to our native msg-spec it seems these ones are hanging B( - `test_one_end_stream_not_opened()` - `test_maybe_allow_overruns_stream()` Tossing in some `trio.fail_after()`s seems to at least gnab them as failures B) --- tests/test_context_stream_semantics.py | 134 ++++++++++++++----------- 1 file changed, 74 insertions(+), 60 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index 121abaa8..36a5fd9f 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -6,6 +6,7 @@ sync-opening a ``tractor.Context`` beforehand. ''' from itertools import count +import math import platform from pprint import pformat from typing import ( @@ -845,7 +846,10 @@ async def keep_sending_from_callee( ('caller', 1, never_open_stream), ('callee', 0, keep_sending_from_callee), ], - ids='overrun_condition={}'.format, + ids=[ + ('caller_1buf_never_open_stream'), + ('callee_0buf_keep_sending_from_callee'), + ] ) def test_one_end_stream_not_opened( overrun_by: tuple[str, int, Callable], @@ -869,29 +873,30 @@ def test_one_end_stream_not_opened( enable_modules=[__name__], ) - async with portal.open_context( - entrypoint, - ) as (ctx, sent): - assert sent is None + with trio.fail_after(0.8): + async with portal.open_context( + entrypoint, + ) as (ctx, sent): + assert sent is None - if 'caller' in overrunner: + if 'caller' in overrunner: - async with ctx.open_stream() as stream: + async with ctx.open_stream() as stream: - # itersend +1 msg more then the buffer size - # to cause the most basic overrun. - for i in range(buf_size): - print(f'sending {i}') - await stream.send(i) + # itersend +1 msg more then the buffer size + # to cause the most basic overrun. 
+ for i in range(buf_size): + print(f'sending {i}') + await stream.send(i) - else: - # expect overrun error to be relayed back - # and this sleep interrupted - await trio.sleep_forever() + else: + # expect overrun error to be relayed back + # and this sleep interrupted + await trio.sleep_forever() - else: - # callee overruns caller case so we do nothing here - await trio.sleep_forever() + else: + # callee overruns caller case so we do nothing here + await trio.sleep_forever() await portal.cancel_actor() @@ -1055,54 +1060,63 @@ def test_maybe_allow_overruns_stream( loglevel=loglevel, debug_mode=debug_mode, ) - seq = list(range(10)) - async with portal.open_context( - echo_back_sequence, - seq=seq, - wait_for_cancel=cancel_ctx, - be_slow=(slow_side == 'child'), - allow_overruns_side=allow_overruns_side, - ) as (ctx, sent): - assert sent is None + # stream-sequence batch info with send delay to determine + # approx timeout determining whether test has hung. + total_batches: int = 2 + num_items: int = 10 + seq = list(range(num_items)) + parent_send_delay: float = 0.16 + timeout: float = math.ceil( + total_batches * num_items * parent_send_delay + ) + with trio.fail_after(timeout): + async with portal.open_context( + echo_back_sequence, + seq=seq, + wait_for_cancel=cancel_ctx, + be_slow=(slow_side == 'child'), + allow_overruns_side=allow_overruns_side, - async with ctx.open_stream( - msg_buffer_size=1 if slow_side == 'parent' else None, - allow_overruns=(allow_overruns_side in {'parent', 'both'}), - ) as stream: + ) as (ctx, sent): + assert sent is None - total_batches: int = 2 - for _ in range(total_batches): - for msg in seq: - # print(f'root tx {msg}') - await stream.send(msg) - if slow_side == 'parent': - # NOTE: we make the parent slightly - # slower, when it is slow, to make sure - # that in the overruns everywhere case - await trio.sleep(0.16) + async with ctx.open_stream( + msg_buffer_size=1 if slow_side == 'parent' else None, + allow_overruns=(allow_overruns_side in {'parent', 'both'}), + ) as stream: - batch = [] - async for msg in stream: - print(f'root rx {msg}') - batch.append(msg) - if batch == seq: - break + for _ in range(total_batches): + for msg in seq: + # print(f'root tx {msg}') + await stream.send(msg) + if slow_side == 'parent': + # NOTE: we make the parent slightly + # slower, when it is slow, to make sure + # that in the overruns everywhere case + await trio.sleep(parent_send_delay) + + batch = [] + async for msg in stream: + print(f'root rx {msg}') + batch.append(msg) + if batch == seq: + break + + if cancel_ctx: + # cancel the remote task + print('Requesting `ctx.cancel()` in parent!') + await ctx.cancel() + + res: str|ContextCancelled = await ctx.result() if cancel_ctx: - # cancel the remote task - print('Requesting `ctx.cancel()` in parent!') - await ctx.cancel() + assert isinstance(res, ContextCancelled) + assert tuple(res.canceller) == current_actor().uid - res: str|ContextCancelled = await ctx.result() - - if cancel_ctx: - assert isinstance(res, ContextCancelled) - assert tuple(res.canceller) == current_actor().uid - - else: - print(f'RX ROOT SIDE RESULT {res}') - assert res == 'yo' + else: + print(f'RX ROOT SIDE RESULT {res}') + assert res == 'yo' # cancel the daemon await portal.cancel_actor() -- 2.34.1 From 09eed9d7e1027c03cb743012434290fa9618a234 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Apr 2024 13:41:52 -0400 Subject: [PATCH 016/190] WIP porting runtime to use `Msg`-spec --- tractor/_context.py | 245 +++++++++------- tractor/_entry.py | 1 + 
tractor/_exceptions.py | 94 ++++-- tractor/_ipc.py | 99 ++++++- tractor/_portal.py | 19 +- tractor/_rpc.py | 644 ++++++++++++++++++++++++----------------- tractor/_runtime.py | 174 +++++++---- tractor/_spawn.py | 30 +- tractor/_streaming.py | 46 ++- tractor/devx/_debug.py | 3 + 10 files changed, 878 insertions(+), 477 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 51b23302..02dcac39 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -53,7 +53,14 @@ from ._exceptions import ( _raise_from_no_key_in_msg, ) from .log import get_logger -from .msg import NamespacePath +from .msg import ( + NamespacePath, + Msg, + Return, + Started, + Stop, + Yield, +) from ._ipc import Channel from ._streaming import MsgStream from ._state import ( @@ -96,7 +103,8 @@ async def _drain_to_final_msg( # wait for a final context result by collecting (but # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. - pre_result_drained: list[dict] = [] + # pre_result_drained: list[dict] = [] + pre_result_drained: list[Msg] = [] while not ( ctx.maybe_error and not ctx._final_result_is_set() @@ -155,7 +163,10 @@ async def _drain_to_final_msg( # await pause() # pray to the `trio` gawds that we're corrent with this - msg: dict = await ctx._recv_chan.receive() + # msg: dict = await ctx._recv_chan.receive() + msg: Msg = await ctx._recv_chan.receive() + # always capture unexpected/non-result msgs + pre_result_drained.append(msg) # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -175,24 +186,31 @@ async def _drain_to_final_msg( # continue to bubble up as normal. raise - try: - ctx._result: Any = msg['return'] - log.runtime( - 'Context delivered final draining msg:\n' - f'{pformat(msg)}' - ) - # XXX: only close the rx mem chan AFTER - # a final result is retreived. - # if ctx._recv_chan: - # await ctx._recv_chan.aclose() - # TODO: ^ we don't need it right? - break + match msg: + case Return( + cid=cid, + pld=res, + ): + # try: + # ctx._result: Any = msg['return'] + # ctx._result: Any = msg.pld + ctx._result: Any = res + log.runtime( + 'Context delivered final draining msg:\n' + f'{pformat(msg)}' + ) + # XXX: only close the rx mem chan AFTER + # a final result is retreived. + # if ctx._recv_chan: + # await ctx._recv_chan.aclose() + # TODO: ^ we don't need it right? + break - except KeyError: - # always capture unexpected/non-result msgs - pre_result_drained.append(msg) + # except KeyError: + # except AttributeError: + case Yield(): + # if 'yield' in msg: - if 'yield' in msg: # far end task is still streaming to us so discard # and report per local context state. if ( @@ -238,9 +256,10 @@ async def _drain_to_final_msg( # TODO: work out edge cases here where # a stream is open but the task also calls # this? - # -[ ] should be a runtime error if a stream is open - # right? - elif 'stop' in msg: + # -[ ] should be a runtime error if a stream is open right? + # Stop() + case Stop(): + # elif 'stop' in msg: log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' f'{pformat(msg)}\n' @@ -249,78 +268,80 @@ async def _drain_to_final_msg( # It's an internal error if any other msg type without # a`'cid'` field arrives here! 
- if not msg.get('cid'): - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' - ) + case _: + # if not msg.get('cid'): + if not msg.cid: + raise InternalError( + 'Unexpected cid-missing msg?\n\n' + f'{msg}\n' + ) - # XXX fallthrough to handle expected error XXX - # TODO: replace this with `ctx.maybe_raise()` - # - # TODO: would this be handier for this case maybe? - # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') + # XXX fallthrough to handle expected error XXX + # TODO: replace this with `ctx.maybe_raise()` + # + # TODO: would this be handier for this case maybe? + # async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') - re: Exception|None = ctx._remote_error - if re: - log.critical( - 'Remote ctx terminated due to "error" msg:\n' - f'{re}' - ) - assert msg is ctx._cancel_msg - # NOTE: this solved a super dupe edge case XD - # this was THE super duper edge case of: - # - local task opens a remote task, - # - requests remote cancellation of far end - # ctx/tasks, - # - needs to wait for the cancel ack msg - # (ctxc) or some result in the race case - # where the other side's task returns - # before the cancel request msg is ever - # rxed and processed, - # - here this surrounding drain loop (which - # iterates all ipc msgs until the ack or - # an early result arrives) was NOT exiting - # since we are the edge case: local task - # does not re-raise any ctxc it receives - # IFF **it** was the cancellation - # requester.. - # will raise if necessary, ow break from - # loop presuming any error terminates the - # context! - ctx._maybe_raise_remote_err( - re, - # NOTE: obvi we don't care if we - # overran the far end if we're already - # waiting on a final result (msg). - # raise_overrun_from_self=False, - raise_overrun_from_self=raise_overrun, - ) + re: Exception|None = ctx._remote_error + if re: + log.critical( + 'Remote ctx terminated due to "error" msg:\n' + f'{re}' + ) + assert msg is ctx._cancel_msg + # NOTE: this solved a super dupe edge case XD + # this was THE super duper edge case of: + # - local task opens a remote task, + # - requests remote cancellation of far end + # ctx/tasks, + # - needs to wait for the cancel ack msg + # (ctxc) or some result in the race case + # where the other side's task returns + # before the cancel request msg is ever + # rxed and processed, + # - here this surrounding drain loop (which + # iterates all ipc msgs until the ack or + # an early result arrives) was NOT exiting + # since we are the edge case: local task + # does not re-raise any ctxc it receives + # IFF **it** was the cancellation + # requester.. + # will raise if necessary, ow break from + # loop presuming any error terminates the + # context! + ctx._maybe_raise_remote_err( + re, + # NOTE: obvi we don't care if we + # overran the far end if we're already + # waiting on a final result (msg). + # raise_overrun_from_self=False, + raise_overrun_from_self=raise_overrun, + ) - break # OOOOOF, yeah obvi we need this.. + break # OOOOOF, yeah obvi we need this.. - # XXX we should never really get here - # right! since `._deliver_msg()` should - # always have detected an {'error': ..} - # msg and already called this right!?! 
- elif error := unpack_error( - msg=msg, - chan=ctx._portal.channel, - hide_tb=False, - ): - log.critical('SHOULD NEVER GET HERE!?') - assert msg is ctx._cancel_msg - assert error.msgdata == ctx._remote_error.msgdata - from .devx._debug import pause - await pause() - ctx._maybe_cancel_and_set_remote_error(error) - ctx._maybe_raise_remote_err(error) + # XXX we should never really get here + # right! since `._deliver_msg()` should + # always have detected an {'error': ..} + # msg and already called this right!?! + elif error := unpack_error( + msg=msg, + chan=ctx._portal.channel, + hide_tb=False, + ): + log.critical('SHOULD NEVER GET HERE!?') + assert msg is ctx._cancel_msg + assert error.msgdata == ctx._remote_error.msgdata + from .devx._debug import pause + await pause() + ctx._maybe_cancel_and_set_remote_error(error) + ctx._maybe_raise_remote_err(error) - else: - # bubble the original src key error - raise + else: + # bubble the original src key error + raise else: log.cancel( 'Skipping `MsgStream` drain since final outcome is set\n\n' @@ -710,10 +731,14 @@ class Context: async def send_stop(self) -> None: # await pause() - await self.chan.send({ - 'stop': True, - 'cid': self.cid - }) + # await self.chan.send({ + # # Stop( + # 'stop': True, + # 'cid': self.cid + # }) + await self.chan.send( + Stop(cid=self.cid) + ) def _maybe_cancel_and_set_remote_error( self, @@ -1398,17 +1423,19 @@ class Context: for msg in drained_msgs: # TODO: mask this by default.. - if 'return' in msg: + # if 'return' in msg: + if isinstance(msg, Return): # from .devx import pause # await pause() - raise InternalError( + # raise InternalError( + log.warning( 'Final `return` msg should never be drained !?!?\n\n' f'{msg}\n' ) log.cancel( 'Ctx drained pre-result msgs:\n' - f'{drained_msgs}' + f'{pformat(drained_msgs)}' ) self.maybe_raise( @@ -1616,7 +1643,18 @@ class Context: f'called `.started()` twice on context with {self.chan.uid}' ) - await self.chan.send({'started': value, 'cid': self.cid}) + # await self.chan.send( + # { + # 'started': value, + # 'cid': self.cid, + # } + # ) + await self.chan.send( + Started( + cid=self.cid, + pld=value, + ) + ) self._started_called = True async def _drain_overflows( @@ -1671,7 +1709,8 @@ class Context: async def _deliver_msg( self, - msg: dict, + # msg: dict, + msg: Msg, ) -> bool: ''' @@ -1855,7 +1894,7 @@ class Context: # anything different. return False else: - txt += f'\n{msg}\n' + # txt += f'\n{msg}\n' # raise local overrun and immediately pack as IPC # msg for far end. try: @@ -1986,15 +2025,17 @@ async def open_context_from_portal( ) assert ctx._remote_func_type == 'context' - msg: dict = await ctx._recv_chan.receive() + msg: Started = await ctx._recv_chan.receive() try: # the "first" value here is delivered by the callee's # ``Context.started()`` call. 
- first: Any = msg['started'] + # first: Any = msg['started'] + first: Any = msg.pld ctx._started_called: bool = True - except KeyError as src_error: + # except KeyError as src_error: + except AttributeError as src_error: _raise_from_no_key_in_msg( ctx=ctx, msg=msg, diff --git a/tractor/_entry.py b/tractor/_entry.py index 21c9ae48..bf719abb 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -136,6 +136,7 @@ def _trio_main( run_as_asyncio_guest(trio_main) else: trio.run(trio_main) + except KeyboardInterrupt: log.cancel( 'Actor received KBI\n' diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index b1a8ee63..7deda9d2 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -31,9 +31,16 @@ import textwrap import traceback import trio +from msgspec import structs from tractor._state import current_actor from tractor.log import get_logger +from tractor.msg import ( + Error, + Msg, + Stop, + Yield, +) if TYPE_CHECKING: from ._context import Context @@ -135,6 +142,8 @@ class RemoteActorError(Exception): # and instead render if from `.boxed_type_str`? self._boxed_type: BaseException = boxed_type self._src_type: BaseException|None = None + + # TODO: make this a `.errmsg: Error` throughout? self.msgdata: dict[str, Any] = msgdata # TODO: mask out eventually or place in `pack_error()` @@ -464,7 +473,23 @@ class AsyncioCancelled(Exception): ''' class MessagingError(Exception): - 'Some kind of unexpected SC messaging dialog issue' + ''' + IPC related msg (typing), transaction (ordering) or dialog + handling error. + + ''' + + +class MsgTypeError(MessagingError): + ''' + Equivalent of a `TypeError` for an IPC wire-message + due to an invalid field value (type). + + Normally this is re-raised from some `.msg._codec` + decode error raised by a backend interchange lib + like `msgspec` or `pycapnproto`. + + ''' def pack_error( @@ -473,7 +498,7 @@ def pack_error( tb: str|None = None, cid: str|None = None, -) -> dict[str, dict]: +) -> Error|dict[str, dict]: ''' Create an "error message" which boxes a locally caught exception's meta-data and encodes it for wire transport via an @@ -536,17 +561,23 @@ def pack_error( # content's `.msgdata`). error_msg['tb_str'] = tb_str - pkt: dict = { - 'error': error_msg, - } - if cid: - pkt['cid'] = cid + # Error() + # pkt: dict = { + # 'error': error_msg, + # } + pkt: Error = Error( + cid=cid, + **error_msg, + # TODO: just get rid of `.pld` on this msg? + ) + # if cid: + # pkt['cid'] = cid return pkt def unpack_error( - msg: dict[str, Any], + msg: dict[str, Any]|Error, chan: Channel|None = None, box_type: RemoteActorError = RemoteActorError, @@ -564,15 +595,17 @@ def unpack_error( ''' __tracebackhide__: bool = hide_tb - error_dict: dict[str, dict] | None - if ( - error_dict := msg.get('error') - ) is None: + error_dict: dict[str, dict]|None + if not isinstance(msg, Error): + # if ( + # error_dict := msg.get('error') + # ) is None: # no error field, nothing to unpack. return None # retrieve the remote error's msg encoded details - tb_str: str = error_dict.get('tb_str', '') + # tb_str: str = error_dict.get('tb_str', '') + tb_str: str = msg.tb_str message: str = ( f'{chan.uid}\n' + @@ -581,7 +614,8 @@ def unpack_error( # try to lookup a suitable error type from the local runtime # env then use it to construct a local instance. 
- boxed_type_str: str = error_dict['boxed_type_str'] + # boxed_type_str: str = error_dict['boxed_type_str'] + boxed_type_str: str = msg.boxed_type_str boxed_type: Type[BaseException] = get_err_type(boxed_type_str) if boxed_type_str == 'ContextCancelled': @@ -595,7 +629,11 @@ def unpack_error( # original source error. elif boxed_type_str == 'RemoteActorError': assert boxed_type is RemoteActorError - assert len(error_dict['relay_path']) >= 1 + # assert len(error_dict['relay_path']) >= 1 + assert len(msg.relay_path) >= 1 + + # TODO: mk RAE just take the `Error` instance directly? + error_dict: dict = structs.asdict(msg) exc = box_type( message, @@ -623,11 +661,12 @@ def is_multi_cancelled(exc: BaseException) -> bool: def _raise_from_no_key_in_msg( ctx: Context, - msg: dict, + msg: Msg, src_err: KeyError, log: StackLevelAdapter, # caller specific `log` obj expect_key: str = 'yield', + expect_msg: str = Yield, stream: MsgStream | None = None, # allow "deeper" tbs when debugging B^o @@ -660,8 +699,10 @@ def _raise_from_no_key_in_msg( # an internal error should never get here try: - cid: str = msg['cid'] - except KeyError as src_err: + cid: str = msg.cid + # cid: str = msg['cid'] + # except KeyError as src_err: + except AttributeError as src_err: raise MessagingError( f'IPC `Context` rx-ed msg without a ctx-id (cid)!?\n' f'cid: {cid}\n\n' @@ -672,7 +713,10 @@ def _raise_from_no_key_in_msg( # TODO: test that shows stream raising an expected error!!! # raise the error message in a boxed exception type! - if msg.get('error'): + # if msg.get('error'): + if isinstance(msg, Error): + # match msg: + # case Error(): raise unpack_error( msg, ctx.chan, @@ -683,8 +727,10 @@ def _raise_from_no_key_in_msg( # `MsgStream` termination msg. # TODO: does it make more sense to pack # the stream._eoc outside this in the calleer always? + # case Stop(): elif ( - msg.get('stop') + # msg.get('stop') + isinstance(msg, Stop) or ( stream and stream._eoc @@ -725,14 +771,16 @@ def _raise_from_no_key_in_msg( stream and stream._closed ): - raise trio.ClosedResourceError('This stream was closed') - + # TODO: our own error subtype? + raise trio.ClosedResourceError( + 'This stream was closed' + ) # always re-raise the source error if no translation error case # is activated above. 
_type: str = 'Stream' if stream else 'Context' raise MessagingError( - f"{_type} was expecting a '{expect_key}' message" + f"{_type} was expecting a '{expect_key.upper()}' message" " BUT received a non-error msg:\n" f'{pformat(msg)}' ) from src_err diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 5f71c38c..6168c77c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -38,17 +38,23 @@ from typing import ( Protocol, Type, TypeVar, + Union, ) +import msgspec from tricycle import BufferedReceiveStream import trio from tractor.log import get_logger -from tractor._exceptions import TransportClosed +from tractor._exceptions import ( + TransportClosed, + MsgTypeError, +) from tractor.msg import ( _ctxvar_MsgCodec, + _codec, MsgCodec, - mk_codec, + types, ) log = get_logger(__name__) @@ -163,7 +169,16 @@ class MsgpackTCPStream(MsgTransport): # allow for custom IPC msg interchange format # dynamic override Bo - self.codec: MsgCodec = codec or mk_codec() + self._task = trio.lowlevel.current_task() + self._codec: MsgCodec = ( + codec + or + _codec._ctxvar_MsgCodec.get() + ) + log.critical( + '!?!: USING STD `tractor` CODEC !?!?\n' + f'{self._codec}\n' + ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' @@ -171,7 +186,6 @@ class MsgpackTCPStream(MsgTransport): stream using the current task's `MsgCodec`. ''' - import msgspec # noqa decodes_failed: int = 0 while True: @@ -206,7 +220,19 @@ class MsgpackTCPStream(MsgTransport): try: # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. - yield _ctxvar_MsgCodec.get().decode(msg_bytes) + codec: MsgCodec = _ctxvar_MsgCodec.get() + if self._codec.pld_spec != codec.pld_spec: + # assert ( + # task := trio.lowlevel.current_task() + # ) is not self._task + # self._task = task + self._codec = codec + log.critical( + '.recv() USING NEW CODEC !?!?\n' + f'{self._codec}\n\n' + f'msg_bytes -> {msg_bytes}\n' + ) + yield codec.decode(msg_bytes) # TODO: remove, was only for orig draft impl # testing. @@ -221,6 +247,41 @@ class MsgpackTCPStream(MsgTransport): # # yield obj + # XXX NOTE: since the below error derives from + # `DecodeError` we need to catch is specially + # and always raise such that spec violations + # are never allowed to be caught silently! + except msgspec.ValidationError as verr: + + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. + msg_dict: dict = msgspec.msgpack.decode(msg_bytes) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(types, msg_type_name) + errmsg: str = ( + f'Received invalid IPC `{msg_type_name}` msg\n\n' + ) + + # XXX see if we can determine the exact invalid field + # such that we can comprehensively report the + # specific field's type problem + msgspec_msg: str = verr.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + if field_val := msg_dict.get(maybe_field): + field_type: Union[Type] = msg_type.__signature__.parameters[ + maybe_field + ].annotation + errmsg += ( + f'{msg.rstrip("`")}\n\n' + f'{msg_type}\n' + f' |_.{maybe_field}: {field_type} = {field_val}\n' + ) + + raise MsgTypeError(errmsg) from verr + except ( msgspec.DecodeError, UnicodeDecodeError, @@ -230,14 +291,15 @@ class MsgpackTCPStream(MsgTransport): # do with a channel drop - hope that receiving from the # channel will raise an expected error and bubble up. 
try: - msg_str: str | bytes = msg_bytes.decode() + msg_str: str|bytes = msg_bytes.decode() except UnicodeDecodeError: msg_str = msg_bytes - log.error( - '`msgspec` failed to decode!?\n' - 'dumping bytes:\n' - f'{msg_str!r}' + log.exception( + 'Failed to decode msg?\n' + f'{codec}\n\n' + 'Rxed bytes from wire:\n\n' + f'{msg_str!r}\n' ) decodes_failed += 1 else: @@ -258,8 +320,21 @@ class MsgpackTCPStream(MsgTransport): # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. - bytes_data: bytes = _ctxvar_MsgCodec.get().encode(msg) - # bytes_data: bytes = self.codec.encode(msg) + codec: MsgCodec = _ctxvar_MsgCodec.get() + # if self._codec != codec: + if self._codec.pld_spec != codec.pld_spec: + self._codec = codec + log.critical( + '.send() using NEW CODEC !?!?\n' + f'{self._codec}\n\n' + f'OBJ -> {msg}\n' + ) + if type(msg) not in types.__spec__: + log.warning( + 'Sending non-`Msg`-spec msg?\n\n' + f'{msg}\n' + ) + bytes_data: bytes = codec.encode(msg) # supposedly the fastest says, # https://stackoverflow.com/a/54027962 diff --git a/tractor/_portal.py b/tractor/_portal.py index ac602dd5..cc9052ba 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -45,7 +45,10 @@ from ._state import ( ) from ._ipc import Channel from .log import get_logger -from .msg import NamespacePath +from .msg import ( + NamespacePath, + Return, +) from ._exceptions import ( unpack_error, NoResult, @@ -66,7 +69,8 @@ log = get_logger(__name__) # `._raise_from_no_key_in_msg()` (after tweak to # accept a `chan: Channel` arg) in key block! def _unwrap_msg( - msg: dict[str, Any], + # msg: dict[str, Any], + msg: Return, channel: Channel, hide_tb: bool = True, @@ -79,18 +83,21 @@ def _unwrap_msg( __tracebackhide__: bool = hide_tb try: - return msg['return'] - except KeyError as ke: + return msg.pld + # return msg['return'] + # except KeyError as ke: + except AttributeError as err: # internal error should never get here - assert msg.get('cid'), ( + # assert msg.get('cid'), ( + assert msg.cid, ( "Received internal error at portal?" ) raise unpack_error( msg, channel - ) from ke + ) from err class Portal: diff --git a/tractor/_rpc.py b/tractor/_rpc.py index ef6cbe00..9b179524 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -57,6 +57,15 @@ from ._exceptions import ( from .devx import _debug from . import _state from .log import get_logger +from tractor.msg.types import ( + Start, + StartAck, + Started, + Stop, + Yield, + Return, + Error, +) if TYPE_CHECKING: from ._runtime import Actor @@ -84,10 +93,13 @@ async def _invoke_non_context( # TODO: can we unify this with the `context=True` impl below? if inspect.isasyncgen(coro): - await chan.send({ - 'cid': cid, - 'functype': 'asyncgen', - }) + # await chan.send({ + await chan.send( + StartAck( + cid=cid, + functype='asyncgen', + ) + ) # XXX: massive gotcha! 
If the containing scope # is cancelled and we execute the below line, # any ``ActorNursery.__aexit__()`` WON'T be @@ -107,27 +119,45 @@ async def _invoke_non_context( # to_send = await chan.recv_nowait() # if to_send is not None: # to_yield = await coro.asend(to_send) - await chan.send({ - 'yield': item, - 'cid': cid, - }) + # await chan.send({ + # # Yield() + # 'cid': cid, + # 'yield': item, + # }) + await chan.send( + Yield( + cid=cid, + pld=item, + ) + ) log.runtime(f"Finished iterating {coro}") # TODO: we should really support a proper # `StopAsyncIteration` system here for returning a final # value if desired - await chan.send({ - 'stop': True, - 'cid': cid, - }) + await chan.send( + Stop(cid=cid) + ) + # await chan.send({ + # # Stop( + # 'cid': cid, + # 'stop': True, + # }) # one way @stream func that gets treated like an async gen # TODO: can we unify this with the `context=True` impl below? elif treat_as_gen: - await chan.send({ - 'cid': cid, - 'functype': 'asyncgen', - }) + await chan.send( + StartAck( + cid=cid, + functype='asyncgen', + ) + ) + # await chan.send({ + # # StartAck() + # 'cid': cid, + # 'functype': 'asyncgen', + # }) # XXX: the async-func may spawn further tasks which push # back values like an async-generator would but must # manualy construct the response dict-packet-responses as @@ -140,10 +170,14 @@ async def _invoke_non_context( if not cs.cancelled_caught: # task was not cancelled so we can instruct the # far end async gen to tear down - await chan.send({ - 'stop': True, - 'cid': cid - }) + await chan.send( + Stop(cid=cid) + ) + # await chan.send({ + # # Stop( + # 'cid': cid, + # 'stop': True, + # }) else: # regular async function/method # XXX: possibly just a scheduled `Actor._cancel_task()` @@ -155,10 +189,17 @@ async def _invoke_non_context( # way: using the linked IPC context machinery. failed_resp: bool = False try: - await chan.send({ - 'functype': 'asyncfunc', - 'cid': cid - }) + await chan.send( + StartAck( + cid=cid, + functype='asyncfunc', + ) + ) + # await chan.send({ + # # StartAck() + # 'cid': cid, + # 'functype': 'asyncfunc', + # }) except ( trio.ClosedResourceError, trio.BrokenResourceError, @@ -192,10 +233,17 @@ async def _invoke_non_context( and chan.connected() ): try: - await chan.send({ - 'return': result, - 'cid': cid, - }) + # await chan.send({ + # # Return() + # 'cid': cid, + # 'return': result, + # }) + await chan.send( + Return( + cid=cid, + pld=result, + ) + ) except ( BrokenPipeError, trio.BrokenResourceError, @@ -376,6 +424,8 @@ async def _invoke( # XXX for .pause_from_sync()` usage we need to make sure # `greenback` is boostrapped in the subactor! await _debug.maybe_init_greenback() + # else: + # await pause() # TODO: possibly a specially formatted traceback # (not sure what typing is for this..)? @@ -488,10 +538,18 @@ async def _invoke( # a "context" endpoint type is the most general and # "least sugary" type of RPC ep with support for # bi-dir streaming B) - await chan.send({ - 'cid': cid, - 'functype': 'context', - }) + # StartAck + await chan.send( + StartAck( + cid=cid, + functype='context', + ) + ) + # await chan.send({ + # # StartAck() + # 'cid': cid, + # 'functype': 'context', + # }) # TODO: should we also use an `.open_context()` equiv # for this callee side by factoring the impl from @@ -515,10 +573,17 @@ async def _invoke( ctx._result = res # deliver final result to caller side. 
- await chan.send({ - 'return': res, - 'cid': cid - }) + await chan.send( + Return( + cid=cid, + pld=res, + ) + ) + # await chan.send({ + # # Return() + # 'cid': cid, + # 'return': res, + # }) # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, @@ -691,7 +756,8 @@ async def try_ship_error_to_remote( try: # NOTE: normally only used for internal runtime errors # so ship to peer actor without a cid. - msg: dict = pack_error( + # msg: dict = pack_error( + msg: Error = pack_error( err, cid=cid, @@ -707,12 +773,13 @@ async def try_ship_error_to_remote( trio.BrokenResourceError, BrokenPipeError, ): - err_msg: dict = msg['error']['tb_str'] + # err_msg: dict = msg['error']['tb_str'] log.critical( 'IPC transport failure -> ' f'failed to ship error to {remote_descr}!\n\n' f'X=> {channel.uid}\n\n' - f'{err_msg}\n' + # f'{err_msg}\n' + f'{msg}\n' ) @@ -772,31 +839,6 @@ async def process_messages( with CancelScope(shield=shield) as loop_cs: task_status.started(loop_cs) async for msg in chan: - - # dedicated loop terminate sentinel - if msg is None: - - tasks: dict[ - tuple[Channel, str], - tuple[Context, Callable, trio.Event] - ] = actor._rpc_tasks.copy() - log.cancel( - f'Peer IPC channel terminated via `None` setinel msg?\n' - f'=> Cancelling all {len(tasks)} local RPC tasks..\n' - f'peer: {chan.uid}\n' - f'|_{chan}\n' - ) - for (channel, cid) in tasks: - if channel is chan: - await actor._cancel_task( - cid, - channel, - requesting_uid=channel.uid, - - ipc_msg=msg, - ) - break - log.transport( # type: ignore f'<= IPC msg from peer: {chan.uid}\n\n' @@ -806,216 +848,294 @@ async def process_messages( f'{pformat(msg)}\n' ) - cid = msg.get('cid') - if cid: - # deliver response to local caller/waiter - # via its per-remote-context memory channel. - await actor._push_result( - chan, - cid, - msg, - ) + match msg: - log.runtime( - 'Waiting on next IPC msg from\n' - f'peer: {chan.uid}:\n' - f'|_{chan}\n' + # if msg is None: + # dedicated loop terminate sentinel + case None: - # f'last msg: {msg}\n' - ) - continue - - # process a 'cmd' request-msg upack - # TODO: impl with native `msgspec.Struct` support !! - # -[ ] implement with ``match:`` syntax? - # -[ ] discard un-authed msgs as per, - # - try: - ( - ns, - funcname, - kwargs, - actorid, - cid, - ) = msg['cmd'] - - except KeyError: - # This is the non-rpc error case, that is, an - # error **not** raised inside a call to ``_invoke()`` - # (i.e. no cid was provided in the msg - see above). - # Push this error to all local channel consumers - # (normally portals) by marking the channel as errored - assert chan.uid - exc = unpack_error(msg, chan=chan) - chan._exc = exc - raise exc - - log.runtime( - 'Handling RPC cmd from\n' - f'peer: {actorid}\n' - '\n' - f'=> {ns}.{funcname}({kwargs})\n' - ) - if ns == 'self': - if funcname == 'cancel': - func: Callable = actor.cancel - kwargs |= { - 'req_chan': chan, - } - - # don't start entire actor runtime cancellation - # if this actor is currently in debug mode! - pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete - if pdb_complete: - await pdb_complete.wait() - - # Either of `Actor.cancel()`/`.cancel_soon()` - # was called, so terminate this IPC msg - # loop, exit back out into `async_main()`, - # and immediately start the core runtime - # machinery shutdown! 
- with CancelScope(shield=True): - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - - log.runtime( - 'Cancelling IPC transport msg-loop with peer:\n' + tasks: dict[ + tuple[Channel, str], + tuple[Context, Callable, trio.Event] + ] = actor._rpc_tasks.copy() + log.cancel( + f'Peer IPC channel terminated via `None` setinel msg?\n' + f'=> Cancelling all {len(tasks)} local RPC tasks..\n' + f'peer: {chan.uid}\n' f'|_{chan}\n' ) - loop_cs.cancel() + for (channel, cid) in tasks: + if channel is chan: + await actor._cancel_task( + cid, + channel, + requesting_uid=channel.uid, + + ipc_msg=msg, + ) break - if funcname == '_cancel_task': - func: Callable = actor._cancel_task - - # we immediately start the runtime machinery - # shutdown - # with CancelScope(shield=True): - target_cid: str = kwargs['cid'] - kwargs |= { - # NOTE: ONLY the rpc-task-owning - # parent IPC channel should be able to - # cancel it! - 'parent_chan': chan, - 'requesting_uid': chan.uid, - 'ipc_msg': msg, - } - # TODO: remove? already have emit in meth. - # log.runtime( - # f'Rx RPC task cancel request\n' - # f'<= canceller: {chan.uid}\n' - # f' |_{chan}\n\n' - # f'=> {actor}\n' - # f' |_cid: {target_cid}\n' - # ) - try: - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - except BaseException: - log.exception( - 'Failed to cancel task?\n' - f'<= canceller: {chan.uid}\n' - f' |_{chan}\n\n' - f'=> {actor}\n' - f' |_cid: {target_cid}\n' - ) - continue - else: - # normally registry methods, eg. - # ``.register_actor()`` etc. - func: Callable = getattr(actor, funcname) - - else: - # complain to client about restricted modules - try: - func = actor._get_rpc_func(ns, funcname) - except ( - ModuleNotExposed, - AttributeError, - ) as err: - err_msg: dict[str, dict] = pack_error( - err, - cid=cid, - ) - await chan.send(err_msg) - continue - - # schedule a task for the requested RPC function - # in the actor's main "service nursery". - # TODO: possibly a service-tn per IPC channel for - # supervision isolation? would avoid having to - # manage RPC tasks individually in `._rpc_tasks` - # table? - log.runtime( - f'Spawning task for RPC request\n' - f'<= caller: {chan.uid}\n' - f' |_{chan}\n\n' - # TODO: maddr style repr? - # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' - # f'cid="{cid[-16:]} .."\n\n' - - f'=> {actor}\n' - f' |_cid: {cid}\n' - f' |>> {func}()\n' - ) - assert actor._service_n # wait why? do it at top? - try: - ctx: Context = await actor._service_n.start( - partial( - _invoke, - actor, - cid, + # cid = msg.get('cid') + # if cid: + case ( + StartAck(cid=cid) + | Started(cid=cid) + | Yield(cid=cid) + | Stop(cid=cid) + | Return(cid=cid) + | Error(cid=cid) + ): + # deliver response to local caller/waiter + # via its per-remote-context memory channel. + await actor._push_result( chan, - func, - kwargs, - ), - name=funcname, - ) + cid, + msg, + ) - except ( - RuntimeError, - BaseExceptionGroup, - ): - # avoid reporting a benign race condition - # during actor runtime teardown. - nursery_cancelled_before_task: bool = True - break + log.runtime( + 'Waiting on next IPC msg from\n' + f'peer: {chan.uid}:\n' + f'|_{chan}\n' - # in the lone case where a ``Context`` is not - # delivered, it's likely going to be a locally - # scoped exception from ``_invoke()`` itself. - if isinstance(err := ctx, Exception): - log.warning( - 'Task for RPC failed?' 
- f'|_ {func}()\n\n' + # f'last msg: {msg}\n' + ) + continue - f'{err}' - ) - continue + # process a 'cmd' request-msg upack + # TODO: impl with native `msgspec.Struct` support !! + # -[ ] implement with ``match:`` syntax? + # -[ ] discard un-authed msgs as per, + # + case Start( + cid=cid, + ns=ns, + func=funcname, + kwargs=kwargs, + uid=actorid, + ): + # try: + # ( + # ns, + # funcname, + # kwargs, + # actorid, + # cid, + # ) = msg['cmd'] - else: - # mark that we have ongoing rpc tasks - actor._ongoing_rpc_tasks = trio.Event() + # # TODO: put in `case Error():` right? + # except KeyError: + # # This is the non-rpc error case, that is, an + # # error **not** raised inside a call to ``_invoke()`` + # # (i.e. no cid was provided in the msg - see above). + # # Push this error to all local channel consumers + # # (normally portals) by marking the channel as errored + # assert chan.uid + # exc = unpack_error(msg, chan=chan) + # chan._exc = exc + # raise exc - # store cancel scope such that the rpc task can be - # cancelled gracefully if requested - actor._rpc_tasks[(chan, cid)] = ( - ctx, - func, - trio.Event(), - ) + log.runtime( + 'Handling RPC `Start` request from\n' + f'peer: {actorid}\n' + '\n' + f'=> {ns}.{funcname}({kwargs})\n' + ) + # case Start( + # ns='self', + # funcname='cancel', + # ): + if ns == 'self': + if funcname == 'cancel': + func: Callable = actor.cancel + kwargs |= { + 'req_chan': chan, + } + + # don't start entire actor runtime cancellation + # if this actor is currently in debug mode! + pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete + if pdb_complete: + await pdb_complete.wait() + + # Either of `Actor.cancel()`/`.cancel_soon()` + # was called, so terminate this IPC msg + # loop, exit back out into `async_main()`, + # and immediately start the core runtime + # machinery shutdown! + with CancelScope(shield=True): + await _invoke( + actor, + cid, + chan, + func, + kwargs, + is_rpc=False, + ) + + log.runtime( + 'Cancelling IPC transport msg-loop with peer:\n' + f'|_{chan}\n' + ) + loop_cs.cancel() + break + + # case Start( + # ns='self', + # funcname='_cancel_task', + # ): + if funcname == '_cancel_task': + func: Callable = actor._cancel_task + + # we immediately start the runtime machinery + # shutdown + # with CancelScope(shield=True): + target_cid: str = kwargs['cid'] + kwargs |= { + # NOTE: ONLY the rpc-task-owning + # parent IPC channel should be able to + # cancel it! + 'parent_chan': chan, + 'requesting_uid': chan.uid, + 'ipc_msg': msg, + } + # TODO: remove? already have emit in meth. + # log.runtime( + # f'Rx RPC task cancel request\n' + # f'<= canceller: {chan.uid}\n' + # f' |_{chan}\n\n' + # f'=> {actor}\n' + # f' |_cid: {target_cid}\n' + # ) + try: + await _invoke( + actor, + cid, + chan, + func, + kwargs, + is_rpc=False, + ) + except BaseException: + log.exception( + 'Failed to cancel task?\n' + f'<= canceller: {chan.uid}\n' + f' |_{chan}\n\n' + f'=> {actor}\n' + f' |_cid: {target_cid}\n' + ) + continue + + # case Start( + # ns='self', + # funcname='register_actor', + # ): + else: + # normally registry methods, eg. + # ``.register_actor()`` etc. 
+ func: Callable = getattr(actor, funcname) + + # case Start( + # ns=str(), + # funcname=funcname, + # ): + else: + # complain to client about restricted modules + try: + func = actor._get_rpc_func(ns, funcname) + except ( + ModuleNotExposed, + AttributeError, + ) as err: + err_msg: dict[str, dict] = pack_error( + err, + cid=cid, + ) + await chan.send(err_msg) + continue + + # schedule a task for the requested RPC function + # in the actor's main "service nursery". + # TODO: possibly a service-tn per IPC channel for + # supervision isolation? would avoid having to + # manage RPC tasks individually in `._rpc_tasks` + # table? + log.runtime( + f'Spawning task for RPC request\n' + f'<= caller: {chan.uid}\n' + f' |_{chan}\n\n' + # TODO: maddr style repr? + # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' + # f'cid="{cid[-16:]} .."\n\n' + + f'=> {actor}\n' + f' |_cid: {cid}\n' + f' |>> {func}()\n' + ) + assert actor._service_n # wait why? do it at top? + try: + ctx: Context = await actor._service_n.start( + partial( + _invoke, + actor, + cid, + chan, + func, + kwargs, + ), + name=funcname, + ) + + except ( + RuntimeError, + BaseExceptionGroup, + ): + # avoid reporting a benign race condition + # during actor runtime teardown. + nursery_cancelled_before_task: bool = True + break + + # in the lone case where a ``Context`` is not + # delivered, it's likely going to be a locally + # scoped exception from ``_invoke()`` itself. + if isinstance(err := ctx, Exception): + log.warning( + 'Task for RPC failed?' + f'|_ {func}()\n\n' + + f'{err}' + ) + continue + + else: + # mark that we have ongoing rpc tasks + actor._ongoing_rpc_tasks = trio.Event() + + # store cancel scope such that the rpc task can be + # cancelled gracefully if requested + actor._rpc_tasks[(chan, cid)] = ( + ctx, + func, + trio.Event(), + ) + + case Error()|_: + # This is the non-rpc error case, that is, an + # error **not** raised inside a call to ``_invoke()`` + # (i.e. no cid was provided in the msg - see above). 
+ # Push this error to all local channel consumers + # (normally portals) by marking the channel as errored + log.exception( + f'Unhandled IPC msg:\n\n' + f'{msg}\n' + ) + assert chan.uid + exc = unpack_error( + msg, + chan=chan, + ) + chan._exc = exc + raise exc log.runtime( 'Waiting on next IPC msg from\n' diff --git a/tractor/_runtime.py b/tractor/_runtime.py index ed7b4503..eee78973 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -87,6 +87,23 @@ from ._rpc import ( process_messages, try_ship_error_to_remote, ) +from tractor.msg import ( + types as msgtypes, + pretty_struct, +) +# from tractor.msg.types import ( +# Aid, +# SpawnSpec, +# Start, +# StartAck, +# Started, +# Yield, +# Stop, +# Return, +# Error, +# ) + + if TYPE_CHECKING: @@ -143,6 +160,7 @@ class Actor: # Information about `__main__` from parent _parent_main_data: dict[str, str] _parent_chan_cs: CancelScope|None = None + _spawn_spec: SpawnSpec|None = None # syncs for setup/teardown sequences _server_down: trio.Event|None = None @@ -539,7 +557,8 @@ class Actor: f'{pformat(msg)}\n' ) - cid = msg.get('cid') + # cid: str|None = msg.get('cid') + cid: str|None = msg.cid if cid: # deliver response to local caller/waiter await self._push_result( @@ -891,29 +910,44 @@ class Actor: f'=> {ns}.{func}({kwargs})\n' ) await chan.send( - {'cmd': ( - ns, - func, - kwargs, - self.uid, - cid, - )} + msgtypes.Start( + ns=ns, + func=func, + kwargs=kwargs, + uid=self.uid, + cid=cid, + ) ) + # {'cmd': ( + # ns, + # func, + # kwargs, + # self.uid, + # cid, + # )} + # ) # Wait on first response msg and validate; this should be # immediate. - first_msg: dict = await ctx._recv_chan.receive() - functype: str = first_msg.get('functype') + # first_msg: dict = await ctx._recv_chan.receive() + # functype: str = first_msg.get('functype') - if 'error' in first_msg: + first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() + try: + functype: str = first_msg.functype + except AttributeError: raise unpack_error(first_msg, chan) + # if 'error' in first_msg: + # raise unpack_error(first_msg, chan) - elif functype not in ( + if functype not in ( 'asyncfunc', 'asyncgen', 'context', ): - raise ValueError(f"{first_msg} is an invalid response packet?") + raise ValueError( + f'{first_msg} is an invalid response packet?' + ) ctx._remote_func_type = functype return ctx @@ -946,24 +980,36 @@ class Actor: await self._do_handshake(chan) accept_addrs: list[tuple[str, int]]|None = None - if self._spawn_method == "trio": - # Receive runtime state from our parent - parent_data: dict[str, Any] - parent_data = await chan.recv() - log.runtime( - 'Received state from parent:\n\n' - # TODO: eventually all these msgs as - # `msgspec.Struct` with a special mode that - # pformats them in multi-line mode, BUT only - # if "trace"/"util" mode is enabled? - f'{pformat(parent_data)}\n' - ) - accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs') - rvs = parent_data.pop('_runtime_vars') + if self._spawn_method == "trio": + + # Receive runtime state from our parent + # parent_data: dict[str, Any] + # parent_data = await chan.recv() + + # TODO: maybe we should just wrap this directly + # in a `Actor.spawn_info: SpawnInfo` struct? + spawnspec: msgtypes.SpawnSpec = await chan.recv() + self._spawn_spec = spawnspec + + # TODO: eventually all these msgs as + # `msgspec.Struct` with a special mode that + # pformats them in multi-line mode, BUT only + # if "trace"/"util" mode is enabled? 
+ log.runtime( + 'Received runtime spec from parent:\n\n' + f'{pformat(spawnspec)}\n' + ) + # accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs') + accept_addrs: list[tuple[str, int]] = spawnspec.bind_addrs + + # rvs = parent_data.pop('_runtime_vars') + rvs = spawnspec._runtime_vars if rvs['_debug_mode']: try: - log.info('Enabling `stackscope` traces on SIGUSR1') + log.info( + 'Enabling `stackscope` traces on SIGUSR1' + ) from .devx import enable_stack_on_sig enable_stack_on_sig() except ImportError: @@ -971,28 +1017,40 @@ class Actor: '`stackscope` not installed for use in debug mode!' ) - log.runtime(f"Runtime vars are: {rvs}") + log.runtime(f'Runtime vars are: {rvs}') rvs['_is_root'] = False _state._runtime_vars.update(rvs) - for attr, value in parent_data.items(): - if ( - attr == 'reg_addrs' - and value - ): - # XXX: ``msgspec`` doesn't support serializing tuples - # so just cash manually here since it's what our - # internals expect. - # TODO: we don't really NEED these as - # tuples so we can probably drop this - # casting since apparently in python lists - # are "more efficient"? - self.reg_addrs = [tuple(val) for val in value] + # XXX: ``msgspec`` doesn't support serializing tuples + # so just cash manually here since it's what our + # internals expect. + # + self.reg_addrs = [ + # TODO: we don't really NEED these as tuples? + # so we can probably drop this casting since + # apparently in python lists are "more + # efficient"? + tuple(val) + for val in spawnspec.reg_addrs + ] - else: - setattr(self, attr, value) + # for attr, value in parent_data.items(): + for _, attr, value in pretty_struct.iter_fields( + spawnspec, + ): + setattr(self, attr, value) + # if ( + # attr == 'reg_addrs' + # and value + # ): + # self.reg_addrs = [tuple(val) for val in value] + # else: + # setattr(self, attr, value) - return chan, accept_addrs + return ( + chan, + accept_addrs, + ) except OSError: # failed to connect log.warning( @@ -1434,7 +1492,7 @@ class Actor: self, chan: Channel - ) -> tuple[str, str]: + ) -> msgtypes.Aid: ''' Exchange `(name, UUIDs)` identifiers as the first communication step with any (peer) remote `Actor`. @@ -1443,14 +1501,27 @@ class Actor: "actor model" parlance. 
''' - await chan.send(self.uid) - value: tuple = await chan.recv() - uid: tuple[str, str] = (str(value[0]), str(value[1])) + name, uuid = self.uid + await chan.send( + msgtypes.Aid( + name=name, + uuid=uuid, + ) + ) + aid: msgtypes.Aid = await chan.recv() + chan.aid = aid + + uid: tuple[str, str] = ( + # str(value[0]), + # str(value[1]) + aid.name, + aid.uuid, + ) if not isinstance(uid, tuple): raise ValueError(f"{uid} is not a valid uid?!") - chan.uid = str(uid[0]), str(uid[1]) + chan.uid = uid return uid def is_infected_aio(self) -> bool: @@ -1510,7 +1581,8 @@ async def async_main( # because we're running in mp mode if ( set_accept_addr_says_rent - and set_accept_addr_says_rent is not None + and + set_accept_addr_says_rent is not None ): accept_addrs = set_accept_addr_says_rent diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 48135cc9..824f41f3 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -49,6 +49,9 @@ from tractor._portal import Portal from tractor._runtime import Actor from tractor._entry import _mp_main from tractor._exceptions import ActorFailure +from tractor.msg.types import ( + SpawnSpec, +) if TYPE_CHECKING: @@ -493,14 +496,25 @@ async def trio_proc( portal, ) - # send additional init params - await chan.send({ - '_parent_main_data': subactor._parent_main_data, - 'enable_modules': subactor.enable_modules, - 'reg_addrs': subactor.reg_addrs, - 'bind_addrs': bind_addrs, - '_runtime_vars': _runtime_vars, - }) + # send a "spawning specification" which configures the + # initial runtime state of the child. + await chan.send( + SpawnSpec( + _parent_main_data=subactor._parent_main_data, + enable_modules=subactor.enable_modules, + reg_addrs=subactor.reg_addrs, + bind_addrs=bind_addrs, + _runtime_vars=_runtime_vars, + ) + ) + + # await chan.send({ + # '_parent_main_data': subactor._parent_main_data, + # 'enable_modules': subactor.enable_modules, + # 'reg_addrs': subactor.reg_addrs, + # 'bind_addrs': bind_addrs, + # '_runtime_vars': _runtime_vars, + # }) # track subactor in current nursery curr_actor: Actor = current_actor() diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 90c33d31..941cfe8d 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -43,6 +43,11 @@ from .trionics import ( broadcast_receiver, BroadcastReceiver, ) +from tractor.msg import ( + Stop, + Yield, + Error, +) if TYPE_CHECKING: from ._context import Context @@ -94,21 +99,25 @@ class MsgStream(trio.abc.Channel): self, allow_msg_keys: list[str] = ['yield'], ): - msg: dict = self._rx_chan.receive_nowait() + # msg: dict = self._rx_chan.receive_nowait() + msg: Yield|Stop = self._rx_chan.receive_nowait() for ( i, key, ) in enumerate(allow_msg_keys): try: - return msg[key] - except KeyError as kerr: + # return msg[key] + return msg.pld + # except KeyError as kerr: + except AttributeError as attrerr: if i < (len(allow_msg_keys) - 1): continue _raise_from_no_key_in_msg( ctx=self._ctx, msg=msg, - src_err=kerr, + # src_err=kerr, + src_err=attrerr, log=log, expect_key=key, stream=self, @@ -148,18 +157,22 @@ class MsgStream(trio.abc.Channel): src_err: Exception|None = None # orig tb try: try: - msg = await self._rx_chan.receive() - return msg['yield'] + msg: Yield = await self._rx_chan.receive() + # return msg['yield'] + return msg.pld - except KeyError as kerr: - src_err = kerr + # except KeyError as kerr: + except AttributeError as attrerr: + # src_err = kerr + src_err = attrerr # NOTE: may raise any of the below error types # includg EoC when a 'stop' msg is found. 
            _raise_from_no_key_in_msg(
                 ctx=self._ctx,
                 msg=msg,
-                src_err=kerr,
+                # src_err=kerr,
+                src_err=attrerr,
                 log=log,
                 expect_key='yield',
                 stream=self,
@@ -514,11 +527,18 @@ class MsgStream(trio.abc.Channel):
             raise self._closed

         try:
+            # await self._ctx.chan.send(
+            #     payload={
+            #         'yield': data,
+            #         'cid': self._ctx.cid,
+            #     },
+            #     # hide_tb=hide_tb,
+            # )
             await self._ctx.chan.send(
-                payload={
-                    'yield': data,
-                    'cid': self._ctx.cid,
-                },
+                payload=Yield(
+                    cid=self._ctx.cid,
+                    pld=data,
+                ),
                 # hide_tb=hide_tb,
             )
         except (
diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 255b1dbd..26155b22 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -935,6 +935,9 @@ async def _pause(
     # ``breakpoint()`` was awaited and begin handling stdio.
     log.debug('Entering sync world of the `pdb` REPL..')
     try:
+        # log.critical(
+        #     f'stack len: {len(pdb.stack)}\n'
+        # )
         debug_func(
             actor,
             pdb,
-- 
2.34.1


From fc83f4ecf00c7dc429b035a79f1464770b066485 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Apr 2024 14:32:16 -0400
Subject: [PATCH 017/190] Woops, only pack `Error(cid=cid)` if input is not
 `None`

---
 tractor/_exceptions.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 7deda9d2..9c1dc36d 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -498,7 +498,7 @@ def pack_error(
     tb: str|None = None,
     cid: str|None = None,

-) -> Error|dict[str, dict]:
+) -> Error:
     '''
     Create an "error message" which boxes a locally caught
     exception's meta-data and encodes it for wire transport via an
@@ -561,19 +561,10 @@ def pack_error(
     # content's `.msgdata`).
     error_msg['tb_str'] = tb_str

-    # Error()
-    # pkt: dict = {
-    #     'error': error_msg,
-    # }
-    pkt: Error = Error(
-        cid=cid,
-        **error_msg,
-        # TODO: just get rid of `.pld` on this msg?
-    )
-    # if cid:
-    #     pkt['cid'] = cid
+    if cid is not None:
+        error_msg['cid'] = cid

-    return pkt
+    return Error(**error_msg)


 def unpack_error(
-- 
2.34.1


From e1f612996cadc555167bb2890366c0e4a4c4768b Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Apr 2024 14:34:08 -0400
Subject: [PATCH 018/190] TOSQUASH f2ce4a3, timeout bump

---
 tests/test_context_stream_semantics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py
index 36a5fd9f..d2b572cc 100644
--- a/tests/test_context_stream_semantics.py
+++ b/tests/test_context_stream_semantics.py
@@ -873,7 +873,7 @@ def test_one_end_stream_not_opened(
             enable_modules=[__name__],
         )

-        with trio.fail_after(0.8):
+        with trio.fail_after(1):
             async with portal.open_context(
                 entrypoint,
             ) as (ctx, sent):
-- 
2.34.1


From ed72974ec4e679d06ba2ff42b2ac6770c7781c5c Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 3 Apr 2024 09:45:03 -0400
Subject: [PATCH 019/190] Flip default codec to our `Msg`-spec

Yes, this is "the switch" and will likely cause the test suite to
bail until a few more fixes come in.

Tweaked a couple `.msg` pkg exports:
- remove `__spec__` (a name already used by module objects) and
  change it to `__msg_types__: list[Msg]` as well as add a new
  `__msg_spec__: TypeAlias`, being the default `Any` paramed spec.
- tweak the naming of `msg.types` lists of runtime vs payload msgs to:
  `._runtime_msgs` and `._payload_msgs`.
- just build `__msg_types__` out of the above 2 lists.
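
For a quick orientation, a minimal usage sketch of what the new
default spec amounts to; this snippet is illustrative only (not part
of the patch) and assumes the exports land as named in the diff below:

    import msgspec
    from tractor.msg import __msg_spec__

    # a decoder bound to the full `Any`-paramed spec only loads
    # valid `Msg` structs off the wire; any other payload should
    # raise a `msgspec.ValidationError` on `.decode()`.
    dec = msgspec.msgpack.Decoder(type=__msg_spec__)
    try:
        dec.decode(msgspec.msgpack.Encoder().encode(None))
    except msgspec.ValidationError:
        print('non-`Msg` payloads are rejected!')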
--- tractor/msg/__init__.py | 12 ++++++-- tractor/msg/_codec.py | 4 +-- tractor/msg/types.py | 61 +++++++++++++++++++++++------------------ 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index d8f37477..fe965e0b 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -18,6 +18,10 @@ Built-in messaging patterns, types, APIs and helpers. ''' +from typing import ( + Union, + TypeAlias, +) from .ptr import ( NamespacePath as NamespacePath, ) @@ -50,6 +54,10 @@ from .types import ( Error as Error, - # full msg spec set - __spec__ as __spec__, + # full msg class set from above as list + __msg_types__ as __msg_types__, ) +# TODO: use new type declaration syntax for msg-type-spec +# https://docs.python.org/3/library/typing.html#type-aliases +# https://docs.python.org/3/reference/simple_stmts.html#type +__msg_spec__: TypeAlias = Union[*__msg_types__] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 32a58a56..56f24d62 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -438,8 +438,8 @@ _ctxvar_MsgCodec: MsgCodec = RunVar( 'msgspec_codec', # TODO: move this to our new `Msg`-spec! - default=_def_msgspec_codec, - # default=_def_tractor_codec, + # default=_def_msgspec_codec, + default=_def_tractor_codec, ) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index a81473d7..b246cb61 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -26,7 +26,7 @@ from __future__ import annotations import types from typing import ( Any, - Callable, + # Callable, Generic, Literal, Type, @@ -340,50 +340,54 @@ class Error( # class Overrun(Msg): # cid: str +_runtime_msgs: list[Msg] = [ -# built-in SC shuttle protocol msg type set in -# approx order of the IPC txn-state spaces. -__spec__: list[Msg] = [ - - # identity handshake + # identity handshake on first IPC `Channel` contact. Aid, - # spawn specification from parent + # parent-to-child spawn specification passed as 2nd msg after + # handshake ONLY after child connects back to parent. SpawnSpec, # inter-actor RPC initiation - Start, - StartAck, + Start, # schedule remote task-as-func + StartAck, # ack the schedule request - # no-outcome-yet IAC (inter-actor-communication) - Started, - Yield, + # emission from `MsgStream.aclose()` Stop, - # termination outcomes - Return, + # box remote errors, normally subtypes + # of `RemoteActorError`. Error, ] -_runtime_spec_msgs: list[Msg] = [ - Aid, - SpawnSpec, - Start, - StartAck, - Stop, - Error, -] -_payload_spec_msgs: list[Msg] = [ +# the no-outcome-yet IAC (inter-actor-communication) sub-set which +# can be `Msg.pld` payload field type-limited by application code +# using `apply_codec()` and `limit_msg_spec()`. +_payload_msgs: list[Msg] = [ + # first from `Context.started()` Started, + + # any sent via `MsgStream.send()` Yield, + + # the final value returned from a `@context` decorated + # IPC endpoint. Return, ] +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. 
+__msg_types__: list[Msg] = ( + _runtime_msgs + + + _payload_msgs +) + def mk_msg_spec( payload_type_union: Union[Type] = Any, - # boxing_msg_set: list[Msg] = _payload_spec_msgs, spec_build_method: Literal[ 'indexed_generics', # works 'defstruct', @@ -424,12 +428,12 @@ def mk_msg_spec( defs_msg_types: list[Msg] = [] nc_msg_types: list[Msg] = [] - for msgtype in __spec__: + for msgtype in __msg_types__: # for the NON-payload (user api) type specify-able # msgs types, we simply aggregate the def as is # for inclusion in the output type `Union`. - if msgtype not in _payload_spec_msgs: + if msgtype not in _payload_msgs: ipc_msg_types.append(msgtype) continue @@ -535,6 +539,11 @@ def mk_msg_spec( # TODO: make something similar to this inside `._codec` such that # user can just pass a type table of some sort? +# -[ ] we would need to decode all msgs to `pretty_struct.Struct` +# and then call `.to_dict()` on them? +# -[ ] we're going to need to re-impl all the stuff changed in the +# runtime port such that it can handle dicts or `Msg`s? +# # def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: # ''' # Deliver a `enc_hook()`/`dec_hook()` pair which does -- 2.34.1 From 8f18c9febf1ca91cfaccaa947bd6ee6d3b2b61f2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 3 Apr 2024 09:50:22 -0400 Subject: [PATCH 020/190] Start a new `._testing.fault_simulation` Since I needed the `break_ipc()` helper from the `examples/advanced_faults/ipc_failure_during_stream.py` used in the `test_advanced_faults` suite, might as well move it into a pkg-wide importable module. Also changed the default break method to be `socket_close` which just calls `Stream.socket.close()` underneath in `trio`. Also tweak that example to not keep sending after the stream has been broken since with new `trio` that will raise `ClosedResourceError` and in the wrapping test we generally speaking want to see a hang and then cancel via simulated user sent SIGINT/ctl-c. --- .../ipc_failure_during_stream.py | 86 ++++------------- tractor/_testing/__init__.py | 3 + tractor/_testing/fault_simulation.py | 92 +++++++++++++++++++ 3 files changed, 112 insertions(+), 69 deletions(-) create mode 100644 tractor/_testing/fault_simulation.py diff --git a/examples/advanced_faults/ipc_failure_during_stream.py b/examples/advanced_faults/ipc_failure_during_stream.py index 9dca92b1..60b28c3e 100644 --- a/examples/advanced_faults/ipc_failure_during_stream.py +++ b/examples/advanced_faults/ipc_failure_during_stream.py @@ -21,75 +21,12 @@ import trio import pytest -async def break_ipc( - stream: MsgStream, - method: str|None = None, - pre_close: bool = False, - - def_method: str = 'eof', - -) -> None: - ''' - XXX: close the channel right after an error is raised - purposely breaking the IPC transport to make sure the parent - doesn't get stuck in debug or hang on the connection join. - this more or less simulates an infinite msg-receive hang on - the other end. 
- - ''' - # close channel via IPC prot msging before - # any transport breakage - if pre_close: - await stream.aclose() - - method: str = method or def_method - print( - '#################################\n' - 'Simulating CHILD-side IPC BREAK!\n' - f'method: {method}\n' - f'pre `.aclose()`: {pre_close}\n' - '#################################\n' - ) - - match method: - case 'trans_aclose': - await stream._ctx.chan.transport.stream.aclose() - - case 'eof': - await stream._ctx.chan.transport.stream.send_eof() - - case 'msg': - await stream._ctx.chan.send(None) - - # TODO: the actual real-world simulated cases like - # transport layer hangs and/or lower layer 2-gens type - # scenarios.. - # - # -[ ] already have some issues for this general testing - # area: - # - https://github.com/goodboy/tractor/issues/97 - # - https://github.com/goodboy/tractor/issues/124 - # - PR from @guille: - # https://github.com/goodboy/tractor/pull/149 - # case 'hang': - # TODO: framework research: - # - # - https://github.com/GuoTengda1993/pynetem - # - https://github.com/shopify/toxiproxy - # - https://manpages.ubuntu.com/manpages/trusty/man1/wirefilter.1.html - - case _: - raise RuntimeError( - f'IPC break method unsupported: {method}' - ) - - async def break_ipc_then_error( stream: MsgStream, break_ipc_with: str|None = None, pre_close: bool = False, ): - await break_ipc( + await _testing.break_ipc( stream=stream, method=break_ipc_with, pre_close=pre_close, @@ -121,6 +58,7 @@ async def recv_and_spawn_net_killers( Receive stream msgs and spawn some IPC killers mid-stream. ''' + broke_ipc: bool = False await ctx.started() async with ( ctx.open_stream() as stream, @@ -128,13 +66,17 @@ async def recv_and_spawn_net_killers( ): async for i in stream: print(f'child echoing {i}') - await stream.send(i) + if not broke_ipc: + await stream.send(i) + else: + await trio.sleep(0.01) if ( break_ipc_after and i >= break_ipc_after ): + broke_ipc = True n.start_soon( iter_ipc_stream, stream, @@ -242,14 +184,13 @@ async def main( # await stream._ctx.chan.send(None) # await stream._ctx.chan.transport.stream.send_eof() await stream._ctx.chan.transport.stream.aclose() - ipc_break_sent = True # it actually breaks right here in the - # mp_spawn/forkserver backends and thus the zombie - # reaper never even kicks in? - print(f'parent sending {i}') + # mp_spawn/forkserver backends and thus the + # zombie reaper never even kicks in? try: + print(f'parent sending {i}') await stream.send(i) except ContextCancelled as ctxc: print( @@ -262,6 +203,13 @@ async def main( # TODO: is this needed or no? raise + except trio.ClosedResourceError: + # NOTE: don't send if we already broke the + # connection to avoid raising a closed-error + # such that we drop through to the ctl-c + # mashing by user. + await trio.sleep(0.01) + # timeout: int = 1 # with trio.move_on_after(timeout) as cs: async with stuff_hangin_ctlc() as timeout: diff --git a/tractor/_testing/__init__.py b/tractor/_testing/__init__.py index 876c87e8..fd79fe20 100644 --- a/tractor/_testing/__init__.py +++ b/tractor/_testing/__init__.py @@ -26,6 +26,9 @@ import tractor from .pytest import ( tractor_test as tractor_test ) +from .fault_simulation import ( + break_ipc as break_ipc, +) def repodir() -> pathlib.Path: diff --git a/tractor/_testing/fault_simulation.py b/tractor/_testing/fault_simulation.py new file mode 100644 index 00000000..fbd97bf5 --- /dev/null +++ b/tractor/_testing/fault_simulation.py @@ -0,0 +1,92 @@ +# tractor: structured concurrent "actors". 
+# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +`pytest` utils helpers and plugins for testing `tractor`'s runtime +and applications. + +''' + +from tractor import ( + MsgStream, +) + +async def break_ipc( + stream: MsgStream, + method: str|None = None, + pre_close: bool = False, + + def_method: str = 'socket_close', + +) -> None: + ''' + XXX: close the channel right after an error is raised + purposely breaking the IPC transport to make sure the parent + doesn't get stuck in debug or hang on the connection join. + this more or less simulates an infinite msg-receive hang on + the other end. + + ''' + # close channel via IPC prot msging before + # any transport breakage + if pre_close: + await stream.aclose() + + method: str = method or def_method + print( + '#################################\n' + 'Simulating CHILD-side IPC BREAK!\n' + f'method: {method}\n' + f'pre `.aclose()`: {pre_close}\n' + '#################################\n' + ) + + match method: + case 'socket_close': + await stream._ctx.chan.transport.stream.aclose() + + case 'socket_eof': + # NOTE: `trio` does the following underneath this + # call in `src/trio/_highlevel_socket.py`: + # `Stream.socket.shutdown(tsocket.SHUT_WR)` + await stream._ctx.chan.transport.stream.send_eof() + + # TODO: remove since now this will be invalid with our + # new typed msg spec? + # case 'msg': + # await stream._ctx.chan.send(None) + + # TODO: the actual real-world simulated cases like + # transport layer hangs and/or lower layer 2-gens type + # scenarios.. 
+ # + # -[ ] already have some issues for this general testing + # area: + # - https://github.com/goodboy/tractor/issues/97 + # - https://github.com/goodboy/tractor/issues/124 + # - PR from @guille: + # https://github.com/goodboy/tractor/pull/149 + # case 'hang': + # TODO: framework research: + # + # - https://github.com/GuoTengda1993/pynetem + # - https://github.com/shopify/toxiproxy + # - https://manpages.ubuntu.com/manpages/trusty/man1/wirefilter.1.html + + case _: + raise RuntimeError( + f'IPC break method unsupported: {method}' + ) -- 2.34.1 From 9de2fff2735853b9258fb95c6b4ead8a62a87a5e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 10:53:07 -0400 Subject: [PATCH 021/190] Use `._testing.break_ipc()` in final advanced fault test child ctx --- tests/test_advanced_faults.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_advanced_faults.py b/tests/test_advanced_faults.py index 52db139f..dfaeb68e 100644 --- a/tests/test_advanced_faults.py +++ b/tests/test_advanced_faults.py @@ -13,6 +13,7 @@ import trio import tractor from tractor._testing import ( examples_dir, + break_ipc, ) @@ -93,7 +94,8 @@ def test_ipc_channel_break_during_stream( expect_final_exc = trio.ClosedResourceError mod: ModuleType = import_path( - examples_dir() / 'advanced_faults' / 'ipc_failure_during_stream.py', + examples_dir() / 'advanced_faults' + / 'ipc_failure_during_stream.py', root=examples_dir(), consider_namespace_packages=False, ) @@ -225,9 +227,15 @@ async def break_ipc_after_started( ) -> None: await ctx.started() async with ctx.open_stream() as stream: - await stream.aclose() - await trio.sleep(0.2) - await ctx.chan.send(None) + + # TODO: make a test which verifies the error + # for this, i.e. raises a `MsgTypeError` + # await ctx.chan.send(None) + + await break_ipc( + stream=stream, + pre_close=True, + ) print('child broke IPC and terminating') -- 2.34.1 From c2fc6293aa08af9b265aaf2d357770cc7b0e1ab4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 11:36:09 -0400 Subject: [PATCH 022/190] Extend codec test to for msg-spec parameterizing Set a diff `Msg.pld` spec per test and then send multiple types to a child actor making sure the child can only send certain types over a stream and fails with validation or decode errors ow. The test is also param-ed both with and without hooks demonstrating how a custom type, `NamespacePath`, needs them for effective use. The subactor IPC context child is passed a `expect_ipc_send: dict` which relays the values along with their expected `.send()`-ability. Deats on technical refinements: ------ - ------ - added a `iter_maybe_sends()` send-value-as-msg-auditor and predicate generator (literally) so as to be able to pre-determine if given the current codec and `send_values` which values are expected to be IPC transmittable. - as per ^, the diff value-msgs are first round-tripped inside a `Started` msg using the configured codec in the parent/root actor before bothering with using IPC primitives + a subactor; this is how the `expect_ipc_send` table is generated initially. - for serializing the specs (`Union[Type]`s as required by `msgspec`), added a pair of codec hooks: `enc/dec_type_union()` (that ideally we move into a `.msg` submod eventually) which code the type-values as a `list[str]` of names. 
- the `dec_` hook had to be modified to NOT raise an error when an invalid/unhandled value arrives, this is because we do NOT want the RPC msg handling loop to raise on the `async for msg in chan:` and instead prefer to ignore and warn (for now, but eventually respond with error msg - see notes in hook body) these msgs when sent during a streaming phase; `Context.started()` will however error on a bad input for the current msg-spec since it is part of the "cheap" dialog (again see notes in `._context`) wherein the `Started` msg is always roundtripped prior to `Channel.send()` to guarantee the child adheres to its own spec. - tossed in lotsa `print()`s for console groking of the run progress. Further notes on typed-msging breaking cancellation: ------ - ------ - turns out since the runtime's cancellation implementation, being done with `Actor.cancel()` methods and friends will actually break when a stringent spec is applied (eg. a single type-spec) since the return values from said methods are generally `bool`s.. - this means we do indeed need special handling of "runtime RPC method invocations" since ideally a user's msg-spec choices do not break core functionality on them XD => The obvi solution is to add a/some special sub-`Msg` types for such cases, possibly just a `RuntimeReturn(Return)` type that will always include a `.pld: bool` for these cancel methods such that their results are always handled without msg type errors. More to come on a (hopefully) elegant solution to that last bit! --- tests/test_caps_based_msging.py | 648 +++++++++++++++++++++++--------- 1 file changed, 462 insertions(+), 186 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index b42d9e35..acc1f307 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -5,6 +5,7 @@ Low-level functional audits for our B~) ''' +import typing from typing import ( Any, Type, @@ -23,7 +24,9 @@ from msgspec import ( ValidationError, ) import pytest + import tractor +from tractor import _state from tractor.msg import ( _codec, _ctxvar_MsgCodec, @@ -34,12 +37,9 @@ from tractor.msg import ( apply_codec, current_codec, ) -from tractor.msg import ( - types, -) -from tractor import _state from tractor.msg.types import ( - # PayloadT, + _payload_msgs, + log, Msg, Started, mk_msg_spec, @@ -62,17 +62,14 @@ def test_msg_spec_xor_pld_spec(): ) -def ex_func(*args): - print(f'ex_func({args})') - - def mk_custom_codec( pld_spec: Union[Type]|Any, + add_hooks: bool, ) -> MsgCodec: ''' Create custom `msgpack` enc/dec-hooks and set a `Decoder` - which only loads `NamespacePath` types. + which only loads `pld_spec` (like `NamespacePath`) types. 
''' uid: tuple[str, str] = tractor.current_actor().uid @@ -83,61 +80,75 @@ def mk_custom_codec( # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types def enc_nsp(obj: Any) -> Any: + print(f'{uid} ENC HOOK') match obj: case NamespacePath(): print( f'{uid}: `NamespacePath`-Only ENCODE?\n' - f'type: {type(obj)}\n' - f'obj: {obj}\n' + f'obj-> `{obj}`: {type(obj)}\n' ) - + # if type(obj) != NamespacePath: + # breakpoint() return str(obj) - logmsg: str = ( - f'{uid}: Encoding `{obj}: <{type(obj)}>` not supported' - f'type: {type(obj)}\n' - f'obj: {obj}\n' + print( + f'{uid}\n' + 'CUSTOM ENCODE\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' + ) + logmsg: str = ( + f'{uid}\n' + 'FAILED ENCODE\n' + f'obj-> `{obj}: {type(obj)}`\n' ) - print(logmsg) raise NotImplementedError(logmsg) def dec_nsp( - type: Type, + obj_type: Type, obj: Any, ) -> Any: print( - f'{uid}: CUSTOM DECODE\n' - f'input type: {type}\n' - f'obj: {obj}\n' - f'type(obj): `{type(obj).__class__}`\n' + f'{uid}\n' + 'CUSTOM DECODE\n' + f'type-arg-> {obj_type}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' ) nsp = None - # This never seems to hit? - if isinstance(obj, Msg): - print(f'Msg type: {obj}') - if ( - type is NamespacePath + obj_type is NamespacePath and isinstance(obj, str) and ':' in obj ): nsp = NamespacePath(obj) + # TODO: we could built a generic handler using + # JUST matching the obj_type part? + # nsp = obj_type(obj) if nsp: print(f'Returning NSP instance: {nsp}') return nsp logmsg: str = ( - f'{uid}: Decoding `{obj}: <{type(obj)}>` not supported' - f'input type: {type(obj)}\n' - f'obj: {obj}\n' - f'type(obj): `{type(obj).__class__}`\n' + f'{uid}\n' + 'FAILED DECODE\n' + f'type-> {obj_type}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' ) - print(logmsg) - raise NotImplementedError(logmsg) - + # TODO: figure out the ignore subsys for this! + # -[ ] option whether to defense-relay backc the msg + # inside an `Invalid`/`Ignore` + # -[ ] how to make this handling pluggable such that a + # `Channel`/`MsgTransport` can intercept and process + # back msgs either via exception handling or some other + # signal? + log.warning(logmsg) + # NOTE: this delivers the invalid + # value up to `msgspec`'s decoding + # machinery for error raising. + return obj + # raise NotImplementedError(logmsg) nsp_codec: MsgCodec = mk_codec( ipc_pld_spec=pld_spec, @@ -151,97 +162,32 @@ def mk_custom_codec( # `Any`-decoded-pld the enc has no knowledge (by default) # how to enc `NamespacePath` (nsp), so we add a custom # hook to do that ALWAYS. - enc_hook=enc_nsp, + enc_hook=enc_nsp if add_hooks else None, # XXX NOTE: pretty sure this is mutex with the `type=` to # `Decoder`? so it won't work in tandem with the # `ipc_pld_spec` passed above? - dec_hook=dec_nsp, + dec_hook=dec_nsp if add_hooks else None, ) return nsp_codec -@tractor.context -async def send_back_nsp( - ctx: Context, - expect_debug: bool, - use_any_spec: bool, - -) -> None: - ''' - Setup up a custom codec to load instances of `NamespacePath` - and ensure we can round trip a func ref with our parent. 
- - ''' - # debug mode sanity check - assert expect_debug == _state.debug_mode() - - # task: trio.Task = trio.lowlevel.current_task() - - # TreeVar - # curr_codec = _ctxvar_MsgCodec.get_in(task) - - # ContextVar - # task_ctx: Context = task.context - # assert _ctxvar_MsgCodec not in task_ctx - - curr_codec = _ctxvar_MsgCodec.get() - assert curr_codec is _codec._def_tractor_codec - - if use_any_spec: - pld_spec = Any - else: - # NOTE: don't need the |None here since - # the parent side will never send `None` like - # we do here in the implicit return at the end of this - # `@context` body. - pld_spec = NamespacePath # |None - - nsp_codec: MsgCodec = mk_custom_codec( - pld_spec=pld_spec, - ) - with apply_codec(nsp_codec) as codec: - chk_codec_applied( - custom_codec=nsp_codec, - enter_value=codec, - ) - - # ensure roundtripping works locally - nsp = NamespacePath.from_ref(ex_func) - wire_bytes: bytes = nsp_codec.encode( - Started( - cid=ctx.cid, - pld=nsp - ) - ) - msg: Started = nsp_codec.decode(wire_bytes) - pld = msg.pld - assert pld == nsp - - await ctx.started(nsp) - async with ctx.open_stream() as ipc: - async for msg in ipc: - - if use_any_spec: - assert msg == f'{__name__}:ex_func' - - # TODO: as per below - # assert isinstance(msg, NamespacePath) - assert isinstance(msg, str) - else: - assert isinstance(msg, NamespacePath) - - await ipc.send(msg) - - def chk_codec_applied( - custom_codec: MsgCodec, - enter_value: MsgCodec, + expect_codec: MsgCodec, + enter_value: MsgCodec|None = None, + ) -> MsgCodec: + ''' + buncha sanity checks ensuring that the IPC channel's + context-vars are set to the expected codec and that are + ctx-var wrapper APIs match the same. - # task: trio.Task = trio.lowlevel.current_task() - + ''' + # TODO: play with tricyle again, bc this is supposed to work + # the way we want? + # # TreeVar + # task: trio.Task = trio.lowlevel.current_task() # curr_codec = _ctxvar_MsgCodec.get_in(task) # ContextVar @@ -249,46 +195,358 @@ def chk_codec_applied( # assert _ctxvar_MsgCodec in task_ctx # curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + # NOTE: currently we use this! 
# RunVar - curr_codec: MsgCodec = _ctxvar_MsgCodec.get() + curr_codec: MsgCodec = current_codec() last_read_codec = _ctxvar_MsgCodec.get() - assert curr_codec is last_read_codec + # assert curr_codec is last_read_codec assert ( + (same_codec := expect_codec) is # returned from `mk_codec()` - custom_codec is # yielded value from `apply_codec()` - enter_value is # read from current task's `contextvars.Context` curr_codec is - - # public API for all of the above - current_codec() + last_read_codec # the default `msgspec` settings is not _codec._def_msgspec_codec is not _codec._def_tractor_codec ) + if enter_value: + enter_value is same_codec + + +def iter_maybe_sends( + send_items: dict[Union[Type], Any] | list[tuple], + ipc_pld_spec: Union[Type] | Any, + add_codec_hooks: bool, + + codec: MsgCodec|None = None, + +) -> tuple[Any, bool]: + + if isinstance(send_items, dict): + send_items = send_items.items() + + for ( + send_type_spec, + send_value, + ) in send_items: + + expect_roundtrip: bool = False + + # values-to-typespec santiy + send_type = type(send_value) + assert send_type == send_type_spec or ( + (subtypes := getattr(send_type_spec, '__args__', None)) + and send_type in subtypes + ) + + spec_subtypes: set[Union[Type]] = ( + getattr( + ipc_pld_spec, + '__args__', + {ipc_pld_spec,}, + ) + ) + send_in_spec: bool = ( + send_type == ipc_pld_spec + or ( + ipc_pld_spec != Any + and # presume `Union` of types + send_type in spec_subtypes + ) + or ( + ipc_pld_spec == Any + and + send_type != NamespacePath + ) + ) + expect_roundtrip = ( + send_in_spec + # any spec should support all other + # builtin py values that we send + # except our custom nsp type which + # we should be able to send as long + # as we provide the custom codec hooks. + or ( + ipc_pld_spec == Any + and + send_type == NamespacePath + and + add_codec_hooks + ) + ) + + if codec is not None: + # XXX FIRST XXX ensure roundtripping works + # before touching any IPC primitives/APIs. + wire_bytes: bytes = codec.encode( + Started( + cid='blahblah', + pld=send_value, + ) + ) + # NOTE: demonstrates the decoder loading + # to via our native SCIPP msg-spec + # (structurred-conc-inter-proc-protocol) + # implemented as per, + try: + msg: Started = codec.decode(wire_bytes) + if not expect_roundtrip: + pytest.fail( + f'NOT-EXPECTED able to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {send_type}\n' + ) + + pld = msg.pld + assert pld == send_value + + except ValidationError: + if expect_roundtrip: + pytest.fail( + f'EXPECTED to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {send_type}\n' + ) + + yield ( + str(send_type), + send_value, + expect_roundtrip, + ) + + +def dec_type_union( + type_names: list[str], +) -> Type: + ''' + Look up types by name, compile into a list and then create and + return a `typing.Union` from the full set. + + ''' + import importlib + types: list[Type] = [] + for type_name in type_names: + for ns in [ + typing, + importlib.import_module(__name__), + ]: + if type_ref := getattr( + ns, + type_name, + False, + ): + types.append(type_ref) + + # special case handling only.. + # ipc_pld_spec: Union[Type] = eval( + # pld_spec_str, + # {}, # globals + # {'typing': typing}, # locals + # ) + + return Union[*types] + + +def enc_type_union( + union_or_type: Union[Type]|Type, +) -> list[str]: + ''' + Encode a type-union or single type to a list of type-name-strings + ready for IPC interchange. 
+ + ''' + type_strs: list[str] = [] + for typ in getattr( + union_or_type, + '__args__', + {union_or_type,}, + ): + type_strs.append(typ.__qualname__) + + return type_strs + + +@tractor.context +async def send_back_nsp( + ctx: Context, + expect_debug: bool, + pld_spec_type_strs: list[str], + add_hooks: bool, + started_msg_bytes: bytes, + expect_ipc_send: dict[str, tuple[Any, bool]], + +) -> None: + ''' + Setup up a custom codec to load instances of `NamespacePath` + and ensure we can round trip a func ref with our parent. + + ''' + # debug mode sanity check (prolly superfluous but, meh) + assert expect_debug == _state.debug_mode() + + # init state in sub-actor should be default + chk_codec_applied( + expect_codec=_codec._def_tractor_codec, + ) + + # load pld spec from input str + ipc_pld_spec = dec_type_union( + pld_spec_type_strs, + ) + pld_spec_str = str(ipc_pld_spec) + + # same as on parent side config. + nsp_codec: MsgCodec = mk_custom_codec( + pld_spec=ipc_pld_spec, + add_hooks=add_hooks, + ) + with apply_codec(nsp_codec) as codec: + chk_codec_applied( + expect_codec=nsp_codec, + enter_value=codec, + ) + + print( + 'CHILD attempting `Started`-bytes DECODE..\n' + ) + try: + msg: Started = nsp_codec.decode(started_msg_bytes) + expected_pld_spec_str: str = msg.pld + assert pld_spec_str == expected_pld_spec_str + + # TODO: maybe we should add our own wrapper error so as to + # be interchange-lib agnostic? + # -[ ] the error type is wtv is raised from the hook so we + # could also require a type-class of errors for + # indicating whether the hook-failure can be handled by + # a nasty-dialog-unprot sub-sys? + except ValidationError: + + # NOTE: only in the `Any` spec case do we expect this to + # work since otherwise no spec covers a plain-ol' + # `.pld: str` + if pld_spec_str == 'Any': + raise + else: + print( + 'CHILD (correctly) unable to DECODE `Started`-bytes\n' + f'{started_msg_bytes}\n' + ) + + iter_send_val_items = iter(expect_ipc_send.values()) + sent: list[Any] = [] + for send_value, expect_send in iter_send_val_items: + try: + print( + f'CHILD attempting to `.started({send_value})`\n' + f'=> expect_send: {expect_send}\n' + f'SINCE, ipc_pld_spec: {ipc_pld_spec}\n' + f'AND, codec: {codec}\n' + ) + await ctx.started(send_value) + sent.append(send_value) + if not expect_send: + + # XXX NOTE XXX THIS WON'T WORK WITHOUT SPECIAL + # `str` handling! or special debug mode IPC + # msgs! + # await tractor.pause() + + raise RuntimeError( + # pytest.fail( + f'NOT-EXPECTED able to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {type(send_value)}\n' + ) + + break # move on to streaming block.. 
+ + except NotImplementedError: + print('FAILED ENCODE!') + + except tractor.MsgTypeError: + # await tractor.pause() + if expect_send: + pytest.fail( + f'EXPECTED to `.started()` value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {type(send_value)}\n' + ) + + async with ctx.open_stream() as ipc: + for send_value, expect_send in iter_send_val_items: + send_type: Type = type(send_value) + print( + 'CHILD report on send value\n' + f'ipc_pld_spec: {ipc_pld_spec}\n' + f'expect_send: {expect_send}\n' + f'val: {send_value}\n' + ) + try: + await ipc.send(send_value) + sent.append(send_value) + if not expect_send: + pytest.fail( + f'NOT-EXPECTED able to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {send_type}\n' + ) + except ValidationError: + if expect_send: + pytest.fail( + f'EXPECTED to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {send_type}\n' + ) + continue + + assert ( + len(sent) + == + len([val + for val, expect in + expect_ipc_send.values() + if expect is True]) + ) + + +def ex_func(*args): + print(f'ex_func({args})') + @pytest.mark.parametrize( 'ipc_pld_spec', [ - # _codec._def_msgspec_codec, Any, - # _codec._def_tractor_codec, - NamespacePath|None, + NamespacePath, + NamespacePath|None, # the "maybe" spec Bo ], ids=[ 'any_type', 'nsp_type', + 'maybe_nsp_type', ] ) +@pytest.mark.parametrize( + 'add_codec_hooks', + [ + True, + False, + ], + ids=['use_codec_hooks', 'no_codec_hooks'], +) def test_codec_hooks_mod( debug_mode: bool, ipc_pld_spec: Union[Type]|Any, + # send_value: None|str|NamespacePath, + add_codec_hooks: bool, ): ''' Audit the `.msg.MsgCodec` override apis details given our impl @@ -297,17 +555,17 @@ def test_codec_hooks_mod( ''' async def main(): + nsp = NamespacePath.from_ref(ex_func) + send_items: dict[Union, Any] = { + Union[None]: None, + Union[NamespacePath]: nsp, + Union[str]: str(nsp), + } - # task: trio.Task = trio.lowlevel.current_task() - - # ContextVar - # task_ctx: Context = task.context - # assert _ctxvar_MsgCodec not in task_ctx - - # TreeVar - # def_codec: MsgCodec = _ctxvar_MsgCodec.get_in(task) - def_codec = _ctxvar_MsgCodec.get() - assert def_codec is _codec._def_tractor_codec + # init default state for actor + chk_codec_applied( + expect_codec=_codec._def_tractor_codec, + ) async with tractor.open_nursery( debug_mode=debug_mode, @@ -323,79 +581,97 @@ def test_codec_hooks_mod( # `NamespacePath` nsp_codec: MsgCodec = mk_custom_codec( pld_spec=ipc_pld_spec, + add_hooks=add_codec_hooks, ) with apply_codec(nsp_codec) as codec: chk_codec_applied( - custom_codec=nsp_codec, + expect_codec=nsp_codec, enter_value=codec, ) + expect_ipc_send: dict[str, tuple[Any, bool]] = {} + + report: str = ( + 'Parent report on send values with\n' + f'ipc_pld_spec: {ipc_pld_spec}\n' + ' ------ - ------\n' + ) + for val_type_str, val, expect_send in iter_maybe_sends( + send_items, + ipc_pld_spec, + add_codec_hooks=add_codec_hooks, + ): + report += ( + f'send_value: {val}: {type(val)} ' + f'=> expect_send: {expect_send}\n' + ) + expect_ipc_send[val_type_str] = (val, expect_send) + + print( + report + + ' ------ - ------\n' + ) + assert len(expect_ipc_send) == len(send_items) + # now try over real IPC with a the subactor + # expect_ipc_rountrip: bool = True + expected_started = Started( + cid='cid', + pld=str(ipc_pld_spec), + ) + # build list of values we expect to receive from + # the subactor. 
+ expect_to_send: list[Any] = [ + val + for val, expect_send in expect_ipc_send.values() + if expect_send + ] + + pld_spec_type_strs: list[str] = enc_type_union(ipc_pld_spec) + + # TODO: send the original nsp here and + # test with `limit_msg_spec()` above? + # await tractor.pause() + print('PARENT opening IPC ctx!\n') async with ( + p.open_context( send_back_nsp, - # TODO: send the original nsp here and - # test with `limit_msg_spec()` above? expect_debug=debug_mode, - use_any_spec=(ipc_pld_spec==Any), - + pld_spec_type_strs=pld_spec_type_strs, + add_hooks=add_codec_hooks, + started_msg_bytes=nsp_codec.encode(expected_started), + expect_ipc_send=expect_ipc_send, ) as (ctx, first), + ctx.open_stream() as ipc, ): - if ipc_pld_spec is NamespacePath: - assert isinstance(first, NamespacePath) - + # ensure codec is still applied across + # `tractor.Context` + its embedded nursery. + chk_codec_applied( + expect_codec=nsp_codec, + enter_value=codec, + ) print( 'root: ENTERING CONTEXT BLOCK\n' f'type(first): {type(first)}\n' f'first: {first}\n' ) - # ensure codec is still applied across - # `tractor.Context` + its embedded nursery. - chk_codec_applied( - custom_codec=nsp_codec, - enter_value=codec, - ) + expect_to_send.remove(first) - first_nsp = NamespacePath(first) + # TODO: explicit values we expect depending on + # codec config! + # assert first == first_val + # assert first == f'{__name__}:ex_func' - # ensure roundtripping works - wire_bytes: bytes = nsp_codec.encode( - Started( - cid=ctx.cid, - pld=first_nsp + async for next_sent in ipc: + print( + 'Child sent next value\n' + f'{next_sent}: {type(next_sent)}\n' ) - ) - msg: Started = nsp_codec.decode(wire_bytes) - pld = msg.pld - assert pld == first_nsp + expect_to_send.remove(next_sent) - # try a manual decode of the started msg+pld - - # TODO: actually get the decoder loading - # to native once we spec our SCIPP msgspec - # (structurred-conc-inter-proc-protocol) - # implemented as per, - # https://github.com/goodboy/tractor/issues/36 - # - if ipc_pld_spec is NamespacePath: - assert isinstance(first, NamespacePath) - - # `Any`-payload-spec case - else: - assert isinstance(first, str) - assert first == f'{__name__}:ex_func' - - await ipc.send(first) - - with trio.move_on_after(.6): - async for msg in ipc: - print(msg) - - # TODO: as per above - # assert isinstance(msg, NamespacePath) - assert isinstance(msg, str) - await ipc.send(msg) - await trio.sleep(0.1) + # all sent values should have arrived! + assert not expect_to_send await p.cancel_actor() @@ -467,7 +743,7 @@ def chk_pld_type( roundtrip: bool|None = None pld_spec_msg_names: list[str] = [ - td.__name__ for td in types._payload_spec_msgs + td.__name__ for td in _payload_msgs ] for typedef in msg_types: -- 2.34.1 From d33eb1588427b9a6506c8f8a89bd7476f50df444 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 13:59:43 -0400 Subject: [PATCH 023/190] Add buncha notes on `Start` field for "params" Such that the current `kwargs: dict` field can eventually be strictly msg-typed (eventually directly from a `@context` def) using modern typed python's hippest syntactical approach B) Also proto a new `CancelAck(Return)` subtype msg for supporting msg-spec agnostic `Actor.cancel_xx()` method calls in the runtime such that a user can't break cancellation (and thus SC) by dynamically setting a codec that doesn't allow `bool` results (as an eg. in this case). Note that the msg isn't used yet in `._rpc` but that's a comin! 
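
As a quick illustration of why such a dedicated ack msg sidesteps any
applied codec, here's a minimal `msgspec` sketch (standalone stand-ins,
NOT the real `tractor.msg.types` defs which subtype a generic `Return`):
because the `pld: bool` annotation is baked into the runtime side of the
decoder's union, a user-narrowed payload spec can't mask it out:

    from msgspec import Struct, msgpack

    class Return(Struct, tag=True):
        cid: str
        pld: str  # pretend the app narrowed its payload spec to `str`

    class CancelAck(Struct, tag=True):
        cid: str
        pld: bool  # cancel-method results are `bool`s, always decodable

    # the runtime always unions-in `CancelAck` alongside the app's spec..
    dec = msgpack.Decoder(Return | CancelAck)
    wire: bytes = msgpack.Encoder().encode(CancelAck(cid='1', pld=True))
    # ..so a `bool` result decodes fine despite the `str`-only app spec.
    assert dec.decode(wire).pld is True
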
--- tractor/msg/types.py | 124 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 11 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index b246cb61..3e7a2d7a 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -45,6 +45,10 @@ from msgspec import ( from tractor.msg import ( pretty_struct, ) +from tractor.log import get_logger + + +log = get_logger('tractor.msgspec') # type variable for the boxed payload field `.pld` PayloadT = TypeVar('PayloadT') @@ -185,7 +189,47 @@ class SpawnSpec( # | Union[DebugLock, DebugLocked, DebugRelease] # ) +# class Params( +# Struct, +# Generic[PayloadT], +# ): +# spec: PayloadT|ParamSpec +# inputs: InputsT|dict[str, Any] + # TODO: for eg. we could stringently check the target + # task-func's type sig and enforce it? + # as an example for an IPTC, + # @tractor.context + # async def send_back_nsp( + # ctx: Context, + # expect_debug: bool, + # pld_spec_str: str, + # add_hooks: bool, + # started_msg_dict: dict, + # ) -> : + + # TODO: figure out which of the `typing` feats we want to + # support: + # - plain ol `ParamSpec`: + # https://docs.python.org/3/library/typing.html#typing.ParamSpec + # - new in 3.12 type parameter lists Bo + # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params + # |_ historical pep 695: https://peps.python.org/pep-0695/ + # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/ + # |_ on annotation scopes: + # https://docs.python.org/3/reference/executionmodel.html#annotation-scopes + # spec: ParamSpec[ + # expect_debug: bool, + # pld_spec_str: str, + # add_hooks: bool, + # started_msg_dict: dict, + # ] + + +# TODO: possibly sub-type for runtime method requests? +# -[ ] `Runtime(Start)` with a `.ns: str = 'self' or +# we can just enforce any such method as having a strict +# ns for calling funcs, namely the `Actor` instance? class Start( Struct, tag=True, @@ -212,9 +256,45 @@ class Start( ns: str func: str - kwargs: dict + # TODO: make this a sub-struct which can be further + # type-limited, maybe `Inputs`? + # => SEE ABOVE <= + kwargs: dict[str, Any] uid: tuple[str, str] # (calling) actor-id + # TODO: enforcing a msg-spec in terms `Msg.pld` + # parameterizable msgs to be used in the appls IPC dialog. + # + # -[ ] both as part of the `.open_context()` call AND as part of the + # immediate ack-reponse (see similar below) + # we should do spec matching and fail if anything is awry? + # + # -[ ] eventually spec should be generated/parsed from the + # type-annots as # desired in GH issue: + # https://github.com/goodboy/tractor/issues/365 + # + # -[ ] semantics of the mismatch case + # - when caller-callee specs we should raise + # a `MsgTypeError` or `MsgSpecError` or similar? + # + # -[ ] wrapper types for both spec types such that we can easily + # IPC transport them? + # - `TypeSpec: Union[Type]` + # * also a `.__contains__()` for doing `None in + # TypeSpec[None|int]` since rn you need to do it on + # `.__args__` for unions.. 
+ # - `MsgSpec: Union[Type[Msg]] + # + # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo + # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params + # |_ historical pep 695: https://peps.python.org/pep-0695/ + # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/ + # |_ on annotation scopes: + # https://docs.python.org/3/reference/executionmodel.html#annotation-scopes + # |_ 3.13 will have subscriptable funcs Bo + # https://peps.python.org/pep-0718/ + pld_spec: str = str(Any) + class StartAck( Struct, @@ -235,14 +315,10 @@ class StartAck( 'context', # TODO: the only one eventually? ] - # TODO: as part of the reponse we should report our allowed - # msg spec which should be generated from the type-annots as - # desired in # https://github.com/goodboy/tractor/issues/365 - # When this does not match what the starter/caller side - # expects we of course raise a `TypeError` just like if - # a function had been called using an invalid signature. - # - # msgspec: MsgSpec + # import typing + # eval(str(Any), {}, {'typing': typing}) + # started_spec: str = str(Any) + # return_spec class Started( @@ -290,6 +366,7 @@ class Stop( # pld: UnsetType = UNSET +# TODO: is `Result` or `Out[come]` a better name? class Return( Msg, Generic[PayloadT], @@ -302,6 +379,27 @@ class Return( pld: PayloadT +class CancelAck( + Return, +): + ''' + Deliver the `bool` return-value from a cancellation `Actor` + method scheduled via and prior RPC request. + + - `Actor.cancel()` + `|_.cancel_soon()` + `|_.cancel_rpc_tasks()` + `|_._cancel_task()` + `|_.cancel_server()` + + RPCs to these methods must **always** be able to deliver a result + despite the currently configured IPC msg spec such that graceful + cancellation is always functional in the runtime. 
+ + ''' + pld: bool + + class Error( Struct, tag=True, @@ -530,9 +628,13 @@ def mk_msg_spec( pld_spec: Union[Type] = specs[spec_build_method] runtime_spec: Union[Type] = Union[*ipc_msg_types] - + ipc_spec = pld_spec | runtime_spec + log.runtime( + 'Generating new IPC msg-spec\n' + f'{ipc_spec}\n' + ) return ( - pld_spec | runtime_spec, + ipc_spec, msgtypes_table[spec_build_method] + ipc_msg_types, ) -- 2.34.1 From 99a2e13c91f9f579deeac69671401824fc3ba63f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 14:04:32 -0400 Subject: [PATCH 024/190] Factor boxed-err formatting into new `pformat_boxed_tb()` helper for use elsewhere --- tractor/_exceptions.py | 78 +++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 9c1dc36d..28c61628 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -104,6 +104,57 @@ def get_err_type(type_name: str) -> BaseException|None: return type_ref +def pformat_boxed_tb( + tb_str: str, + fields_str: str|None = None, + field_prefix: str = ' |_', + indent: str = ' '*2 +) -> str: + if ( + fields_str + and + field_prefix + ): + fields: str = textwrap.indent( + fields_str, + # prefix=' '*2, + # prefix=' |_', + prefix=field_prefix, + ) + else: + fields = fields_str or '' + + # body_indent: str = len(field_prefix) * ' ' + body: str = ( + + # orig + # f' |\n' + # f' ------ - ------\n\n' + # f'{tb_str}\n' + # f' ------ - ------\n' + # f' _|\n' + + f'|\n' + f' ------ - ------\n\n' + f'{tb_str}\n' + f' ------ - ------\n' + f'_|\n' + ) + if len(indent): + body: str = textwrap.indent( + body, + # prefix=body_indent, + prefix=indent, + ) + + return ( + fields + + + body + ) + # return body + + # TODO: rename to just `RemoteError`? class RemoteActorError(Exception): ''' @@ -117,7 +168,7 @@ class RemoteActorError(Exception): ''' reprol_fields: list[str] = [ 'src_uid', - 'relay_path', + # 'relay_path', ] def __init__( @@ -249,7 +300,7 @@ class RemoteActorError(Exception): @property def tb_str( self, - indent: str = ' '*3, + indent: str = ' ', ) -> str: if remote_tb := self.msgdata.get('tb_str'): return textwrap.indent( @@ -309,25 +360,12 @@ class RemoteActorError(Exception): fields: str = self._mk_fields_str( _body_fields, ) - fields: str = textwrap.indent( - fields, - # prefix=' '*2, - prefix=' |_', + body: str = pformat_boxed_tb( + tb_str=self.tb_str, + fields_str=fields, + field_prefix=' |_', + indent=' ', # no indent? ) - indent: str = ''*1 - body: str = ( - f'{fields}' - f' |\n' - f' ------ - ------\n\n' - f'{self.tb_str}\n' - f' ------ - ------\n' - f' _|\n' - ) - if indent: - body: str = textwrap.indent( - body, - prefix=indent, - ) return ( f'<{type(self).__name__}(\n' f'{body}' -- 2.34.1 From 0df557d2ddc2da484248c895efb212f3454208f0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 16:00:32 -0400 Subject: [PATCH 025/190] Make `Context.started()` a type checked IPC send As detailed in the surrounding notes, it's pretty advantageous to always have the child context task ensure the first msg it relays back is msg-type checked against the current spec and thus `MsgCodec`. Implement the check via a simple codec-roundtrip of the `Started` msg such that the `.pld` payload is always validated before transit. This ensures the child will fail early and notify the parent before any streaming takes place (i.e. the "nasty" dialog protocol phase). 
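
To make the check concrete, here's a stripped down sketch (a
hypothetical `checked_started_bytes()` helper and plain `msgspec`
stand-ins, not the real `.msg.types.Started` nor `MsgCodec`): `msgspec`
encoding does NOT validate a struct's field values against their
annotations, only a decode does, so it's the roundtrip that surfaces
a spec violation early on the sender side:

    from msgspec import Struct, ValidationError, msgpack

    class Started(Struct, tag=True):
        cid: str
        pld: int  # stand-in for the currently applied payload spec

    enc = msgpack.Encoder()
    dec = msgpack.Decoder(Started)

    def checked_started_bytes(cid: str, value) -> bytes:
        # encoding happily accepts a non-spec value..
        wire: bytes = enc.encode(Started(cid=cid, pld=value))
        # ..so decode it right back to fail fast, child side.
        dec.decode(wire)
        return wire

    checked_started_bytes('cid0', 42)  # passes the spec
    try:
        checked_started_bytes('cid0', 'not-an-int')
    except ValidationError as verr:
        print(f'child errors before the parent ever sees it: {verr}')
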
The main motivation here is to avoid inter-actor task syncing bugs that are hard(er) to recover from and/or such as if an invalid typed msg is sent to the parent, who then ignores it (depending on config), and then the child thinks the parent is in some presumed state while the parent is still thinking a first msg has yet to arrive. Doing the stringent check on the sender side (i.e. the child is sending the "first" application msg via `.started()`) avoids/sidesteps dealing with such syncing/coordinated-state problems by keeping the entire IPC dialog in a "cheap" or "control" style transaction up until a stream is opened. Iow, the parent task's `.open_context()` block entry can't occur until the child side is definitely (as much as is possible with IPC msg type checking) in a correct state spec wise. During any streaming phase in the dialog the msg-type-checking is NOT done for performance (the "nasty" protocol phase) and instead any type errors are relayed back from the receiving side. I'm still unsure whether to take the same approach on the `Return` msg, since at that point erroring early doesn't benefit the parent task if/when a msg-type error occurs? Definitely more to ponder and tinker out here.. Impl notes: - a gotcha with the roundtrip-codec-ed msg is that it often won't match the input `value` bc in the `msgpack` case many native python sequence/collection types will map to a common array type due to the surjection that `msgpack`'s type-sys imposes. - so we can't assert that `started == rt_started` but it may be useful to at least report the diff of the type-reduced payload so that the caller can at least be notified how the input `value` might be better type-casted prior to call, for ex. pre-casting to `list`s. - added a `._strict_started: bool` that could provide the stringent checking if desired in the future. - on any validation error raise our `MsgTypeError` from it. - ALSO change over the lingering `.send_yield()` deprecated meth body to use a `Yield()`. --- tractor/_context.py | 79 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 12 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 02dcac39..b4e207a4 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -32,6 +32,7 @@ from dataclasses import ( ) from functools import partial import inspect +import msgspec from pprint import pformat from typing import ( Any, @@ -60,6 +61,9 @@ from .msg import ( Started, Stop, Yield, + current_codec, + MsgCodec, + pretty_struct, ) from ._ipc import Channel from ._streaming import MsgStream @@ -505,6 +509,8 @@ class Context: _in_overrun: bool = False _allow_overruns: bool = False + # TODO: figure out how we can enforce this without losing our minds.. 
+ _strict_started: bool = False def __str__(self) -> str: ds: str = '=' @@ -727,7 +733,13 @@ class Context: DeprecationWarning, stacklevel=2, ) - await self.chan.send({'yield': data, 'cid': self.cid}) + # await self.chan.send({'yield': data, 'cid': self.cid}) + await self.chan.send( + Yield( + cid=self.cid, + pld=data, + ) + ) async def send_stop(self) -> None: # await pause() @@ -1643,18 +1655,61 @@ class Context: f'called `.started()` twice on context with {self.chan.uid}' ) - # await self.chan.send( - # { - # 'started': value, - # 'cid': self.cid, - # } - # ) - await self.chan.send( - Started( - cid=self.cid, - pld=value, - ) + started = Started( + cid=self.cid, + pld=value, ) + # XXX MEGA NOTE XXX: ONLY on the first msg sent with + # `Context.started()` do we STRINGENTLY roundtrip-check + # the first payload such that the child side can't send an + # incorrect value according to the currently applied + # msg-spec! + # + # HOWEVER, once a stream is opened via + # `Context.open_stream()` then this check is NEVER done on + # `MsgStream.send()` and instead both the parent and child + # sides are expected to relay back msg-type errors when + # decode failures exhibit on `MsgStream.receive()` calls thus + # enabling a so-called (by the holy 0mq lords) + # "cheap-or-nasty pattern" un-protocol design Bo + # + # https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern + # + codec: MsgCodec = current_codec() + msg_bytes: bytes = codec.encode(started) + try: + # be a "cheap" dialog (see above!) + rt_started = codec.decode(msg_bytes) + if rt_started != started: + + # TODO: break these methods out from the struct subtype? + diff = pretty_struct.Struct.__sub__(rt_started, started) + + complaint: str = ( + 'Started value does not match after codec rountrip?\n\n' + f'{diff}' + ) + # TODO: rn this will pretty much always fail with + # any other sequence type embeded in the + # payload... + if self._strict_started: + raise ValueError(complaint) + else: + log.warning(complaint) + + await self.chan.send(rt_started) + + # raise any msg type error NO MATTER WHAT! 
+ except msgspec.ValidationError as verr: + from tractor._ipc import _raise_msg_type_err + _raise_msg_type_err( + msg=msg_bytes, + codec=codec, + validation_err=verr, + verb_header='Trying to send payload' + # > 'invalid `Started IPC msgs\n' + ) + self._started_called = True async def _drain_overflows( -- 2.34.1 From ffbe471790cdfea0a5e746dfa37c487f54cf7907 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 16:32:15 -0400 Subject: [PATCH 026/190] Expose `MsgTypeError` from pkg --- tractor/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tractor/__init__.py b/tractor/__init__.py index 31f59598..bd9b8610 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -45,9 +45,10 @@ from ._state import ( is_root_process as is_root_process, ) from ._exceptions import ( - RemoteActorError as RemoteActorError, - ModuleNotExposed as ModuleNotExposed, ContextCancelled as ContextCancelled, + ModuleNotExposed as ModuleNotExposed, + MsgTypeError as MsgTypeError, + RemoteActorError as RemoteActorError, ) from .devx import ( breakpoint as breakpoint, -- 2.34.1 From e87f688c8dad5c73e5829339e4c99b2aeaa42f1f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 16:34:07 -0400 Subject: [PATCH 027/190] Factor `MsgpackTCPStream` msg-type checks Add both the `.send()` and `.recv()` handling blocks to a common `_raise_msg_type_err()` which includes detailed error msg formatting: - the `.recv()` side case does introspection of the `Msg` fields and attempting to report the exact (field type related) issue - `.send()` side does some boxed-error style tb formatting like `RemoteActorError`. - add a `strict_types: bool` to `.send()` to allow for just warning on bad inputs versus raising, but always raise from any `Encoder` type error. --- tractor/_ipc.py | 174 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 123 insertions(+), 51 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 6168c77c..9af28e5a 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -54,7 +54,8 @@ from tractor.msg import ( _ctxvar_MsgCodec, _codec, MsgCodec, - types, + types as msgtypes, + pretty_struct, ) log = get_logger(__name__) @@ -72,6 +73,7 @@ def get_stream_addrs(stream: trio.SocketStream) -> tuple: ) +# TODO: this should be our `Union[*msgtypes.__spec__]` now right? MsgType = TypeVar("MsgType") # TODO: consider using a generic def and indexing with our eventual @@ -116,6 +118,73 @@ class MsgTransport(Protocol[MsgType]): ... +def _raise_msg_type_err( + msg: Any|bytes, + codec: MsgCodec, + validation_err: msgspec.ValidationError|None = None, + verb_header: str = '', + +) -> None: + + # if side == 'send': + if validation_err is None: # send-side + + import traceback + from tractor._exceptions import pformat_boxed_tb + + fmt_spec: str = '\n'.join( + map(str, codec.msg_spec.__args__) + ) + fmt_stack: str = ( + '\n'.join(traceback.format_stack(limit=3)) + ) + tb_fmt: str = pformat_boxed_tb( + tb_str=fmt_stack, + # fields_str=header, + field_prefix=' ', + indent='', + ) + raise MsgTypeError( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + # f' ------ - ------\n' + f'{fmt_spec}\n' + ) + + else: + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. 
+ msg_dict: dict = msgspec.msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + errmsg: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' + ) + if verb_header: + errmsg = f'{verb_header} ' + errmsg + + # XXX see if we can determine the exact invalid field + # such that we can comprehensively report the + # specific field's type problem + msgspec_msg: str = validation_err.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + if field_val := msg_dict.get(maybe_field): + field_type: Union[Type] = msg_type.__signature__.parameters[ + maybe_field + ].annotation + errmsg += ( + f'{msg.rstrip("`")}\n\n' + f'{msg_type}\n' + f' |_.{maybe_field}: {field_type} = {field_val!r}\n' + ) + + raise MsgTypeError(errmsg) from validation_err + + # TODO: not sure why we have to inherit here, but it seems to be an # issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; # probably should make a `mypy` issue? @@ -175,9 +244,10 @@ class MsgpackTCPStream(MsgTransport): or _codec._ctxvar_MsgCodec.get() ) - log.critical( - '!?!: USING STD `tractor` CODEC !?!?\n' - f'{self._codec}\n' + # TODO: mask out before release? + log.runtime( + f'New {self} created with codec\n' + f'codec: {self._codec}\n' ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: @@ -221,16 +291,18 @@ class MsgpackTCPStream(MsgTransport): # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. codec: MsgCodec = _ctxvar_MsgCodec.get() + + # TODO: mask out before release? if self._codec.pld_spec != codec.pld_spec: # assert ( # task := trio.lowlevel.current_task() # ) is not self._task # self._task = task self._codec = codec - log.critical( - '.recv() USING NEW CODEC !?!?\n' - f'{self._codec}\n\n' - f'msg_bytes -> {msg_bytes}\n' + log.runtime( + 'Using new codec in {self}.recv()\n' + f'codec: {self._codec}\n\n' + f'msg_bytes: {msg_bytes}\n' ) yield codec.decode(msg_bytes) @@ -252,36 +324,13 @@ class MsgpackTCPStream(MsgTransport): # and always raise such that spec violations # are never allowed to be caught silently! except msgspec.ValidationError as verr: - - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. - msg_dict: dict = msgspec.msgpack.decode(msg_bytes) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(types, msg_type_name) - errmsg: str = ( - f'Received invalid IPC `{msg_type_name}` msg\n\n' + # re-raise as type error + _raise_msg_type_err( + msg=msg_bytes, + codec=codec, + validation_err=verr, ) - # XXX see if we can determine the exact invalid field - # such that we can comprehensively report the - # specific field's type problem - msgspec_msg: str = verr.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - if field_val := msg_dict.get(maybe_field): - field_type: Union[Type] = msg_type.__signature__.parameters[ - maybe_field - ].annotation - errmsg += ( - f'{msg.rstrip("`")}\n\n' - f'{msg_type}\n' - f' |_.{maybe_field}: {field_type} = {field_val}\n' - ) - - raise MsgTypeError(errmsg) from verr - except ( msgspec.DecodeError, UnicodeDecodeError, @@ -307,12 +356,16 @@ class MsgpackTCPStream(MsgTransport): async def send( self, - msg: Any, + msg: msgtypes.Msg, + strict_types: bool = True, # hide_tb: bool = False, ) -> None: ''' - Send a msgpack coded blob-as-msg over TCP. 
+        Send a msgpack encoded py-object-blob-as-msg over TCP.
+
+        If `strict_types == True` then a `MsgTypeError` will be raised
+        on any invalid msg type.
 
         '''
         # __tracebackhide__: bool = hide_tb
         async with self._send_lock:
 
             # NOTE: lookup the `trio.Task.context`'s var for
             # the current `MsgCodec`.
             codec: MsgCodec = _ctxvar_MsgCodec.get()
-            # if self._codec != codec:
+
+            # TODO: mask out before release?
             if self._codec.pld_spec != codec.pld_spec:
                 self._codec = codec
-                log.critical(
-                    '.send() using NEW CODEC !?!?\n'
-                    f'{self._codec}\n\n'
-                    f'OBJ -> {msg}\n'
+                log.runtime(
+                    'Using new codec in {self}.send()\n'
+                    f'codec: {self._codec}\n\n'
+                    f'msg: {msg}\n'
                 )
-            if type(msg) not in types.__spec__:
-                log.warning(
-                    'Sending non-`Msg`-spec msg?\n\n'
-                    f'{msg}\n'
-                )
-            bytes_data: bytes = codec.encode(msg)
+
+            if type(msg) not in msgtypes.__msg_types__:
+                if strict_types:
+                    _raise_msg_type_err(
+                        msg,
+                        codec=codec,
+                    )
+                else:
+                    log.warning(
+                        'Sending non-`Msg`-spec msg?\n\n'
+                        f'{msg}\n'
+                    )
+
+            try:
+                bytes_data: bytes = codec.encode(msg)
+            except TypeError as typerr:
+                raise MsgTypeError(
+                    'A msg field violates the current spec\n'
+                    f'{codec.pld_spec}\n\n'
+                    f'{pretty_struct.Struct.pformat(msg)}'
+                ) from typerr
 
             # supposedly the fastest says,
             # https://stackoverflow.com/a/54027962
             size: bytes = struct.pack("<I", len(bytes_data))
             return await self.stream.send_all(size + bytes_data)
 
+        # -> seems like that might be re-inventing scalability
+        # prots tho no?
         # try:
         #     return await self._transport.recv()
         # except trio.BrokenResourceError:
-- 
2.34.1


From 939f198dd933635643649a828294857322180c60 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 5 Apr 2024 19:07:12 -0400
Subject: [PATCH 028/190] Drop the `None`-sentinel cancel mechanism from the
 RPC loop

Pretty sure we haven't *needed it* for a while, it was always generally
hazardous in terms of IPC msg types, AND it's definitely incompatible
with a dynamically applied typed msg spec: you can't just expect
a `None` to be willy nilly handled all the time XD

For now I'm masking out all the code and leaving very detailed
surrounding notes but am not removing it quite yet in case for some
strange reason it is needed by an edge case (though I haven't found one
according to the test suite).

Backstory:
------ - ------
Originally (i'm pretty sure anyway) it was added as a super naive
"remote cancellation" mechanism (back before there were specific
`Actor` methods for such things) that was mostly (only?) used before
IPC `Channel` closures to "more gracefully cancel" the connection's
parented RPC tasks.

Since we now have explicit runtime-RPC endpoints for conducting remote
cancellation of both tasks and full actors, it should really be removed
anyway, because:
- a `None`-msg sentinel is inconsistent with other RPC endpoint handling
  input patterns which (even prior to typed msging) had specific
  msg-value triggers.
- the IPC endpoint's (block) implementation should use
  `Actor.cancel_rpc_tasks(parent_chan=chan)` instead of a manual loop
  through an `Actor._rpc_tasks.copy()`..

Deats:
- mask the `Channel.send(None)` calls from both the
  `Actor._stream_handler()` tail as well as from the
  `._portal.open_portal()` `was_connected` block.
- mask the msg loop endpoint block and toss in lotsa notes.

Unrelated tweaks:
- drop `Actor._debug_mode`; unused.
- make `Actor.cancel_server()` return a `bool`.
- use `.msg.pretty_struct.Struct.pformat()` to show any msg that is
  ignored (bc invalid) in `._push_result()`.
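
For a concrete feel of that incompatibility, a tiny demo (a simplified
stand-in `Stop` msg, not the full runtime spec union): once any typed
msg-spec decoder is applied, a bare `None` on the wire is just an
invalid msg:

    from msgspec import Struct, ValidationError, msgpack

    class Stop(Struct, tag=True):
        cid: str

    dec = msgpack.Decoder(Stop)  # stand-in for the applied msg-spec
    wire: bytes = msgpack.Encoder().encode(None)
    try:
        dec.decode(wire)
    except ValidationError as verr:
        print(f'`None` sentinel rejected by the spec: {verr}')
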
--- tractor/_portal.py | 6 ++-- tractor/_rpc.py | 70 +++++++++++++++++++++++------------- tractor/_runtime.py | 88 ++++++++++++++++++++++++++------------------- 3 files changed, 100 insertions(+), 64 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index cc9052ba..957eae59 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -502,7 +502,7 @@ async def open_portal( ''' actor = current_actor() assert actor - was_connected = False + was_connected: bool = False async with maybe_open_nursery(nursery, shield=shield) as nursery: @@ -533,9 +533,7 @@ async def open_portal( await portal.aclose() if was_connected: - # gracefully signal remote channel-msg loop - await channel.send(None) - # await channel.aclose() + await channel.aclose() # cancel background msg loop task if msg_loop_cs: diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 9b179524..a765d666 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -424,8 +424,6 @@ async def _invoke( # XXX for .pause_from_sync()` usage we need to make sure # `greenback` is boostrapped in the subactor! await _debug.maybe_init_greenback() - # else: - # await pause() # TODO: possibly a specially formatted traceback # (not sure what typing is for this..)? @@ -850,30 +848,54 @@ async def process_messages( match msg: + # NOTE: this *was a dedicated + # "graceful-terminate-loop" mechanism using + # a `None`-msg-sentinel which would cancel all RPC + # tasks parented by this loop's IPC channel; that + # is all rpc-scheduled-tasks started over the + # connection were explicitly per-task cancelled + # normally prior to the `Channel`'s underlying + # transport being later closed. + # + # * all `.send(None)`s were # removed as part of + # typed-msging requirements + # + # TODO: if this mechanism is still desired going + # forward it should be implemented as part of the + # normal runtime-cancel-RPC endpoints with either, + # - a special `msg.types.Msg` to trigger the loop endpoint + # (like `None` was used prior) or, + # - it should just be accomplished using A + # `Start(ns='self', func='cancel_rpc_tasks())` + # request instead? + # # if msg is None: - # dedicated loop terminate sentinel - case None: + # case None: + # tasks: dict[ + # tuple[Channel, str], + # tuple[Context, Callable, trio.Event] + # ] = actor._rpc_tasks.copy() + # log.cancel( + # f'Peer IPC channel terminated via `None` setinel msg?\n' + # f'=> Cancelling all {len(tasks)} local RPC tasks..\n' + # f'peer: {chan.uid}\n' + # f'|_{chan}\n' + # ) + # # TODO: why aren't we just calling + # # `.cancel_rpc_tasks()` with the parent + # # chan as input instead? + # for (channel, cid) in tasks: + # if channel is chan: + # await actor._cancel_task( + # cid, + # channel, + # requesting_uid=channel.uid, - tasks: dict[ - tuple[Channel, str], - tuple[Context, Callable, trio.Event] - ] = actor._rpc_tasks.copy() - log.cancel( - f'Peer IPC channel terminated via `None` setinel msg?\n' - f'=> Cancelling all {len(tasks)} local RPC tasks..\n' - f'peer: {chan.uid}\n' - f'|_{chan}\n' - ) - for (channel, cid) in tasks: - if channel is chan: - await actor._cancel_task( - cid, - channel, - requesting_uid=channel.uid, + # ipc_msg=msg, + # ) - ipc_msg=msg, - ) - break + # # immediately break out of this loop! + # break # cid = msg.get('cid') # if cid: @@ -911,7 +933,7 @@ async def process_messages( cid=cid, ns=ns, func=funcname, - kwargs=kwargs, + kwargs=kwargs, # type-spec this? 
see `msg.types` uid=actorid, ): # try: diff --git a/tractor/_runtime.py b/tractor/_runtime.py index eee78973..66a1db62 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -65,7 +65,11 @@ from trio import ( TaskStatus, ) -from .msg import NamespacePath +from tractor.msg import ( + pretty_struct, + NamespacePath, + types as msgtypes, +) from ._ipc import Channel from ._context import ( mk_context, @@ -87,10 +91,6 @@ from ._rpc import ( process_messages, try_ship_error_to_remote, ) -from tractor.msg import ( - types as msgtypes, - pretty_struct, -) # from tractor.msg.types import ( # Aid, # SpawnSpec, @@ -160,18 +160,15 @@ class Actor: # Information about `__main__` from parent _parent_main_data: dict[str, str] _parent_chan_cs: CancelScope|None = None - _spawn_spec: SpawnSpec|None = None + _spawn_spec: msgtypes.SpawnSpec|None = None # syncs for setup/teardown sequences _server_down: trio.Event|None = None - # user toggled crash handling (including monkey-patched in - # `trio.open_nursery()` via `.trionics._supervisor` B) - _debug_mode: bool = False - # if started on ``asycio`` running ``trio`` in guest mode _infected_aio: bool = False + # TODO: nursery tracking like `trio` does? # _ans: dict[ # tuple[str, str], # list[ActorNursery], @@ -718,35 +715,50 @@ class Actor: # TODO: figure out why this breaks tests.. db_cs.cancel() - # XXX: is this necessary (GC should do it)? + # XXX TODO XXX: DO WE NEED THIS? + # -[ ] is it necessary any more (GC should do it) now + # that we have strict(er) graceful cancellation + # semantics? # XXX WARNING XXX # Be AWARE OF THE INDENT LEVEL HERE # -> ONLY ENTER THIS BLOCK WHEN ._peers IS # EMPTY!!!! - if ( - not self._peers - and chan.connected() - ): - # if the channel is still connected it may mean the far - # end has not closed and we may have gotten here due to - # an error and so we should at least try to terminate - # the channel from this end gracefully. - log.runtime( - 'Terminating channel with `None` setinel msg\n' - f'|_{chan}\n' - ) - try: - # send msg loop terminate sentinel which - # triggers cancellation of all remotely - # started tasks. - await chan.send(None) + # + # if the channel is still connected it may mean the far + # end has not closed and we may have gotten here due to + # an error and so we should at least try to terminate + # the channel from this end gracefully. + #if ( + # not self._peers + # and chan.connected() + #): + # log.runtime( + # 'Terminating channel with `None` setinel msg\n' + # f'|_{chan}\n' + # ) + # try: + # # ORIGINALLY we sent a msg loop terminate + # # sentinel (`None`) which triggers + # # cancellation of all remotely started + # # tasks. + # # + # # HOWEVER, after we added typed msging, + # # you can't just willy nilly send `None` + # # wherever since it might be invalid given + # # the currently configured msg-spec. + # # + # # SO, this was all removed and I'm pretty + # # confident we don't need it replaced with + # # a manual RPC to + # # a `Actor.cancel_rpc_tasks()` right? + # await chan.send(None) - # XXX: do we want this? no right? - # causes "[104] connection reset by peer" on other end - # await chan.aclose() + # # XXX: do we want this? NO RIGHT? 
+ # # causes "[104] connection reset by peer" on other end + # # await chan.aclose() - except trio.BrokenResourceError: - log.runtime(f"Channel {chan.uid} was already closed") + # except trio.BrokenResourceError: + # log.runtime(f"Channel {chan.uid} was already closed") # TODO: rename to `._deliver_payload()` since this handles # more then just `result` msgs now obvi XD @@ -776,9 +788,10 @@ class Actor: log.warning( 'Ignoring invalid IPC ctx msg!\n\n' f'<= sender: {uid}\n' - f'=> cid: {cid}\n\n' + # XXX don't need right since it's always in msg? + # f'=> cid: {cid}\n\n' - f'{msg}\n' + f'{pretty_struct.Struct.pformat(msg)}\n' ) return @@ -1439,7 +1452,7 @@ class Actor: ) await self._ongoing_rpc_tasks.wait() - def cancel_server(self) -> None: + def cancel_server(self) -> bool: ''' Cancel the internal IPC transport server nursery thereby preventing any new inbound IPC connections establishing. @@ -1448,6 +1461,9 @@ class Actor: if self._server_n: log.runtime("Shutting down channel server") self._server_n.cancel_scope.cancel() + return True + + return False @property def accept_addrs(self) -> list[tuple[str, int]]: -- 2.34.1 From 7b92d2b1cb4605ca4e853d820992de31218ec14c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 10:40:01 -0400 Subject: [PATCH 029/190] Flatten out RPC loop with `match:`/`case:` Mainly expanding out the runtime endpoints for cancellation to separate cases and flattening them with the main RPC-request-invoke block, moving the non-cancel runtime case (where we call `getattr(actor, funcname)`) inside the main `Start` case (for now) which branches on `ns=="self"`. Also, add a new IPC msg `class CancelAck(Return):` which is always included in the default msg-spec such that runtime cancellation (and eventually all) endpoints return that msg (instead of a `Return`) and thus sidestep any currently applied `MsgCodec` such that the results (`bool`s for most cancel methods) are never violating the current type limit(s) on `Msg.pld`. To support this expose a new variable `return_msg: Return|CancelAck` param from `_invoke()`/`_invoke_non_context)()` and set it to `CancelAck` in the appropriate endpoint case-blocks of the msg loop. Clean out all the lingering legacy `chan.send()` commented codez from the invoker funcs, with more cleaning likely to come B) --- tractor/_rpc.py | 308 ++++++++++++++++--------------------------- tractor/msg/types.py | 4 + 2 files changed, 119 insertions(+), 193 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index a765d666..b7638335 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -58,13 +58,15 @@ from .devx import _debug from . import _state from .log import get_logger from tractor.msg.types import ( + CancelAck, + Error, + Msg, + Return, Start, StartAck, Started, Stop, Yield, - Return, - Error, ) if TYPE_CHECKING: @@ -85,6 +87,7 @@ async def _invoke_non_context( treat_as_gen: bool, is_rpc: bool, + return_msg: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -93,7 +96,6 @@ async def _invoke_non_context( # TODO: can we unify this with the `context=True` impl below? 
if inspect.isasyncgen(coro): - # await chan.send({ await chan.send( StartAck( cid=cid, @@ -119,11 +121,6 @@ async def _invoke_non_context( # to_send = await chan.recv_nowait() # if to_send is not None: # to_yield = await coro.asend(to_send) - # await chan.send({ - # # Yield() - # 'cid': cid, - # 'yield': item, - # }) await chan.send( Yield( cid=cid, @@ -138,11 +135,6 @@ async def _invoke_non_context( await chan.send( Stop(cid=cid) ) - # await chan.send({ - # # Stop( - # 'cid': cid, - # 'stop': True, - # }) # one way @stream func that gets treated like an async gen # TODO: can we unify this with the `context=True` impl below? @@ -153,11 +145,6 @@ async def _invoke_non_context( functype='asyncgen', ) ) - # await chan.send({ - # # StartAck() - # 'cid': cid, - # 'functype': 'asyncgen', - # }) # XXX: the async-func may spawn further tasks which push # back values like an async-generator would but must # manualy construct the response dict-packet-responses as @@ -173,11 +160,6 @@ async def _invoke_non_context( await chan.send( Stop(cid=cid) ) - # await chan.send({ - # # Stop( - # 'cid': cid, - # 'stop': True, - # }) else: # regular async function/method # XXX: possibly just a scheduled `Actor._cancel_task()` @@ -195,11 +177,6 @@ async def _invoke_non_context( functype='asyncfunc', ) ) - # await chan.send({ - # # StartAck() - # 'cid': cid, - # 'functype': 'asyncfunc', - # }) except ( trio.ClosedResourceError, trio.BrokenResourceError, @@ -233,13 +210,8 @@ async def _invoke_non_context( and chan.connected() ): try: - # await chan.send({ - # # Return() - # 'cid': cid, - # 'return': result, - # }) await chan.send( - Return( + return_msg( cid=cid, pld=result, ) @@ -404,6 +376,7 @@ async def _invoke( is_rpc: bool = True, hide_tb: bool = True, + return_msg: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -513,6 +486,7 @@ async def _invoke( kwargs, treat_as_gen, is_rpc, + return_msg, task_status, ) # below is only for `@context` funcs @@ -543,11 +517,6 @@ async def _invoke( functype='context', ) ) - # await chan.send({ - # # StartAck() - # 'cid': cid, - # 'functype': 'context', - # }) # TODO: should we also use an `.open_context()` equiv # for this callee side by factoring the impl from @@ -572,16 +541,11 @@ async def _invoke( # deliver final result to caller side. await chan.send( - Return( + return_msg( cid=cid, pld=res, ) ) - # await chan.send({ - # # Return() - # 'cid': cid, - # 'return': res, - # }) # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, @@ -670,7 +634,6 @@ async def _invoke( ctxc = ContextCancelled( msg, boxed_type=trio.Cancelled, - # boxed_type_str='Cancelled', canceller=canceller, ) # assign local error so that the `.outcome` @@ -771,12 +734,12 @@ async def try_ship_error_to_remote( trio.BrokenResourceError, BrokenPipeError, ): - # err_msg: dict = msg['error']['tb_str'] log.critical( 'IPC transport failure -> ' f'failed to ship error to {remote_descr}!\n\n' f'X=> {channel.uid}\n\n' - # f'{err_msg}\n' + + # TODO: use `.msg.preetty_struct` for this! f'{msg}\n' ) @@ -818,6 +781,8 @@ async def process_messages( ''' + assert actor._service_n # state sanity + # TODO: once `trio` get's an "obvious way" for req/resp we # should use it? 
# https://github.com/python-trio/trio/issues/467 @@ -827,7 +792,7 @@ async def process_messages( f'|_{chan}\n' ) nursery_cancelled_before_task: bool = False - msg: dict | None = None + msg: Msg|None = None try: # NOTE: this internal scope allows for keeping this # message loop running despite the current task having @@ -836,6 +801,7 @@ async def process_messages( # using ``scope = Nursery.start()`` with CancelScope(shield=shield) as loop_cs: task_status.started(loop_cs) + async for msg in chan: log.transport( # type: ignore f'<= IPC msg from peer: {chan.uid}\n\n' @@ -890,21 +856,18 @@ async def process_messages( # cid, # channel, # requesting_uid=channel.uid, - # ipc_msg=msg, # ) - # # immediately break out of this loop! # break - # cid = msg.get('cid') - # if cid: case ( StartAck(cid=cid) | Started(cid=cid) | Yield(cid=cid) | Stop(cid=cid) | Return(cid=cid) + | CancelAck(cid=cid) | Error(cid=cid) ): # deliver response to local caller/waiter @@ -914,17 +877,85 @@ async def process_messages( cid, msg, ) + # TODO: can remove right? + # continue + + # runtime-internal cancellation endpoints + case Start( + ns='self', + func='cancel', + cid=cid, + kwargs=kwargs, + ): + kwargs |= {'req_chan': chan} + + # XXX NOTE XXX don't start entire actor + # runtime cancellation if this actor is + # currently in debug mode! + pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete + if pdb_complete: + await pdb_complete.wait() + + # Either of `Actor.cancel()`/`.cancel_soon()` + # was called, so terminate this IPC msg + # loop, exit back out into `async_main()`, + # and immediately start the core runtime + # machinery shutdown! + with CancelScope(shield=True): + await _invoke( + actor, + cid, + chan, + actor.cancel, + kwargs, + is_rpc=False, + return_msg=CancelAck, + ) log.runtime( - 'Waiting on next IPC msg from\n' - f'peer: {chan.uid}:\n' + 'Cancelling IPC transport msg-loop with peer:\n' f'|_{chan}\n' - - # f'last msg: {msg}\n' ) - continue + loop_cs.cancel() + break - # process a 'cmd' request-msg upack + case Start( + ns='self', + func='_cancel_task', + cid=cid, + kwargs=kwargs, + ): + target_cid: str = kwargs['cid'] + kwargs |= { + 'requesting_uid': chan.uid, + 'ipc_msg': msg, + + # XXX NOTE! ONLY the rpc-task-owning + # parent IPC channel should be able to + # cancel it! + 'parent_chan': chan, + } + try: + await _invoke( + actor, + cid, + chan, + actor._cancel_task, + kwargs, + is_rpc=False, + return_msg=CancelAck, + ) + except BaseException: + log.exception( + 'Failed to cancel task?\n' + f'<= canceller: {chan.uid}\n' + f' |_{chan}\n\n' + f'=> {actor}\n' + f' |_cid: {target_cid}\n' + ) + + # the "MAIN" RPC endpoint to schedule-a-`trio.Task` + # # TODO: impl with native `msgspec.Struct` support !! # -[ ] implement with ``match:`` syntax? # -[ ] discard un-authed msgs as per, @@ -936,139 +967,29 @@ async def process_messages( kwargs=kwargs, # type-spec this? see `msg.types` uid=actorid, ): - # try: - # ( - # ns, - # funcname, - # kwargs, - # actorid, - # cid, - # ) = msg['cmd'] - - # # TODO: put in `case Error():` right? - # except KeyError: - # # This is the non-rpc error case, that is, an - # # error **not** raised inside a call to ``_invoke()`` - # # (i.e. no cid was provided in the msg - see above). 
- # # Push this error to all local channel consumers - # # (normally portals) by marking the channel as errored - # assert chan.uid - # exc = unpack_error(msg, chan=chan) - # chan._exc = exc - # raise exc - log.runtime( 'Handling RPC `Start` request from\n' f'peer: {actorid}\n' '\n' f'=> {ns}.{funcname}({kwargs})\n' ) - # case Start( - # ns='self', - # funcname='cancel', - # ): + + # runtime-internal endpoint: `Actor.` + # only registry methods exist now yah, + # like ``.register_actor()`` etc. ? if ns == 'self': - if funcname == 'cancel': - func: Callable = actor.cancel - kwargs |= { - 'req_chan': chan, - } + func: Callable = getattr(actor, funcname) - # don't start entire actor runtime cancellation - # if this actor is currently in debug mode! - pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete - if pdb_complete: - await pdb_complete.wait() - - # Either of `Actor.cancel()`/`.cancel_soon()` - # was called, so terminate this IPC msg - # loop, exit back out into `async_main()`, - # and immediately start the core runtime - # machinery shutdown! - with CancelScope(shield=True): - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - - log.runtime( - 'Cancelling IPC transport msg-loop with peer:\n' - f'|_{chan}\n' - ) - loop_cs.cancel() - break - - # case Start( - # ns='self', - # funcname='_cancel_task', - # ): - if funcname == '_cancel_task': - func: Callable = actor._cancel_task - - # we immediately start the runtime machinery - # shutdown - # with CancelScope(shield=True): - target_cid: str = kwargs['cid'] - kwargs |= { - # NOTE: ONLY the rpc-task-owning - # parent IPC channel should be able to - # cancel it! - 'parent_chan': chan, - 'requesting_uid': chan.uid, - 'ipc_msg': msg, - } - # TODO: remove? already have emit in meth. - # log.runtime( - # f'Rx RPC task cancel request\n' - # f'<= canceller: {chan.uid}\n' - # f' |_{chan}\n\n' - # f'=> {actor}\n' - # f' |_cid: {target_cid}\n' - # ) - try: - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - except BaseException: - log.exception( - 'Failed to cancel task?\n' - f'<= canceller: {chan.uid}\n' - f' |_{chan}\n\n' - f'=> {actor}\n' - f' |_cid: {target_cid}\n' - ) - continue - - # case Start( - # ns='self', - # funcname='register_actor', - # ): - else: - # normally registry methods, eg. - # ``.register_actor()`` etc. - func: Callable = getattr(actor, funcname) - - # case Start( - # ns=str(), - # funcname=funcname, - # ): + # application RPC endpoint else: - # complain to client about restricted modules try: - func = actor._get_rpc_func(ns, funcname) + func: Callable = actor._get_rpc_func(ns, funcname) except ( ModuleNotExposed, AttributeError, ) as err: + # always complain to requester + # client about un-enabled modules err_msg: dict[str, dict] = pack_error( err, cid=cid, @@ -1078,6 +999,7 @@ async def process_messages( # schedule a task for the requested RPC function # in the actor's main "service nursery". + # # TODO: possibly a service-tn per IPC channel for # supervision isolation? would avoid having to # manage RPC tasks individually in `._rpc_tasks` @@ -1086,7 +1008,7 @@ async def process_messages( f'Spawning task for RPC request\n' f'<= caller: {chan.uid}\n' f' |_{chan}\n\n' - # TODO: maddr style repr? + # ^-TODO-^ maddr style repr? # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' # f'cid="{cid[-16:]} .."\n\n' @@ -1094,7 +1016,6 @@ async def process_messages( f' |_cid: {cid}\n' f' |>> {func}()\n' ) - assert actor._service_n # wait why? do it at top? 
try: ctx: Context = await actor._service_n.start( partial( @@ -1124,13 +1045,12 @@ async def process_messages( log.warning( 'Task for RPC failed?' f'|_ {func}()\n\n' - f'{err}' ) continue else: - # mark that we have ongoing rpc tasks + # mark our global state with ongoing rpc tasks actor._ongoing_rpc_tasks = trio.Event() # store cancel scope such that the rpc task can be @@ -1141,23 +1061,24 @@ async def process_messages( trio.Event(), ) - case Error()|_: - # This is the non-rpc error case, that is, an - # error **not** raised inside a call to ``_invoke()`` - # (i.e. no cid was provided in the msg - see above). - # Push this error to all local channel consumers - # (normally portals) by marking the channel as errored + case Error() | _: + # NOTE: this is the non-rpc error case, + # that is, an error **not** raised inside + # a call to ``_invoke()`` (i.e. no cid was + # provided in the msg - see above). Push + # this error to all local channel + # consumers (normally portals) by marking + # the channel as errored log.exception( f'Unhandled IPC msg:\n\n' f'{msg}\n' ) - assert chan.uid - exc = unpack_error( + # assert chan.uid + chan._exc: Exception = unpack_error( msg, chan=chan, ) - chan._exc = exc - raise exc + raise chan._exc log.runtime( 'Waiting on next IPC msg from\n' @@ -1168,7 +1089,8 @@ async def process_messages( # end of async for, channel disconnect vis # ``trio.EndOfChannel`` log.runtime( - f"{chan} for {chan.uid} disconnected, cancelling tasks" + f'channel for {chan.uid} disconnected, cancelling RPC tasks\n' + f'|_{chan}\n' ) await actor.cancel_rpc_tasks( req_uid=actor.uid, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 3e7a2d7a..7355a610 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -454,6 +454,10 @@ _runtime_msgs: list[Msg] = [ # emission from `MsgStream.aclose()` Stop, + # `Return` sub-type that we always accept from + # runtime-internal cancel endpoints + CancelAck, + # box remote errors, normally subtypes # of `RemoteActorError`. Error, -- 2.34.1 From 7686dd7a15089bf830aa0539eb3bf1fd0ab1c2f3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 16:29:21 -0400 Subject: [PATCH 030/190] Use `object()` when checking for error field value Since the field value could be `None` or some other type with truthy-ness evaluating to `False`.. 
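The `object()` sentinel is the standard way to distinguish "key
missing" from "key present but falsey" in a `dict` lookup; a minimal
standalone sketch of the pattern (the field name is illustrative only):

    _sentinel = object()
    msg_dict: dict = {'maybe_field': None}  # present, but falsey

    # a naive truthiness check wrongly treats `None`/`0`/`''` as missing
    assert not msg_dict.get('maybe_field')

    # the sentinel-default check correctly detects key presence
    if (val := msg_dict.get('maybe_field', _sentinel)) is not _sentinel:
        assert val is None  # field was present, just falsey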
--- tractor/_ipc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 9af28e5a..694eaf9e 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -172,7 +172,8 @@ def _raise_msg_type_err( # specific field's type problem msgspec_msg: str = validation_err.args[0].rstrip('`') msg, _, maybe_field = msgspec_msg.rpartition('$.') - if field_val := msg_dict.get(maybe_field): + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: field_type: Union[Type] = msg_type.__signature__.parameters[ maybe_field ].annotation -- 2.34.1 From 3bccdf6de465904d20d14e1cb22bb06eb9ce9c31 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 16:35:00 -0400 Subject: [PATCH 031/190] Detail out EoC-by-self log msg --- tractor/_streaming.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 941cfe8d..dc30ac6e 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -46,7 +46,6 @@ from .trionics import ( from tractor.msg import ( Stop, Yield, - Error, ) if TYPE_CHECKING: @@ -391,11 +390,11 @@ class MsgStream(trio.abc.Channel): if not self._eoc: log.cancel( - 'Stream closed before it received an EoC?\n' + 'Stream closed by self before it received an EoC?\n' 'Setting eoc manually..\n..' ) self._eoc: bool = trio.EndOfChannel( - f'Context stream closed by {self._ctx.side}\n' + f'Context stream closed by self({self._ctx.side})\n' f'|_{self}\n' ) # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? -- 2.34.1 From 8ac9ccf65d2d811c9dd2e94580b16b6a5beb799b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 18:54:03 -0400 Subject: [PATCH 032/190] Finally drop masked `chan.send(None)` related code blocks --- tractor/_rpc.py | 89 ++++++++++++--------------------------------- tractor/_runtime.py | 48 +----------------------- 2 files changed, 25 insertions(+), 112 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b7638335..de76e3cf 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -806,61 +806,15 @@ async def process_messages( log.transport( # type: ignore f'<= IPC msg from peer: {chan.uid}\n\n' - # TODO: conditionally avoid fmting depending - # on log level (for perf)? - # => specifically `pformat()` sub-call..? + # TODO: avoid fmting depending on loglevel for perf? + # -[ ] specifically `pformat()` sub-call..? + # -[ ] use `.msg.pretty_struct` here now instead! f'{pformat(msg)}\n' ) match msg: - - # NOTE: this *was a dedicated - # "graceful-terminate-loop" mechanism using - # a `None`-msg-sentinel which would cancel all RPC - # tasks parented by this loop's IPC channel; that - # is all rpc-scheduled-tasks started over the - # connection were explicitly per-task cancelled - # normally prior to the `Channel`'s underlying - # transport being later closed. - # - # * all `.send(None)`s were # removed as part of - # typed-msging requirements - # - # TODO: if this mechanism is still desired going - # forward it should be implemented as part of the - # normal runtime-cancel-RPC endpoints with either, - # - a special `msg.types.Msg` to trigger the loop endpoint - # (like `None` was used prior) or, - # - it should just be accomplished using A - # `Start(ns='self', func='cancel_rpc_tasks())` - # request instead? 
- # - # if msg is None: - # case None: - # tasks: dict[ - # tuple[Channel, str], - # tuple[Context, Callable, trio.Event] - # ] = actor._rpc_tasks.copy() - # log.cancel( - # f'Peer IPC channel terminated via `None` setinel msg?\n' - # f'=> Cancelling all {len(tasks)} local RPC tasks..\n' - # f'peer: {chan.uid}\n' - # f'|_{chan}\n' - # ) - # # TODO: why aren't we just calling - # # `.cancel_rpc_tasks()` with the parent - # # chan as input instead? - # for (channel, cid) in tasks: - # if channel is chan: - # await actor._cancel_task( - # cid, - # channel, - # requesting_uid=channel.uid, - # ipc_msg=msg, - # ) - # # immediately break out of this loop! - # break - + # msg for an ongoing IPC ctx session, deliver msg to + # local task. case ( StartAck(cid=cid) | Started(cid=cid) @@ -868,7 +822,7 @@ async def process_messages( | Stop(cid=cid) | Return(cid=cid) | CancelAck(cid=cid) - | Error(cid=cid) + | Error(cid=cid) # RPC-task ctx specific ): # deliver response to local caller/waiter # via its per-remote-context memory channel. @@ -877,10 +831,8 @@ async def process_messages( cid, msg, ) - # TODO: can remove right? - # continue - # runtime-internal cancellation endpoints + # `Actor`(-internal) runtime cancel requests case Start( ns='self', func='cancel', @@ -955,11 +907,9 @@ async def process_messages( ) # the "MAIN" RPC endpoint to schedule-a-`trio.Task` - # - # TODO: impl with native `msgspec.Struct` support !! - # -[ ] implement with ``match:`` syntax? - # -[ ] discard un-authed msgs as per, - # + # ------ - ------ + # -[x] discard un-authed msgs as per, + # case Start( cid=cid, ns=ns, @@ -983,7 +933,10 @@ async def process_messages( # application RPC endpoint else: try: - func: Callable = actor._get_rpc_func(ns, funcname) + func: Callable = actor._get_rpc_func( + ns, + funcname, + ) except ( ModuleNotExposed, AttributeError, @@ -1061,6 +1014,8 @@ async def process_messages( trio.Event(), ) + # XXX remote (runtime scoped) error or uknown + # msg (type). case Error() | _: # NOTE: this is the non-rpc error case, # that is, an error **not** raised inside @@ -1086,8 +1041,9 @@ async def process_messages( f'|_{chan}\n' ) - # end of async for, channel disconnect vis - # ``trio.EndOfChannel`` + # END-OF `async for`: + # IPC disconnected via `trio.EndOfChannel`, likely + # due to a (graceful) `Channel.aclose()`. log.runtime( f'channel for {chan.uid} disconnected, cancelling RPC tasks\n' f'|_{chan}\n' @@ -1107,9 +1063,10 @@ async def process_messages( # connection-reset) is ok since we don't have a teardown # handshake for them (yet) and instead we simply bail out of # the message loop and expect the teardown sequence to clean - # up. - # TODO: don't show this msg if it's an emphemeral - # discovery ep call? + # up.. + # TODO: add a teardown handshake? and, + # -[ ] don't show this msg if it's an ephemeral discovery ep call? + # -[ ] figure out how this will break with other transports? log.runtime( f'channel closed abruptly with\n' f'peer: {chan.uid}\n' diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 66a1db62..0b00f747 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -389,8 +389,9 @@ class Actor: raise mne + # TODO: maybe change to mod-func and rename for implied + # multi-transport semantics? async def _stream_handler( - self, stream: trio.SocketStream, @@ -715,51 +716,6 @@ class Actor: # TODO: figure out why this breaks tests.. db_cs.cancel() - # XXX TODO XXX: DO WE NEED THIS? 
-        #  -[ ] is it necessary any more (GC should do it) now
-        #   that we have strict(er) graceful cancellation
-        #   semantics?
-        # XXX WARNING XXX
-        # Be AWARE OF THE INDENT LEVEL HERE
-        # -> ONLY ENTER THIS BLOCK WHEN ._peers IS
-        # EMPTY!!!!
-        #
-        # if the channel is still connected it may mean the far
-        # end has not closed and we may have gotten here due to
-        # an error and so we should at least try to terminate
-        # the channel from this end gracefully.
-        #if (
-        #    not self._peers
-        #    and chan.connected()
-        #):
-        #    log.runtime(
-        #        'Terminating channel with `None` setinel msg\n'
-        #        f'|_{chan}\n'
-        #    )
-        #    try:
-        #        # ORIGINALLY we sent a msg loop terminate
-        #        # sentinel (`None`) which triggers
-        #        # cancellation of all remotely started
-        #        # tasks.
-        #        #
-        #        # HOWEVER, after we added typed msging,
-        #        # you can't just willy nilly send `None`
-        #        # wherever since it might be invalid given
-        #        # the currently configured msg-spec.
-        #        #
-        #        # SO, this was all removed and I'm pretty
-        #        # confident we don't need it replaced with
-        #        # a manual RPC to
-        #        # a `Actor.cancel_rpc_tasks()` right?
-        #        await chan.send(None)

-        #        # XXX: do we want this? NO RIGHT?
-        #        # causes "[104] connection reset by peer" on other end
-        #        # await chan.aclose()

-        #    except trio.BrokenResourceError:
-        #        log.runtime(f"Channel {chan.uid} was already closed")

     # TODO: rename to `._deliver_payload()` since this handles
     # more then just `result` msgs now obvi XD
     async def _push_result(
-- 
2.34.1


From b3e3e0ff85e0b106ed047cf5b317c4c16be19915 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 8 Apr 2024 10:13:14 -0400
Subject: [PATCH 033/190] Caps-msging test tweaks to get correct failures

These are likely temporary changes but still needed to actually see
the desired/correct failures (of which 5 of 6 tests are supposed to
fail rn) mostly to do with `Start` and `Return` msgs which are invalid
under each test's applied msg-spec.

Tweak set here:
- bit more `print()`s in root and sub for grokin test flow.
- never use `pytest.fail()` in subactor.. should know this by now XD
- comment out some bits that can't ever pass rn and make the underlying
  expected failures harder to grok:
  - the sub's child-side-of-ctx task doing sends should only fail for
    certain msg types like `Started` + `Return`, `Yield`s are processed
    receiver/parent side.
  - don't expect `sent` list to match predicate set for the same reason
    as last bullet.

The outstanding msg-type-semantic validation questions are:
- how to handle `.open_context()` with an input `kwargs` set that
  doesn't adhere to the currently applied msg-spec?
  - should the initial `@acm` entry fail before sending to the child
    side?
- where should received `MsgTypeError`s be raised, at the `MsgStream`
  `.receive()` or lower in the stack?
  - i'm thinking we should mk `MsgTypeError` derive from
    `RemoteActorError` and then have it be delivered as an error to the
    `Context`/`MsgStream` for per-ctx-task handling; would lead to
    more flexible/modular policy overrides in user code outside any
    defaults we provide.
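For reference, a minimal sketch of how a `msgspec` payload-typed msg
rejects out-of-spec values at decode (not encode) time, which is the
failure mode these tests poke at; the struct and the `pld: int` spec
here are illustrative only, not the real `tractor` msg set:

    import msgspec
    from msgspec import msgpack

    class Started(msgspec.Struct, tag=True, tag_field='msg_type'):
        cid: str
        pld: int  # the "applied" payload (pld) spec

    dec = msgpack.Decoder(Started)
    assert dec.decode(msgpack.encode(Started(cid='1', pld=42))).pld == 42

    # structs do NOT validate on construction, only on decode..
    bad: bytes = msgpack.encode(Started(cid='1', pld='not-an-int'))
    try:
        dec.decode(bad)
    except msgspec.ValidationError as verr:
        print(verr)  # Expected `int`, got `str` - at `$.pld`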
--- tests/test_caps_based_msging.py | 85 ++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index acc1f307..f7cab2a5 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -374,7 +374,7 @@ def enc_type_union( @tractor.context -async def send_back_nsp( +async def send_back_values( ctx: Context, expect_debug: bool, pld_spec_type_strs: list[str], @@ -388,6 +388,8 @@ async def send_back_nsp( and ensure we can round trip a func ref with our parent. ''' + uid: tuple = tractor.current_actor().uid + # debug mode sanity check (prolly superfluous but, meh) assert expect_debug == _state.debug_mode() @@ -414,7 +416,7 @@ async def send_back_nsp( ) print( - 'CHILD attempting `Started`-bytes DECODE..\n' + f'{uid}: attempting `Started`-bytes DECODE..\n' ) try: msg: Started = nsp_codec.decode(started_msg_bytes) @@ -436,7 +438,7 @@ async def send_back_nsp( raise else: print( - 'CHILD (correctly) unable to DECODE `Started`-bytes\n' + f'{uid}: (correctly) unable to DECODE `Started`-bytes\n' f'{started_msg_bytes}\n' ) @@ -445,7 +447,7 @@ async def send_back_nsp( for send_value, expect_send in iter_send_val_items: try: print( - f'CHILD attempting to `.started({send_value})`\n' + f'{uid}: attempting to `.started({send_value})`\n' f'=> expect_send: {expect_send}\n' f'SINCE, ipc_pld_spec: {ipc_pld_spec}\n' f'AND, codec: {codec}\n' @@ -460,7 +462,6 @@ async def send_back_nsp( # await tractor.pause() raise RuntimeError( - # pytest.fail( f'NOT-EXPECTED able to roundtrip value given spec:\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {type(send_value)}\n' @@ -468,53 +469,76 @@ async def send_back_nsp( break # move on to streaming block.. - except NotImplementedError: - print('FAILED ENCODE!') - except tractor.MsgTypeError: # await tractor.pause() if expect_send: - pytest.fail( + raise RuntimeError( f'EXPECTED to `.started()` value given spec:\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {type(send_value)}\n' ) async with ctx.open_stream() as ipc: + print( + f'{uid}: Entering streaming block to send remaining values..' 
+ ) + for send_value, expect_send in iter_send_val_items: send_type: Type = type(send_value) print( - 'CHILD report on send value\n' + '------ - ------\n' + f'{uid}: SENDING NEXT VALUE\n' f'ipc_pld_spec: {ipc_pld_spec}\n' f'expect_send: {expect_send}\n' f'val: {send_value}\n' + '------ - ------\n' ) try: await ipc.send(send_value) + print(f'***\n{uid}-CHILD sent {send_value!r}\n***\n') sent.append(send_value) - if not expect_send: - pytest.fail( - f'NOT-EXPECTED able to roundtrip value given spec:\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {send_type}\n' - ) + + # NOTE: should only raise above on + # `.started()` or a `Return` + # if not expect_send: + # raise RuntimeError( + # f'NOT-EXPECTED able to roundtrip value given spec:\n' + # f'ipc_pld_spec -> {ipc_pld_spec}\n' + # f'value -> {send_value}: {send_type}\n' + # ) + except ValidationError: + print(f'{uid} FAILED TO SEND {send_value}!') + + # await tractor.pause() if expect_send: - pytest.fail( + raise RuntimeError( f'EXPECTED to roundtrip value given spec:\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {send_type}\n' ) - continue + # continue - assert ( - len(sent) - == - len([val - for val, expect in - expect_ipc_send.values() - if expect is True]) - ) + else: + print( + f'{uid}: finished sending all values\n' + 'Should be exiting stream block!\n' + ) + + print(f'{uid}: exited streaming block!') + + # TODO: this won't be true bc in streaming phase we DO NOT + # msgspec check outbound msgs! + # -[ ] once we implement the receiver side `InvalidMsg` + # then we can expect it here? + # assert ( + # len(sent) + # == + # len([val + # for val, expect in + # expect_ipc_send.values() + # if expect is True]) + # ) def ex_func(*args): @@ -635,7 +659,7 @@ def test_codec_hooks_mod( async with ( p.open_context( - send_back_nsp, + send_back_values, expect_debug=debug_mode, pld_spec_type_strs=pld_spec_type_strs, add_hooks=add_codec_hooks, @@ -665,10 +689,13 @@ def test_codec_hooks_mod( async for next_sent in ipc: print( - 'Child sent next value\n' + 'Parent: child sent next value\n' f'{next_sent}: {type(next_sent)}\n' ) - expect_to_send.remove(next_sent) + if expect_to_send: + expect_to_send.remove(next_sent) + else: + print('PARENT should terminate stream loop + block!') # all sent values should have arrived! assert not expect_to_send -- 2.34.1 From fb94ecd72909e48678768db069e24a05a1263c33 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 8 Apr 2024 10:25:57 -0400 Subject: [PATCH 034/190] Rename `Actor._push_result()` -> `._deliver_ctx_payload()` Better describes the internal RPC impl/latest-architecture with the msgs delivered being those which either define a `.pld: PayloadT` that gets passed up to user code, or the error-msg subset that similarly is raised in a ctx-linked task. 
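Loosely, the delivery split the new name alludes to can be summarized
as below (a rough sketch only, using the msg types from
`tractor.msg.types` and eliding `Stop` and the ack msgs):

    from tractor.msg.types import Yield, Return, Started, Error

    # `.pld`-bearing msgs: the payload is passed up to user code
    PayloadMsg = Started | Yield | Return

    # the error subset: raised in the corresponding ctx-linked task
    DeliveredErrMsg = Error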
--- tractor/_context.py | 10 +++++----- tractor/_rpc.py | 2 +- tractor/_runtime.py | 15 ++++++++++----- tractor/_streaming.py | 2 +- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index b4e207a4..e0f62ec8 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1210,7 +1210,7 @@ class Context: # XXX: (MEGA IMPORTANT) if this is a root opened process we # wait for any immediate child in debug before popping the # context from the runtime msg loop otherwise inside - # ``Actor._push_result()`` the msg will be discarded and in + # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in # the case where that msg is global debugger unlock (via # a "stop" msg for a stream), this can result in a deadlock # where the root is waiting on the lock to clear but the @@ -1701,11 +1701,11 @@ class Context: # raise any msg type error NO MATTER WHAT! except msgspec.ValidationError as verr: - from tractor._ipc import _raise_msg_type_err - _raise_msg_type_err( + from tractor._ipc import _mk_msg_type_err + raise _mk_msg_type_err( msg=msg_bytes, codec=codec, - validation_err=verr, + src_validation_error=verr, verb_header='Trying to send payload' # > 'invalid `Started IPC msgs\n' ) @@ -2418,7 +2418,7 @@ async def open_context_from_portal( # XXX: (MEGA IMPORTANT) if this is a root opened process we # wait for any immediate child in debug before popping the # context from the runtime msg loop otherwise inside - # ``Actor._push_result()`` the msg will be discarded and in + # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in # the case where that msg is global debugger unlock (via # a "stop" msg for a stream), this can result in a deadlock # where the root is waiting on the lock to clear but the diff --git a/tractor/_rpc.py b/tractor/_rpc.py index de76e3cf..b494af2b 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -826,7 +826,7 @@ async def process_messages( ): # deliver response to local caller/waiter # via its per-remote-context memory channel. 
- await actor._push_result( + await actor._deliver_ctx_payload( chan, cid, msg, diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 0b00f747..435464be 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -69,6 +69,7 @@ from tractor.msg import ( pretty_struct, NamespacePath, types as msgtypes, + Msg, ) from ._ipc import Channel from ._context import ( @@ -77,9 +78,10 @@ from ._context import ( ) from .log import get_logger from ._exceptions import ( - unpack_error, - ModuleNotExposed, ContextCancelled, + ModuleNotExposed, + MsgTypeError, + unpack_error, TransportClosed, ) from .devx import _debug @@ -559,7 +561,7 @@ class Actor: cid: str|None = msg.cid if cid: # deliver response to local caller/waiter - await self._push_result( + await self._deliver_ctx_payload( chan, cid, msg, @@ -718,11 +720,11 @@ class Actor: # TODO: rename to `._deliver_payload()` since this handles # more then just `result` msgs now obvi XD - async def _push_result( + async def _deliver_ctx_payload( self, chan: Channel, cid: str, - msg: dict[str, Any], + msg: Msg|MsgTypeError, ) -> None|bool: ''' @@ -751,6 +753,9 @@ class Actor: ) return + # if isinstance(msg, MsgTypeError): + # return await ctx._deliver_bad_msg() + return await ctx._deliver_msg(msg) def get_context( diff --git a/tractor/_streaming.py b/tractor/_streaming.py index dc30ac6e..fcf8dafc 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -183,7 +183,7 @@ class MsgStream(trio.abc.Channel): # - via a received `{'stop': ...}` msg from remote side. # |_ NOTE: previously this was triggered by calling # ``._rx_chan.aclose()`` on the send side of the channel inside - # `Actor._push_result()`, but now the 'stop' message handling + # `Actor._deliver_ctx_payload()`, but now the 'stop' message handling # has been put just above inside `_raise_from_no_key_in_msg()`. except ( trio.EndOfChannel, -- 2.34.1 From dbebcc54ccf27c6fda63f1d6faf7a329dcaa56f2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 08:44:06 -0400 Subject: [PATCH 035/190] Unify `MsgTypeError` as a `RemoteActorError` subtype Since in the receive-side error case the source of the exception is the sender side (normally causing a local `TypeError` at decode time), might as well bundle the error in remote-capture-style using boxing semantics around the causing local type error raised from the `msgspec.msgpack.Decoder.decode()` and with a traceback packed from `msgspec`-specific knowledge of any field-type spec matching failure. Deats on new `MsgTypeError` interface: - includes a `.msg_dict` to get access to any `Decoder.type`-applied load of the original (underlying and offending) IPC msg into a `dict` form using a vanilla decoder which is normally packed into the instance as a `._msg_dict`. - a public getter to the "supposed offending msg" via `.payload_msg` which attempts to take the above `.msg_dict` and load it manually into the corresponding `.msg.types.MsgType` struct. - a constructor `.from_decode()` to make it simple to build out error instances from a failed decode scope where the aforementioned `msgdict: dict` from the vanilla decode can be provided directly. - ALSO, we now pack into `MsgTypeError` directly just like ctxc in `unpack_error()` This also completes the while-standing todo for `RemoteActorError` to contain a ref to the underlying `Error` msg as `._ipc_msg` with public `@property` access that `defstruct()`-creates a pretty struct version via `.ipc_msg`. 
Internal tweaks for this include:
- `._ipc_msg` is the internal literal `Error`-msg instance if provided
  with `.ipc_msg` the dynamic wrapper as mentioned above.
- `.__init__()` now can still take variable `**extra_msgdata` (similar
  to the `dict`-msgdata as before) to maintain support for subtypes
  which are constructed manually (not only by `pack_error()`) and
  insert their own attrs which get placed in a `._extra_msgdata: dict`
  if no `ipc_msg: Error` is provided as input.
- the `.msgdata` is now a merge of any `._extra_msgdata` and
  a `dict`-casted form of any `._ipc_msg`.
- adjust all previous `.msgdata` field lookups to try equivalent field
  reads on `._ipc_msg: Error`.
- drop default single ws indent from `.tb_str` and do a failover lookup
  to `.msgdata` when `._ipc_msg is None` for the manually constructed
  subtype-instance case.
- add a new class attr `.extra_body_fields: list[str]` to allow
  subtypes to declare attrs they want shown in the `.__repr__()`
  output, eg. `ContextCancelled.canceller`, `StreamOverrun.sender` and
  `MsgTypeError.payload_msg`.
- ^-rework defaults pertaining to-^ with rename from `_msgdata_keys` ->
  `_ipcmsg_keys` with latter now just loading directly from the `Error`
  fields def and `_body_fields: list[str]` just taking that value and
  removing the not-so-useful-in-REPL or already shown (i.e. `.tb_str:
  str`) field names.
- add a new mod level `.pack_from_raise()` helper for auto-boxing RAE
  subtypes constructed manually into `Error`s which is normally how
  `StreamOverrun` and `MsgTypeError` get created in the runtime.
- in support of the above expose a `src_uid: tuple` override to
  `pack_error()` such that the runtime can provide any remote actor id
  when packing a locally-created yet remotely-caused RAE subtype.
- adjust all typing to expect `Error`s over `dict`-msgs.

Adjust some tests to match these changes:
- context and inter-peer-cancel tests to make their `.msgdata` related
  checks against the new `.ipc_msg` as well and `.tb_str` directly.
- toss in an extra sleep to `sleep_a_bit_then_cancel_peer()` to keep
  the 'canceller' ctx child task cancelled by its parent in the 'root'
  for the rte-raised-during-ctxc-handling case (apparently now it's
  returning too fast, cool?).
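The `defstruct()`-based `.ipc_msg` wrapper mentioned above boils down
to snapshotting a live struct's fields into a runtime-built struct
type; a minimal sketch of the underlying `msgspec` pattern (with a
simplified stand-in `Error`, not the full msg type):

    import msgspec
    from msgspec import structs, defstruct

    class Error(msgspec.Struct):
        src_uid: tuple
        tb_str: str

    err = Error(src_uid=('sub', '<uuid4>'), tb_str='Traceback ...')

    # re-render the live msg as a new struct type built at runtime
    # from the same field set: a read-only-ish snapshot copy
    fields: dict = structs.asdict(err)
    snap = defstruct(
        'Error',
        fields=list(fields.keys()),
        bases=(Error,),
    )(**fields)
    assert snap.tb_str == err.tb_str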
--- tests/test_context_stream_semantics.py | 7 +- tests/test_inter_peer_cancellation.py | 10 + tractor/_exceptions.py | 418 +++++++++++++++++++------ 3 files changed, 333 insertions(+), 102 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index d2b572cc..cedddf73 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -796,10 +796,12 @@ async def test_callee_cancels_before_started( # raises a special cancel signal except tractor.ContextCancelled as ce: + _ce = ce # for debug on crash ce.boxed_type == trio.Cancelled # the traceback should be informative - assert 'itself' in ce.msgdata['tb_str'] + assert 'itself' in ce.tb_str + assert ce.tb_str == ce.msgdata['tb_str'] # teardown the actor await portal.cancel_actor() @@ -1157,7 +1159,8 @@ def test_maybe_allow_overruns_stream( elif slow_side == 'parent': assert err.boxed_type == tractor.RemoteActorError - assert 'StreamOverrun' in err.msgdata['tb_str'] + assert 'StreamOverrun' in err.tb_str + assert err.tb_str == err.msgdata['tb_str'] else: # if this hits the logic blocks from above are not diff --git a/tests/test_inter_peer_cancellation.py b/tests/test_inter_peer_cancellation.py index 470287fb..aa05e3c8 100644 --- a/tests/test_inter_peer_cancellation.py +++ b/tests/test_inter_peer_cancellation.py @@ -185,6 +185,10 @@ async def sleep_a_bit_then_cancel_peer( await trio.sleep(cancel_after) await peer.cancel_actor() + # such that we're cancelled by our rent ctx-task + await trio.sleep(3) + print('CANCELLER RETURNING!') + @tractor.context async def stream_ints( @@ -245,6 +249,12 @@ async def stream_from_peer( assert peer_ctx._remote_error is ctxerr assert peer_ctx._remote_error.msgdata == ctxerr.msgdata + # XXX YES, bc exact same msg instances + assert peer_ctx._remote_error._ipc_msg is ctxerr._ipc_msg + + # XXX NO, bc new one always created for property accesss + assert peer_ctx._remote_error.ipc_msg != ctxerr.ipc_msg + # the peer ctx is the canceller even though it's canceller # is the "canceller" XD assert peer_name in peer_ctx.canceller diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 28c61628..a31aa11e 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -31,7 +31,10 @@ import textwrap import traceback import trio -from msgspec import structs +from msgspec import ( + structs, + defstruct, +) from tractor._state import current_actor from tractor.log import get_logger @@ -40,6 +43,8 @@ from tractor.msg import ( Msg, Stop, Yield, + pretty_struct, + types as msgtypes, ) if TYPE_CHECKING: @@ -64,21 +69,38 @@ class InternalError(RuntimeError): ''' -_body_fields: list[str] = [ - 'boxed_type', - 'src_type', - # TODO: format this better if we're going to include it. - # 'relay_path', - 'src_uid', - # only in sub-types - 'canceller', - 'sender', +# NOTE: more or less should be close to these: +# 'boxed_type', +# 'src_type', +# 'src_uid', +# 'canceller', +# 'sender', +# TODO: format this better if we're going to include it. +# 'relay_path', +# +_ipcmsg_keys: list[str] = [ + fi.name + for fi, k, v + in pretty_struct.iter_fields(Error) + ] -_msgdata_keys: list[str] = [ - 'boxed_type_str', -] + _body_fields +_body_fields: list[str] = list( + set(_ipcmsg_keys) + + # NOTE: don't show fields that either don't provide + # any extra useful info or that are already shown + # as part of `.__repr__()` output. 
+ - { + 'src_type_str', + 'boxed_type_str', + 'tb_str', + 'relay_path', + '_msg_dict', + 'cid', + } +) def get_err_type(type_name: str) -> BaseException|None: @@ -137,7 +159,7 @@ def pformat_boxed_tb( f'|\n' f' ------ - ------\n\n' f'{tb_str}\n' - f' ------ - ------\n' + f' ------ - ------\n' f'_|\n' ) if len(indent): @@ -152,10 +174,40 @@ def pformat_boxed_tb( + body ) - # return body -# TODO: rename to just `RemoteError`? +def pack_from_raise( + local_err: ( + ContextCancelled + |StreamOverrun + |MsgTypeError + ), + cid: str, + + **rae_fields, + +) -> Error: + ''' + Raise the provided `RemoteActorError` subtype exception + instance locally to get a traceback and pack it into an IPC + `Error`-msg using `pack_error()` to extract the tb info. + + ''' + try: + raise local_err + except type(local_err) as local_err: + err_msg: dict[str, dict] = pack_error( + local_err, + cid=cid, + **rae_fields, + ) + return err_msg + + +# TODO: better compat with IPC msg structs? +# -[ ] rename to just `RemoteError` like in `mp.manager`? +# -[ ] make a `Struct`-subtype by using the .__post_init__()`? +# https://jcristharif.com/msgspec/structs.html#post-init-processing class RemoteActorError(Exception): ''' A box(ing) type which bundles a remote actor `BaseException` for @@ -170,12 +222,28 @@ class RemoteActorError(Exception): 'src_uid', # 'relay_path', ] + extra_body_fields: list[str] = [ + 'cid', + 'boxed_type', + ] def __init__( self, message: str, + ipc_msg: Error|None = None, boxed_type: Type[BaseException]|None = None, - **msgdata + + # NOTE: only provided by subtypes (ctxc and overruns) + # wishing to both manually instantiate and add field + # values defined on `Error` without having to construct an + # `Error()` before the exception is processed by + # `pack_error()`. + # + # TODO: a better way to support this without the extra + # private `._extra_msgdata`? + # -[ ] ctxc constructed inside `._rpc._invoke()` L:638 + # -[ ] overrun @ `._context.Context._deliver_msg()` L:1958 + **extra_msgdata, ) -> None: super().__init__(message) @@ -188,14 +256,24 @@ class RemoteActorError(Exception): # - .remote_type # also pertains to our long long oustanding issue XD # https://github.com/goodboy/tractor/issues/5 - # - # TODO: always set ._boxed_type` as `None` by default - # and instead render if from `.boxed_type_str`? self._boxed_type: BaseException = boxed_type self._src_type: BaseException|None = None + self._ipc_msg: Error|None = ipc_msg - # TODO: make this a `.errmsg: Error` throughout? - self.msgdata: dict[str, Any] = msgdata + if ( + extra_msgdata + and ipc_msg + ): + # XXX mutate the orig msg directly from + # manually provided input params. + for k, v in extra_msgdata.items(): + setattr( + self._ipc_msg, + k, + v, + ) + else: + self._extra_msgdata = extra_msgdata # TODO: mask out eventually or place in `pack_error()` # pre-`return` lines? @@ -214,14 +292,56 @@ class RemoteActorError(Exception): # either by customizing `ContextCancelled.__init__()` or # through a special factor func? 
elif boxed_type: - if not self.msgdata.get('boxed_type_str'): - self.msgdata['boxed_type_str'] = str( - type(boxed_type).__name__ - ) + boxed_type_str: str = type(boxed_type).__name__ + if ( + ipc_msg + and not self._ipc_msg.boxed_type_str + ): + self._ipc_msg.boxed_type_str = boxed_type_str + assert self.boxed_type_str == self._ipc_msg.boxed_type_str + + else: + self._extra_msgdata['boxed_type_str'] = boxed_type_str - assert self.boxed_type_str == self.msgdata['boxed_type_str'] assert self.boxed_type is boxed_type + @property + def ipc_msg(self) -> pretty_struct.Struct: + ''' + Re-render the underlying `._ipc_msg: Msg` as + a `pretty_struct.Struct` for introspection such that the + returned value is a read-only copy of the original. + + ''' + if self._ipc_msg is None: + return None + + msg_type: Msg = type(self._ipc_msg) + fields: dict[str, Any] = { + k: v for _, k, v in + pretty_struct.iter_fields(self._ipc_msg) + } + return defstruct( + msg_type.__name__, + fields=fields.keys(), + bases=(msg_type, pretty_struct.Struct), + )(**fields) + + @property + def msgdata(self) -> dict[str, Any]: + ''' + The (remote) error data provided by a merge of the + `._ipc_msg: Error` msg and any input `._extra_msgdata: dict` + (provided by subtypes via `.__init__()`). + + ''' + msgdata: dict = ( + structs.asdict(self._ipc_msg) + if self._ipc_msg + else {} + ) + return self._extra_msgdata | msgdata + @property def src_type_str(self) -> str: ''' @@ -231,7 +351,7 @@ class RemoteActorError(Exception): at the first relay/hop's receiving actor. ''' - return self.msgdata['src_type_str'] + return self._ipc_msg.src_type_str @property def src_type(self) -> str: @@ -241,7 +361,7 @@ class RemoteActorError(Exception): ''' if self._src_type is None: self._src_type = get_err_type( - self.msgdata['src_type_str'] + self._ipc_msg.src_type_str ) return self._src_type @@ -252,7 +372,7 @@ class RemoteActorError(Exception): String-name of the (last hop's) boxed error type. ''' - return self.msgdata['boxed_type_str'] + return self._ipc_msg.boxed_type_str @property def boxed_type(self) -> str: @@ -262,7 +382,7 @@ class RemoteActorError(Exception): ''' if self._boxed_type is None: self._boxed_type = get_err_type( - self.msgdata['boxed_type_str'] + self._ipc_msg.boxed_type_str ) return self._boxed_type @@ -275,40 +395,44 @@ class RemoteActorError(Exception): actor's hop. NOTE: a `list` field with the same name is expected to be - passed/updated in `.msgdata`. + passed/updated in `.ipc_msg`. ''' - return self.msgdata['relay_path'] + return self._ipc_msg.relay_path @property def relay_uid(self) -> tuple[str, str]|None: return tuple( - self.msgdata['relay_path'][-1] + self._ipc_msg.relay_path[-1] ) @property def src_uid(self) -> tuple[str, str]|None: if src_uid := ( - self.msgdata.get('src_uid') + self._ipc_msg.src_uid ): return tuple(src_uid) # TODO: use path lookup instead? 
# return tuple( - # self.msgdata['relay_path'][0] + # self._ipc_msg.relay_path[0] # ) @property def tb_str( self, - indent: str = ' ', + indent: str = '', ) -> str: - if remote_tb := self.msgdata.get('tb_str'): - return textwrap.indent( - remote_tb, - prefix=indent, - ) + remote_tb: str = '' - return '' + if self._ipc_msg: + remote_tb: str = self._ipc_msg.tb_str + else: + remote_tb = self.msgdata.get('tb_str') + + return textwrap.indent( + remote_tb or '', + prefix=indent, + ) def _mk_fields_str( self, @@ -320,14 +444,17 @@ class RemoteActorError(Exception): val: Any|None = ( getattr(self, key, None) or - self.msgdata.get(key) + getattr( + self._ipc_msg, + key, + None, + ) ) # TODO: for `.relay_path` on multiline? # if not isinstance(val, str): # val_str = pformat(val) # else: val_str: str = repr(val) - if val: _repr += f'{key}={val_str}{end_char}' @@ -358,7 +485,9 @@ class RemoteActorError(Exception): ''' fields: str = self._mk_fields_str( - _body_fields, + _body_fields + + + self.extra_body_fields, ) body: str = pformat_boxed_tb( tb_str=self.tb_str, @@ -415,15 +544,6 @@ class RemoteActorError(Exception): # raise NotImplementedError -class InternalActorError(RemoteActorError): - ''' - (Remote) internal `tractor` error indicating failure of some - primitive, machinery state or lowlevel task that should never - occur. - - ''' - - class ContextCancelled(RemoteActorError): ''' Inter-actor task context was cancelled by either a call to @@ -433,6 +553,10 @@ class ContextCancelled(RemoteActorError): reprol_fields: list[str] = [ 'canceller', ] + extra_body_fields: list[str] = [ + 'cid', + 'canceller', + ] @property def canceller(self) -> tuple[str, str]|None: ''' @@ -454,7 +578,7 @@ class ContextCancelled(RemoteActorError): |_`._cancel_task()` ''' - value = self.msgdata.get('canceller') + value: tuple[str, str]|None = self._ipc_msg.canceller if value: return tuple(value) @@ -468,6 +592,132 @@ class ContextCancelled(RemoteActorError): # src_actor_uid = canceller +class MsgTypeError( + RemoteActorError, +): + ''' + Equivalent of a runtime `TypeError` for IPC dialogs. + + Raise when any IPC wire-message is decoded to have invalid + field values (due to type) or for other `MsgCodec` related + violations such as having no extension-type for a field with + a custom type but no `enc/dec_hook()` support. + + Can be raised on the send or recv side of an IPC `Channel` + depending on the particular msg. + + Msgs which cause this to be raised on the `.send()` side (aka + in the "ctl" dialog phase) include: + - `Start` + - `Started` + - `Return` + + Those which cause it on on the `.recv()` side (aka the "nasty + streaming" dialog phase) are: + - `Yield` + - TODO: any embedded `.pld` type defined by user code? + + Normally the source of an error is re-raised from some `.msg._codec` + decode which itself raises in a backend interchange + lib (eg. a `msgspec.ValidationError`). + + ''' + reprol_fields: list[str] = [ + 'ipc_msg', + ] + extra_body_fields: list[str] = [ + 'cid', + 'payload_msg', + ] + + @property + def msg_dict(self) -> dict[str, Any]: + ''' + If the underlying IPC `Msg` was received from a remote + actor but was unable to be decoded to a native + `Yield`|`Started`|`Return` struct, the interchange backend + native format decoder can be used to stash a `dict` + version for introspection by the invalidating RPC task. 
+ + ''' + return self.msgdata.get('_msg_dict') + + @property + def payload_msg(self) -> Msg|None: + ''' + Attempt to construct what would have been the original + `Msg`-with-payload subtype (i.e. an instance from the set + of msgs in `.msg.types._payload_msgs`) which failed + validation. + + ''' + msg_dict: dict = self.msg_dict.copy() + name: str = msg_dict.pop('msg_type') + msg_type: Msg = getattr( + msgtypes, + name, + Msg, + ) + return msg_type(**msg_dict) + + @property + def cid(self) -> str: + # pre-packed using `.from_decode()` constructor + return self.msgdata.get('cid') + + @classmethod + def from_decode( + cls, + message: str, + msgdict: dict, + + ) -> MsgTypeError: + return cls( + message=message, + + # NOTE: original "vanilla decode" of the msg-bytes + # is placed inside a value readable from + # `.msgdata['_msg_dict']` + _msg_dict=msgdict, + + # expand and pack all RAE compat fields + # into the `._extra_msgdata` aux `dict`. + **{ + k: v + for k, v in msgdict.items() + if k in _ipcmsg_keys + }, + ) + + +class StreamOverrun( + RemoteActorError, + trio.TooSlowError, +): + reprol_fields: list[str] = [ + 'sender', + ] + ''' + This stream was overrun by its sender and can be optionally + handled by app code using `MsgStream.send()/.receive()`. + + ''' + @property + def sender(self) -> tuple[str, str] | None: + value = self._ipc_msg.sender + if value: + return tuple(value) + + +# class InternalActorError(RemoteActorError): +# ''' +# Boxed (Remote) internal `tractor` error indicating failure of some +# primitive, machinery state or lowlevel task that should never +# occur. + +# ''' + + class TransportClosed(trio.ClosedResourceError): "Underlying channel transport was closed prior to use" @@ -484,23 +734,6 @@ class NoRuntime(RuntimeError): "The root actor has not been initialized yet" -class StreamOverrun( - RemoteActorError, - trio.TooSlowError, -): - reprol_fields: list[str] = [ - 'sender', - ] - ''' - This stream was overrun by sender - - ''' - @property - def sender(self) -> tuple[str, str] | None: - value = self.msgdata.get('sender') - if value: - return tuple(value) - class AsyncioCancelled(Exception): ''' @@ -518,23 +751,12 @@ class MessagingError(Exception): ''' -class MsgTypeError(MessagingError): - ''' - Equivalent of a `TypeError` for an IPC wire-message - due to an invalid field value (type). - - Normally this is re-raised from some `.msg._codec` - decode error raised by a backend interchange lib - like `msgspec` or `pycapnproto`. - - ''' - - def pack_error( exc: BaseException|RemoteActorError, tb: str|None = None, cid: str|None = None, + src_uid: tuple[str, str]|None = None, ) -> Error: ''' @@ -560,7 +782,8 @@ def pack_error( ): error_msg.update(exc.msgdata) - # an onion/inception we need to pack + # an onion/inception we need to pack as a nested and relayed + # remotely boxed error. if ( type(exc) is RemoteActorError and (boxed := exc.boxed_type) @@ -584,7 +807,7 @@ def pack_error( error_msg['boxed_type_str'] = 'RemoteActorError' else: - error_msg['src_uid'] = our_uid + error_msg['src_uid'] = src_uid or our_uid error_msg['src_type_str'] = type(exc).__name__ error_msg['boxed_type_str'] = type(exc).__name__ @@ -596,7 +819,7 @@ def pack_error( # XXX NOTE: always ensure the traceback-str is from the # locally raised error (**not** the prior relay's boxed - # content's `.msgdata`). + # content's in `._ipc_msg.tb_str`). 
error_msg['tb_str'] = tb_str if cid is not None: @@ -606,7 +829,7 @@ def pack_error( def unpack_error( - msg: dict[str, Any]|Error, + msg: Error, chan: Channel|None = None, box_type: RemoteActorError = RemoteActorError, @@ -624,16 +847,10 @@ def unpack_error( ''' __tracebackhide__: bool = hide_tb - error_dict: dict[str, dict]|None if not isinstance(msg, Error): - # if ( - # error_dict := msg.get('error') - # ) is None: - # no error field, nothing to unpack. return None - # retrieve the remote error's msg encoded details - # tb_str: str = error_dict.get('tb_str', '') + # retrieve the remote error's encoded details from fields tb_str: str = msg.tb_str message: str = ( f'{chan.uid}\n' @@ -651,6 +868,10 @@ def unpack_error( box_type = ContextCancelled assert boxed_type is box_type + elif boxed_type_str == 'MsgTypeError': + box_type = MsgTypeError + assert boxed_type is box_type + # TODO: already included by `_this_mod` in else loop right? # # we have an inception/onion-error so ensure @@ -661,12 +882,9 @@ def unpack_error( # assert len(error_dict['relay_path']) >= 1 assert len(msg.relay_path) >= 1 - # TODO: mk RAE just take the `Error` instance directly? - error_dict: dict = structs.asdict(msg) - exc = box_type( message, - **error_dict, + ipc_msg=msg, ) return exc -- 2.34.1 From ae42b913847ccf9bd07be0c717243c14a107ce73 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 10:09:05 -0400 Subject: [PATCH 036/190] Expose `MsgType` and extend `MsgCodec` API a bit Make a new `MsgType: TypeAlias` for the union of all msg types such that it can be used in annots throughout the code base; just make `.msg.__msg_spec__` delegate to it. Add some new codec methods: - `pld_spec_str`: for the `str`-casted value of the payload spec, generally useful in logging content. - `msg_spec_items()`: to render a `dict` of msg types to their `str()`-casted values with support for singling out a specific `MsgType`, type by input `msg` instance. - `pformat_msg_spec()`: for rendering the (partial) `.msg_spec` as a formatted `str` useful in logging. Oh right, add a `Error._msg_dict: dict` in support of the previous commit (for `MsgTypeError` packing as RAEs) such that our error msg type can house a non-type-spec decoded wire-bytes for error reporting/analysis purposes. --- tractor/msg/__init__.py | 10 ++++----- tractor/msg/_codec.py | 46 +++++++++++++++++++++++++++++++++++++---- tractor/msg/types.py | 25 +++++++++++++++++++++- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index fe965e0b..443b781b 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -19,7 +19,6 @@ Built-in messaging patterns, types, APIs and helpers. 
''' from typing import ( - Union, TypeAlias, ) from .ptr import ( @@ -56,8 +55,9 @@ from .types import ( # full msg class set from above as list __msg_types__ as __msg_types__, + + # type-alias for union of all msgs + MsgType as MsgType, ) -# TODO: use new type declaration syntax for msg-type-spec -# https://docs.python.org/3/library/typing.html#type-aliases -# https://docs.python.org/3/reference/simple_stmts.html#type -__msg_spec__: TypeAlias = Union[*__msg_types__] + +__msg_spec__: TypeAlias = MsgType diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 56f24d62..de3316c8 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -57,7 +57,7 @@ from trio.lowlevel import ( from tractor.msg.pretty_struct import Struct from tractor.msg.types import ( mk_msg_spec, - Msg, + MsgType, ) @@ -87,12 +87,50 @@ class MsgCodec(Struct): pld_spec: Union[Type[Struct]]|None + @property + def pld_spec_str(self) -> str: + spec: Union[Type]|Type = self.pld_spec + + # TODO: could also use match: instead? + if getattr(spec, '__args__', False): + # `typing.Union` case + return str(spec) + else: + return spec.__name__ + # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions @property def msg_spec(self) -> Union[Type[Struct]]: return self._dec.type + def msg_spec_items( + self, + msg: MsgType|None = None, + + ) -> dict[str, MsgType]|str: + + msgt_table: dict[str, MsgType] = { + msgt: str(msgt) + for msgt in self.msg_spec.__args__ + } + if msg: + msgt: MsgType = type(msg) + str_repr: str = msgt_table[msgt] + return {msgt: str_repr} + + return msgt_table + + # TODO: some way to make `pretty_struct.Struct` use this + # wrapped field over the `.msg_spec` one? + def pformat_msg_spec( + self, + msg: MsgType|None = None, + ) -> str: + return '\n'.join( + self.msg_spec_items(msg=msg).values() + ) + lib: ModuleType = msgspec # TODO: a sub-decoder system as well? @@ -108,7 +146,7 @@ class MsgCodec(Struct): # OR # ) = { # # pre-seed decoders for std-py-type-set for use when - # # `Msg.pld == None|Any`. + # # `MsgType.pld == None|Any`. # None: msgpack.Decoder(Any), # Any: msgpack.Decoder(Any), # } @@ -303,7 +341,7 @@ def mk_codec( # by `tag_field: str` value key? # payload_msg_specs: dict[ # str, # tag_field value as sub-decoder key - # Union[Type[Struct]] # `Msg.pld` type spec + # Union[Type[Struct]] # `MsgType.pld` type spec # ]|None = None, libname: str = 'msgspec', @@ -336,7 +374,7 @@ def mk_codec( raise RuntimeError( f'If a payload spec is provided,\n' "the builtin SC-shuttle-protocol's msg set\n" - f'(i.e. `{Msg}`) MUST be used!\n\n' + f'(i.e. a `{MsgType}`) MUST be used!\n\n' f'However both values were passed as => mk_codec(\n' f' ipc_msg_spec={ipc_msg_spec}`\n' f' ipc_pld_spec={ipc_pld_spec}`\n)\n' diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7355a610..14db09cd 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -31,6 +31,7 @@ from typing import ( Literal, Type, TypeVar, + TypeAlias, Union, ) @@ -400,16 +401,29 @@ class CancelAck( pld: bool +# TODO: unify this with `._exceptions.RemoteActorError` +# such that we can have a msg which is both raisable and +# IPC-wire ready? +# B~o class Error( Struct, tag=True, tag_field='msg_type', + + # TODO may omit defaults? + # https://jcristharif.com/msgspec/structs.html#omitting-default-values + # omit_defaults=True, ): ''' A pkt that wraps `RemoteActorError`s for relay and raising. Fields are 1-to-1 meta-data as needed originally by - `RemoteActorError.msgdata: dict`. 
+    `RemoteActorError.msgdata: dict` but now are defined here.
+
+    Note: this msg shuttles `ContextCancelled` and `StreamOverrun`
+    and is also used to rewrap any `MsgTypeError` for relay-response
+    to bad `Yield.pld` senders during an IPC ctx's streaming dialog
+    phase.

     '''
     src_uid: tuple[str, str]
@@ -428,6 +442,10 @@ class Error(
     # `StreamOverrun`
     sender: tuple[str, str]|None = None

+    # for the `MsgTypeError` case where the receiver side
+    # decodes the underlying original `Msg`-subtype
+    _msg_dict: dict|None = None
+

 # TODO: should be make a msg version of `ContextCancelled?`
 # and/or with a scope field or a full `ActorCancelled`?
@@ -486,6 +504,11 @@ __msg_types__: list[Msg] = (
     _payload_msgs
 )

+# TODO: use new type declaration syntax for msg-type-spec
+# https://docs.python.org/3/library/typing.html#type-aliases
+# https://docs.python.org/3/reference/simple_stmts.html#type
+MsgType: TypeAlias = Union[*__msg_types__]
+

 def mk_msg_spec(
     payload_type_union: Union[Type] = Any,
-- 
2.34.1


From 62bb11975f45e24a0bb084244a13e8054644f173 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 9 Apr 2024 10:36:25 -0400
Subject: [PATCH 037/190] Refine `MsgTypeError` handling to relay-up-on-`.recv()`

Such that `Channel.recv()` + `MsgpackTCPStream.recv()` originating
msg-type-errors are not raised at the IPC transport layer but instead
relayed up the runtime stack for eventual handling by user-app code via
the `Context`/`MsgStream` layer APIs.

This design choice leads to a substantial amount of flexibility and
modularity, and avoids `MsgTypeError` handling policies from being
coupled to a particular backend IPC transport layer:
- receive-side msg-type errors, as can be raised and handled in the
  `.open_stream()` "nasty" phase of a ctx, whilst being packed at the
  `MsgCodec`/transport layer (keeping the underlying src decode error
  coupled to the specific transport + interchange lib) and then relayed
  upward to app code for custom handling like a normal `Error` msg.
- the policy options for handling such cases could be implemented as
  `@acm` wrappers around `.open_context()`/`.open_stream()` blocks (and
  their respective delivered primitives) OR just plain old async
  generators around `MsgStream.receive()` such that both built-in policy
  handling and custom user-app solutions can be swapped without touching
  any `tractor` internals or providing specialized "registry APIs".
  -> eg. the ignore and relay-invalid-msg-to-sender approach can be
  more easily implemented as embedded `try: except MsgTypeError:`
  blocks around `MsgStream.receive()` possibly applied as either of an
  injected wrapper type around a stream or an async gen that `async
  for`s from the stream.
- any performance based AOT-lang extensions used to implement a policy
  for handling recv-side errors can avoid knowledge of the lower level
  IPC `Channel` (and-downward) primitives.
- `Context` consuming code can choose to let all msg-type-errs bubble
  and handle them manually (like any other remote `Error` shuttled
  exception).
- we can keep (as before) send-side msg type checks which can be raised
  locally and cause offending senders to error and adjust before the
  streaming phase of an IPC ctx.

Impl (related) deats:
- obvi make `MsgpackTCPStream.recv()` yield up any `MsgTypeError`
  constructed by `_mk_msg_type_err()` such that the exception will
  eventually be relayed up to `._rpc.process_messages()` and from there
  delivered to the corresponding ctx-task.
- in support of ^, make `Channel.recv()` detect said mtes and use the new `pack_from_raise()` to inject the far end `Actor.uid` for the `Error.src_uid`. - keep raising the send side equivalent (when strict enabled) errors inline immediately with no upward `Error` packing or relay. - improve `_mk_msg_type_err()` cases handling with far more detailed `MsgTypeError` "message" contents pertaining to `msgspec` specific failure-fixing-tips and type-spec mismatch info: * use `.from_decode()` constructor in recv-side case to inject the non-spec decoded `msg_dict: dict` and use the new `MsgCodec.pld_spec_str: str` when clarifying the type discrepancy with the offending field. * on send-side, if we detect that an unsupported field type was described in the original `src_type_error`, AND there is no `msgpack.Encoder.enc_hook()` set, that the real issue is likely that the user needs to extend the codec to support the non-std/custom type with a hook and link to `msgspec` docs. * if one of a `src_type/validation_error` is provided, set that error as the `.__cause__` in the new mte. --- tractor/_ipc.py | 163 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 115 insertions(+), 48 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 694eaf9e..7713811c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -38,7 +38,6 @@ from typing import ( Protocol, Type, TypeVar, - Union, ) import msgspec @@ -47,8 +46,9 @@ import trio from tractor.log import get_logger from tractor._exceptions import ( - TransportClosed, MsgTypeError, + pack_from_raise, + TransportClosed, ) from tractor.msg import ( _ctxvar_MsgCodec, @@ -118,40 +118,75 @@ class MsgTransport(Protocol[MsgType]): ... -def _raise_msg_type_err( +def _mk_msg_type_err( msg: Any|bytes, codec: MsgCodec, - validation_err: msgspec.ValidationError|None = None, + + message: str|None = None, verb_header: str = '', -) -> None: + src_validation_error: msgspec.ValidationError|None = None, + src_type_error: TypeError|None = None, - # if side == 'send': - if validation_err is None: # send-side +) -> MsgTypeError: - import traceback - from tractor._exceptions import pformat_boxed_tb + # `Channel.send()` case + if src_validation_error is None: # send-side - fmt_spec: str = '\n'.join( - map(str, codec.msg_spec.__args__) - ) - fmt_stack: str = ( - '\n'.join(traceback.format_stack(limit=3)) - ) - tb_fmt: str = pformat_boxed_tb( - tb_str=fmt_stack, - # fields_str=header, - field_prefix=' ', - indent='', - ) - raise MsgTypeError( - f'invalid msg -> {msg}: {type(msg)}\n\n' - f'{tb_fmt}\n' - f'Valid IPC msgs are:\n\n' - # f' ------ - ------\n' - f'{fmt_spec}\n' - ) + # no src error from `msgspec.msgpack.Decoder.decode()` so + # prolly a manual type-check on our part. 
+ if message is None: + import traceback + from tractor._exceptions import pformat_boxed_tb + fmt_spec: str = '\n'.join( + map(str, codec.msg_spec.__args__) + ) + fmt_stack: str = ( + '\n'.join(traceback.format_stack(limit=3)) + ) + tb_fmt: str = pformat_boxed_tb( + tb_str=fmt_stack, + # fields_str=header, + field_prefix=' ', + indent='', + ) + message: str = ( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + # f' ------ - ------\n' + f'{fmt_spec}\n', + ) + elif src_type_error: + src_message: str = str(src_type_error) + patt: str = 'type ' + type_idx: int = src_message.find('type ') + invalid_type: str = src_message[type_idx + len(patt):].split()[0] + + enc_hook: Callable|None = codec.enc.enc_hook + if enc_hook is None: + message += ( + '\n\n' + + f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" + f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' + + f'Check the `msgspec` docs for ad-hoc type extending:\n' + '|_ https://jcristharif.com/msgspec/extending.html\n' + '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' + ) + + + msgtyperr = MsgTypeError( + message=message, + ipc_msg=msg, + ) + # ya, might be `None` + msgtyperr.__cause__ = src_type_error + return msgtyperr + + # `Channel.recv()` case else: # decode the msg-bytes using the std msgpack # interchange-prot (i.e. without any @@ -161,29 +196,31 @@ def _raise_msg_type_err( msg_dict: dict = msgspec.msgpack.decode(msg) msg_type_name: str = msg_dict['msg_type'] msg_type = getattr(msgtypes, msg_type_name) - errmsg: str = ( + message: str = ( f'invalid `{msg_type_name}` IPC msg\n\n' ) if verb_header: - errmsg = f'{verb_header} ' + errmsg + message = f'{verb_header} ' + message # XXX see if we can determine the exact invalid field # such that we can comprehensively report the # specific field's type problem - msgspec_msg: str = validation_err.args[0].rstrip('`') + msgspec_msg: str = src_validation_error.args[0].rstrip('`') msg, _, maybe_field = msgspec_msg.rpartition('$.') obj = object() if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_type: Union[Type] = msg_type.__signature__.parameters[ - maybe_field - ].annotation - errmsg += ( + message += ( f'{msg.rstrip("`")}\n\n' f'{msg_type}\n' - f' |_.{maybe_field}: {field_type} = {field_val!r}\n' + f' |_.{maybe_field}: {codec.pld_spec_str} = {field_val!r}\n' ) - raise MsgTypeError(errmsg) from validation_err + msgtyperr = MsgTypeError.from_decode( + message=message, + msgdict=msg_dict, + ) + msgtyperr.__cause__ = src_validation_error + return msgtyperr # TODO: not sure why we have to inherit here, but it seems to be an @@ -325,12 +362,15 @@ class MsgpackTCPStream(MsgTransport): # and always raise such that spec violations # are never allowed to be caught silently! except msgspec.ValidationError as verr: - # re-raise as type error - _raise_msg_type_err( + msgtyperr: MsgTypeError = _mk_msg_type_err( msg=msg_bytes, codec=codec, - validation_err=verr, + src_validation_error=verr, ) + # XXX deliver up to `Channel.recv()` where + # a re-raise and `Error`-pack can inject the far + # end actor `.uid`. 
+ yield msgtyperr except ( msgspec.DecodeError, @@ -387,7 +427,7 @@ class MsgpackTCPStream(MsgTransport): if type(msg) not in msgtypes.__msg_types__: if strict_types: - _raise_msg_type_err( + raise _mk_msg_type_err( msg, codec=codec, ) @@ -400,11 +440,16 @@ class MsgpackTCPStream(MsgTransport): try: bytes_data: bytes = codec.encode(msg) except TypeError as typerr: - raise MsgTypeError( - 'A msg field violates the current spec\n' - f'{codec.pld_spec}\n\n' - f'{pretty_struct.Struct.pformat(msg)}' - ) from typerr + msgtyperr: MsgTypeError = _mk_msg_type_err( + msg, + codec=codec, + message=( + f'IPC-msg-spec violation in\n\n' + f'{pretty_struct.Struct.pformat(msg)}' + ), + src_type_error=typerr, + ) + raise msgtyperr from typerr # supposedly the fastest says, # https://stackoverflow.com/a/54027962 @@ -719,13 +764,35 @@ class Channel: assert self._transport while True: try: - async for item in self._transport: - yield item + async for msg in self._transport: + match msg: + # NOTE: if transport/interchange delivers + # a type error, we pack it with the far + # end peer `Actor.uid` and relay the + # `Error`-msg upward to the `._rpc` stack + # for normal RAE handling. + case MsgTypeError(): + yield pack_from_raise( + local_err=msg, + cid=msg.cid, + + # XXX we pack it here bc lower + # layers have no notion of an + # actor-id ;) + src_uid=self.uid, + ) + case _: + yield msg + + # TODO: if we were gonna do this it should be + # done up at the `MsgStream` layer! + # # sent = yield item # if sent is not None: # # optimization, passing None through all the # # time is pointless # await self._transport.send(sent) + except trio.BrokenResourceError: # if not self._autorecon: -- 2.34.1 From 648695a325c038d35160c1af460b68be6a0e8ec2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 13:46:34 -0400 Subject: [PATCH 038/190] Start tidying up `._context`, use `pack_from_raise()` Mostly removing commented (and replaced) code blocks lingering from the ctxc semantics work and new typed-msg-spec `MsgType`s handling AND use the new `._exceptions.pack_from_raise()` helper to construct `StreamOverrun` msgs. Deaterz: - clean out the drain loop now that it's implemented to handle our struct msg types including the `dict`-msg bits left in as fallback-reminders, any notes/todos better summarized at the top of their blocks, remove any `_final_result_is_set()` related duplicate/legacy tidbits. - use a `case Error()` block in drain loop with fallthrough to `_:` always resulting in an rte raise. - move "XXX" notes into the doc-string for `._deliver_msg()` as a "rules" section. - use `match:` syntax for logging the `result_or_err: MsgType` outcome from the final `.result()` call inside `open_context_from_portal()`. - generally speaking use `MsgType` type annotations throughout! 
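
As a rough before/after sketch mirroring the `._deliver_msg()` hunk
below (same names as in the diff), the old raise-then-`pack_error()`
dance for overruns collapses to a single constructor call plus a send:

    err_msg: Error = pack_from_raise(
        local_err=StreamOverrun(txt, sender=from_uid),
        cid=cid,
    )
    # relay the overrun condition to the sender-side remote task
    await chan.send(err_msg)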
--- tractor/_context.py | 226 ++++++++++++++++++-------------------------- tractor/_portal.py | 4 +- tractor/_runtime.py | 1 - 3 files changed, 95 insertions(+), 136 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index e0f62ec8..1d0f67f0 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -49,20 +49,21 @@ from ._exceptions import ( InternalError, RemoteActorError, StreamOverrun, - pack_error, + pack_from_raise, unpack_error, _raise_from_no_key_in_msg, ) from .log import get_logger from .msg import ( + Error, + MsgType, + MsgCodec, NamespacePath, - Msg, Return, Started, Stop, Yield, current_codec, - MsgCodec, pretty_struct, ) from ._ipc import Channel @@ -107,8 +108,7 @@ async def _drain_to_final_msg( # wait for a final context result by collecting (but # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. - # pre_result_drained: list[dict] = [] - pre_result_drained: list[Msg] = [] + pre_result_drained: list[MsgType] = [] while not ( ctx.maybe_error and not ctx._final_result_is_set() @@ -168,7 +168,7 @@ async def _drain_to_final_msg( # pray to the `trio` gawds that we're corrent with this # msg: dict = await ctx._recv_chan.receive() - msg: Msg = await ctx._recv_chan.receive() + msg: MsgType = await ctx._recv_chan.receive() # always capture unexpected/non-result msgs pre_result_drained.append(msg) @@ -191,13 +191,12 @@ async def _drain_to_final_msg( raise match msg: + + # final result arrived! case Return( - cid=cid, + # cid=cid, pld=res, ): - # try: - # ctx._result: Any = msg['return'] - # ctx._result: Any = msg.pld ctx._result: Any = res log.runtime( 'Context delivered final draining msg:\n' @@ -210,13 +209,9 @@ async def _drain_to_final_msg( # TODO: ^ we don't need it right? break - # except KeyError: - # except AttributeError: + # far end task is still streaming to us so discard + # and report depending on local ctx state. case Yield(): - # if 'yield' in msg: - - # far end task is still streaming to us so discard - # and report per local context state. if ( (ctx._stream.closed and (reason := 'stream was already closed') @@ -257,45 +252,34 @@ async def _drain_to_final_msg( ) continue + # stream terminated, but no result yet.. + # # TODO: work out edge cases here where # a stream is open but the task also calls # this? # -[ ] should be a runtime error if a stream is open right? # Stop() case Stop(): - # elif 'stop' in msg: log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' f'{pformat(msg)}\n' ) continue - # It's an internal error if any other msg type without - # a`'cid'` field arrives here! - case _: - # if not msg.get('cid'): - if not msg.cid: - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' - ) + # remote error msg, likely already handled inside + # `Context._deliver_msg()` + case Error(): - # XXX fallthrough to handle expected error XXX - # TODO: replace this with `ctx.maybe_raise()` + # TODO: can we replace this with `ctx.maybe_raise()`? + # -[ ] would this be handier for this case maybe? + # async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') # - # TODO: would this be handier for this case maybe? 
- # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') - re: Exception|None = ctx._remote_error if re: - log.critical( - 'Remote ctx terminated due to "error" msg:\n' - f'{re}' - ) assert msg is ctx._cancel_msg - # NOTE: this solved a super dupe edge case XD + # NOTE: this solved a super duper edge case XD # this was THE super duper edge case of: # - local task opens a remote task, # - requests remote cancellation of far end @@ -312,9 +296,10 @@ async def _drain_to_final_msg( # does not re-raise any ctxc it receives # IFF **it** was the cancellation # requester.. - # will raise if necessary, ow break from - # loop presuming any error terminates the - # context! + # + # XXX will raise if necessary but ow break + # from loop presuming any supressed error + # (ctxc) should terminate the context! ctx._maybe_raise_remote_err( re, # NOTE: obvi we don't care if we @@ -338,6 +323,7 @@ async def _drain_to_final_msg( log.critical('SHOULD NEVER GET HERE!?') assert msg is ctx._cancel_msg assert error.msgdata == ctx._remote_error.msgdata + assert error.ipc_msg == ctx._remote_error.ipc_msg from .devx._debug import pause await pause() ctx._maybe_cancel_and_set_remote_error(error) @@ -346,6 +332,20 @@ async def _drain_to_final_msg( else: # bubble the original src key error raise + + # XXX should pretty much never get here unless someone + # overrides the default `MsgType` spec. + case _: + # It's definitely an internal error if any other + # msg type without a`'cid'` field arrives here! + if not msg.cid: + raise InternalError( + 'Unexpected cid-missing msg?\n\n' + f'{msg}\n' + ) + + raise RuntimeError('Unknown msg type: {msg}') + else: log.cancel( 'Skipping `MsgStream` drain since final outcome is set\n\n' @@ -1345,8 +1345,11 @@ class Context: # `._cancel_called == True`. not raise_overrun_from_self and isinstance(remote_error, RemoteActorError) - and remote_error.msgdata['boxed_type_str'] == 'StreamOverrun' - and tuple(remote_error.msgdata['sender']) == our_uid + + and remote_error.boxed_type_str == 'StreamOverrun' + + # and tuple(remote_error.msgdata['sender']) == our_uid + and tuple(remote_error.sender) == our_uid ): # NOTE: we set the local scope error to any "self # cancellation" error-response thus "absorbing" @@ -1415,16 +1418,11 @@ class Context: assert self._recv_chan raise_overrun: bool = not self._allow_overruns - # res_placeholder: int = id(self) if ( - # self._result == res_placeholder - # and not self._remote_error self.maybe_error is None - # not self._remote_error - # and not self._local_error - and not self._recv_chan._closed # type: ignore + and + not self._recv_chan._closed # type: ignore ): - # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" # msgs still in transit from the far end. @@ -1435,7 +1433,6 @@ class Context: for msg in drained_msgs: # TODO: mask this by default.. - # if 'return' in msg: if isinstance(msg, Return): # from .devx import pause # await pause() @@ -1451,6 +1448,9 @@ class Context: ) self.maybe_raise( + # NOTE: obvi we don't care if we + # overran the far end if we're already + # waiting on a final result (msg). 
raise_overrun_from_self=( raise_overrun and @@ -1461,34 +1461,12 @@ class Context: (not self._cancel_called) ) ) - # if ( - # (re := self._remote_error) - # # and self._result == res_placeholder - # ): - # self._maybe_raise_remote_err( - # re, - # # NOTE: obvi we don't care if we - # # overran the far end if we're already - # # waiting on a final result (msg). - # # raise_overrun_from_self=False, - # raise_overrun_from_self=( - # raise_overrun - # and - # # only when we ARE NOT the canceller - # # should we raise overruns, bc ow we're - # # raising something we know might happen - # # during cancellation ;) - # (not self._cancel_called) - # ), - # ) - # if maybe_err: - # self._result = maybe_err - return self.outcome - # TODO: switch this with above which should be named - # `.wait_for_outcome()` and instead do - # a `.outcome.Outcome.unwrap()` ? + # TODO: switch this with above! + # -[ ] should be named `.wait_for_outcome()` and instead do + # a `.outcome.Outcome.unwrap()` ? + # # @property # def result(self) -> Any|None: # if self._final_result_is_set(): @@ -1547,7 +1525,6 @@ class Context: return None def _final_result_is_set(self) -> bool: - # return not (self._result == id(self)) return self._result is not Unresolved # def get_result_nowait(self) -> Any|None: @@ -1764,8 +1741,7 @@ class Context: async def _deliver_msg( self, - # msg: dict, - msg: Msg, + msg: MsgType, ) -> bool: ''' @@ -1779,6 +1755,20 @@ class Context: `._scope_nursery: trio.Nursery`) which ensures that such messages are queued up and eventually sent if possible. + XXX RULES XXX + ------ - ------ + - NEVER raise remote errors from this method; a runtime task caller. + An error "delivered" to a ctx should always be raised by + the corresponding local task operating on the + `Portal`/`Context` APIs. + + - NEVER `return` early before delivering the msg! + bc if the error is a ctxc and there is a task waiting on + `.result()` we need the msg to be + `send_chan.send_nowait()`-ed over the `._recv_chan` so + that the error is relayed to that waiter task and thus + raised in user code! + ''' cid: str = self.cid chan: Channel = self.chan @@ -1809,28 +1799,14 @@ class Context: ) self._cancel_msg: dict = msg - # NOTE: this will not raise an error, merely set + # XXX NOTE: this will not raise an error, merely set # `._remote_error` and maybe cancel any task currently # entered in `Portal.open_context()` presuming the # error is "cancel causing" (i.e. a `ContextCancelled` # or `RemoteActorError`). self._maybe_cancel_and_set_remote_error(re) - # XXX NEVER do this XXX..!! - # bc if the error is a ctxc and there is a task - # waiting on `.result()` we need the msg to be sent - # over the `send_chan`/`._recv_chan` so that the error - # is relayed to that waiter task.. - # return True - # - # XXX ALSO NO!! XXX - # => NEVER raise remote errors from the calling - # runtime task, they should always be raised by - # consumer side tasks operating on the - # `Portal`/`Context` APIs. - # if self._remote_error: - # self._maybe_raise_remote_err(error) - + # XXX only case where returning early is fine! if self._in_overrun: log.warning( f'Queueing OVERRUN msg on caller task:\n' @@ -1949,31 +1925,27 @@ class Context: # anything different. return False else: - # txt += f'\n{msg}\n' # raise local overrun and immediately pack as IPC # msg for far end. 
- try: - raise StreamOverrun( + err_msg: Error = pack_from_raise( + local_err=StreamOverrun( txt, sender=from_uid, - ) - except StreamOverrun as err: - err_msg: dict[str, dict] = pack_error( - err, - cid=cid, - ) - try: - # relay condition to sender side remote task - await chan.send(err_msg) - return True + ), + cid=cid, + ) + try: + # relay condition to sender side remote task + await chan.send(err_msg) + return True - except trio.BrokenResourceError: - # XXX: local consumer has closed their side - # so cancel the far end streaming task - log.warning( - 'Channel for ctx is already closed?\n' - f'|_{chan}\n' - ) + # XXX: local consumer has closed their side of + # the IPC so cancel the far end streaming task + except trio.BrokenResourceError: + log.warning( + 'Channel for ctx is already closed?\n' + f'|_{chan}\n' + ) # ow, indicate unable to deliver by default return False @@ -2382,28 +2354,17 @@ async def open_context_from_portal( # an exception type boxed in a `RemoteActorError` # is returned (meaning it was obvi not raised) # that we want to log-report on. - msgdata: str|None = getattr( - result_or_err, - 'msgdata', - None - ) - match (msgdata, result_or_err): - case ( - {'tb_str': tbstr}, - ContextCancelled(), - ): - log.cancel(tbstr) + match result_or_err: + case ContextCancelled() as ctxc: + log.cancel(ctxc.tb_str) - case ( - {'tb_str': tbstr}, - RemoteActorError(), - ): + case RemoteActorError() as rae: log.exception( 'Context remotely errored!\n' f'<= peer: {uid}\n' f' |_ {nsf}()\n\n' - f'{tbstr}' + f'{rae.tb_str}' ) case (None, _): log.runtime( @@ -2413,7 +2374,6 @@ async def open_context_from_portal( f'`{result_or_err}`\n' ) - finally: # XXX: (MEGA IMPORTANT) if this is a root opened process we # wait for any immediate child in debug before popping the diff --git a/tractor/_portal.py b/tractor/_portal.py index 957eae59..e4db93a6 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -46,6 +46,7 @@ from ._state import ( from ._ipc import Channel from .log import get_logger from .msg import ( + Error, NamespacePath, Return, ) @@ -69,8 +70,7 @@ log = get_logger(__name__) # `._raise_from_no_key_in_msg()` (after tweak to # accept a `chan: Channel` arg) in key block! def _unwrap_msg( - # msg: dict[str, Any], - msg: Return, + msg: Return|Error, channel: Channel, hide_tb: bool = True, diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 435464be..f61ec80d 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -50,7 +50,6 @@ from pprint import pformat import signal import sys from typing import ( - Any, Callable, TYPE_CHECKING, ) -- 2.34.1 From 0b28b54e11c6a6b3e1efaf34905e28fe5a6f401b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 13:58:10 -0400 Subject: [PATCH 039/190] Relay `MsgTypeError`s upward in RPC loop via `._deliver_ctx_payload()` --- tractor/_rpc.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b494af2b..c9754ebc 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -47,12 +47,13 @@ from ._context import ( Context, ) from ._exceptions import ( - ModuleNotExposed, - is_multi_cancelled, ContextCancelled, + ModuleNotExposed, + MsgTypeError, + TransportClosed, + is_multi_cancelled, pack_error, unpack_error, - TransportClosed, ) from .devx import _debug from . import _state @@ -632,7 +633,7 @@ async def _invoke( # (callee) task, so relay this cancel signal to the # other side. 
ctxc = ContextCancelled( - msg, + message=msg, boxed_type=trio.Cancelled, canceller=canceller, ) @@ -822,7 +823,12 @@ async def process_messages( | Stop(cid=cid) | Return(cid=cid) | CancelAck(cid=cid) - | Error(cid=cid) # RPC-task ctx specific + + # `.cid` means RPC-ctx-task specific + | Error(cid=cid) + + # recv-side `MsgType` decode violation + | MsgTypeError(cid=cid) ): # deliver response to local caller/waiter # via its per-remote-context memory channel. -- 2.34.1 From 0653a70f2ba9cc73244a9374b1c737c13a95f6b4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Apr 2024 20:23:55 -0400 Subject: [PATCH 040/190] Add msg-from-dict constructor helper Handy for re-constructing a struct-`MsgType` from a `dict` decoded from wire-bytes wherein the msg failed to decode normally due to a field type error but you'd still like to show the "potential" msg in struct form, say inside a `MsgTypeError`'s meta data. Supporting deats: - add a `.msg.types.from_dict_msg()` to implement it (the helper). - also a `.msg.types._msg_table: dict[str, MsgType]` for supporting this func ^ as well as providing just a general `MsgType`-by-`str`-name lookup. Unrelated: - Drop commented idea for still supporting `dict`-msg set via `enc/dec_hook()`s that would translate to/from `MsgType`s, but that would require a duplicate impl in the runtime.. so eff that XD --- tractor/msg/pretty_struct.py | 1 + tractor/msg/types.py | 117 ++++++++++++----------------------- 2 files changed, 40 insertions(+), 78 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index 412b6ed6..a67bbd26 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -140,6 +140,7 @@ class Struct( return sin_props + # TODO: make thisi a mod-func! def pformat( self, field_indent: int = 2, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 14db09cd..9787504b 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -447,6 +447,29 @@ class Error( _msg_dict: dict|None = None +def from_dict_msg( + dict_msg: dict, + + msgT: MsgType|None = None, + tag_field: str = 'msg_type' + +) -> MsgType: + ''' + Helper to build a specific `MsgType` struct from + a "vanilla" decoded `dict`-ified equivalent of the + msg: i.e. if the `msgpack.Decoder.type == Any`. + + ''' + msg_type_tag_field: str = ( + msgT.__struct_config__.tag_field + if msgT is not None + else tag_field + ) + # XXX ensure tag field is removed + msgT_name: str = dict_msg.pop(msg_type_tag_field) + msgT: MsgType = _msg_table[msgT_name] + return msgT(**dict_msg) + # TODO: should be make a msg version of `ContextCancelled?` # and/or with a scope field or a full `ActorCancelled`? # class Cancelled(Msg): @@ -498,12 +521,18 @@ _payload_msgs: list[Msg] = [ # built-in SC shuttle protocol msg type set in # approx order of the IPC txn-state spaces. 
-__msg_types__: list[Msg] = ( +__msg_types__: list[MsgType] = ( _runtime_msgs + _payload_msgs ) + +_msg_table: dict[str, MsgType] = { + msgT.__name__: msgT + for msgT in __msg_types__ +} + # TODO: use new type declaration syntax for msg-type-spec # https://docs.python.org/3/library/typing.html#type-aliases # https://docs.python.org/3/reference/simple_stmts.html#type @@ -660,6 +689,11 @@ def mk_msg_spec( 'Generating new IPC msg-spec\n' f'{ipc_spec}\n' ) + assert ( + ipc_spec + and + ipc_spec is not Any + ) return ( ipc_spec, msgtypes_table[spec_build_method] + ipc_msg_types, @@ -669,9 +703,9 @@ def mk_msg_spec( # TODO: make something similar to this inside `._codec` such that # user can just pass a type table of some sort? # -[ ] we would need to decode all msgs to `pretty_struct.Struct` -# and then call `.to_dict()` on them? +# and then call `.to_dict()` on them? # -[ ] we're going to need to re-impl all the stuff changed in the -# runtime port such that it can handle dicts or `Msg`s? +# runtime port such that it can handle dicts or `Msg`s? # # def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: # ''' @@ -679,88 +713,15 @@ def mk_msg_spec( # manual convertion from our above native `Msg` set # to `dict` equivalent (wire msgs) in order to keep legacy compat # with the original runtime implementation. - +# # Note: this is is/was primarly used while moving the core # runtime over to using native `Msg`-struct types wherein we # start with the send side emitting without loading # a typed-decoder and then later flipping the switch over to # load to the native struct types once all runtime usage has # been adjusted appropriately. - +# # ''' -# def enc_to_dict(msg: Any) -> Any: -# ''' -# Encode `Msg`-structs to `dict` msgs instead -# of using `msgspec.msgpack.Decoder.type`-ed -# features. - -# ''' -# match msg: -# case Start(): -# dctmsg: dict = pretty_struct.Struct.to_dict( -# msg -# )['pld'] - -# case Error(): -# dctmsg: dict = pretty_struct.Struct.to_dict( -# msg -# )['pld'] -# return {'error': dctmsg} - - -# def dec_from_dict( -# type: Type, -# obj: Any, -# ) -> Any: -# ''' -# Decode to `Msg`-structs from `dict` msgs instead -# of using `msgspec.msgpack.Decoder.type`-ed -# features. 
-
-#     '''
-#     cid: str = obj.get('cid')
-#     match obj:
-#         case {'cmd': pld}:
-#             return Start(
-#                 cid=cid,
-#                 pld=pld,
-#             )
-#         case {'functype': pld}:
-#             return StartAck(
-#                 cid=cid,
-#                 functype=pld,
-#                 # pld=IpcCtxSpec(
-#                 #     functype=pld,
-#                 # ),
-#             )
-#         case {'started': pld}:
-#             return Started(
-#                 cid=cid,
-#                 pld=pld,
-#             )
-#         case {'yield': pld}:
-#             return Yield(
-#                 cid=obj['cid'],
-#                 pld=pld,
-#             )
-#         case {'stop': pld}:
-#             return Stop(
-#                 cid=cid,
-#             )
-#         case {'return': pld}:
-#             return Return(
-#                 cid=cid,
-#                 pld=pld,
-#             )
-
-#         case {'error': pld}:
-#             return Error(
-#                 cid=cid,
-#                 pld=ErrorData(
-#                     **pld
-#                 ),
-#             )
-

 # return (
 #     # enc_to_dict,
 #     dec_from_dict,
--
2.34.1


From d716d8b6b420c7ad77aa0861194b8063e418079c Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 11 Apr 2024 20:42:54 -0400
Subject: [PATCH 041/190] Expose `tractor.msg.PayloadT` from subpkg

---
 tractor/msg/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py
index 443b781b..8f13f5f8 100644
--- a/tractor/msg/__init__.py
+++ b/tractor/msg/__init__.py
@@ -53,6 +53,9 @@ from .types import (

     Error as Error,

+    # type-var for `.pld` field
+    PayloadT as PayloadT,
+
     # full msg class set from above as list
     __msg_types__ as __msg_types__,
--
2.34.1


From 1567dfc3e26e9f508525203362ca445b5bc059f6 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 11 Apr 2024 21:04:48 -0400
Subject: [PATCH 042/190] Add custom `MsgCodec.__repr__()`

Sure makes console grokability a lot better by showing only the
customizable fields. Further, clean up `mk_codec()` a bunch by removing
the `ipc_msg_spec` param since we don't plan to support another msg-set
(for now) which allows cleaning out a buncha logic that was mostly just
a source of bugs..

Also,
- add temporary `log.info()` around codec application.
- throw in some sanity `assert`s to `limit_msg_spec()`.
- add but mask out the `extend_msg_spec()` idea since it seems `msgspec`
  won't allow `Decoder.type` extensions when using a custom `dec_hook()`
  for some extension type.. (not sure what approach to take here yet).
---
 tractor/msg/_codec.py | 137 +++++++++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 54 deletions(-)

diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index de3316c8..e117457f 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -37,6 +37,7 @@ from contextlib import (
 #     ContextVar,
 #     Token,
 # )
+import textwrap
 from typing import (
     Any,
     Callable,
@@ -59,7 +60,9 @@ from tractor.msg.types import (
     mk_msg_spec,
     MsgType,
 )
+from tractor.log import get_logger

+log = get_logger(__name__)

 # TODO: overall IPC msg-spec features (i.e. in this mod)!
 #
@@ -87,6 +90,27 @@ class MsgCodec(Struct):

     pld_spec: Union[Type[Struct]]|None

+    def __repr__(self) -> str:
+        speclines: str = textwrap.indent(
+            self.pformat_msg_spec(),
+            prefix=' '*3,
+        )
+        body: str = textwrap.indent(
+            f'|_lib = {self.lib.__name__!r}\n'
+            f'|_enc_hook: {self.enc.enc_hook}\n'
+            f'|_dec_hook: {self.dec.dec_hook}\n'
+            f'|_pld_spec: {self.pld_spec_str}\n'
+            # f'|\n'
+            f'|__msg_spec__:\n'
+            f'{speclines}\n',
+            prefix=' '*2,
+        )
+        return (
+            f'<{type(self).__name__}(\n'
+            f'{body}'
+            ')>'
+        )
+
     @property
     def pld_spec_str(self) -> str:
         spec: Union[Type]|Type = self.pld_spec
@@ -163,8 +187,8 @@ class MsgCodec(Struct):
     ) -> bytes:
         '''
-        Encode input python objects to `msgpack` bytes for transfer
-        on a tranport protocol connection.
+        Encode input python objects to `msgpack` bytes for
+        transfer on a transport protocol connection.
''' return self._enc.encode(py_obj) @@ -325,15 +349,9 @@ class MsgCodec(Struct): def mk_codec( - ipc_msg_spec: Union[Type[Struct]]|Any|None = None, - # - # ^TODO^: in the long run, do we want to allow using a diff IPC `Msg`-set? - # it would break the runtime, but maybe say if you wanted - # to add some kinda field-specific or wholesale `.pld` ecryption? - # struct type unions set for `Decoder` # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_pld_spec: Union[Type[Struct]]|Any|None = None, + ipc_pld_spec: Union[Type[Struct]]|Any = Any, # TODO: offering a per-msg(-field) type-spec such that # the fields can be dynamically NOT decoded and left as `Raw` @@ -352,7 +370,6 @@ def mk_codec( dec_hook: Callable|None = None, enc_hook: Callable|None = None, # ------ - ------ - **kwargs, # # Encoder: # write_buffer_size=write_buffer_size, @@ -367,44 +384,19 @@ def mk_codec( `msgspec` ;). ''' - if ( - ipc_msg_spec is not None - and ipc_pld_spec - ): - raise RuntimeError( - f'If a payload spec is provided,\n' - "the builtin SC-shuttle-protocol's msg set\n" - f'(i.e. a `{MsgType}`) MUST be used!\n\n' - f'However both values were passed as => mk_codec(\n' - f' ipc_msg_spec={ipc_msg_spec}`\n' - f' ipc_pld_spec={ipc_pld_spec}`\n)\n' - ) - - elif ( - ipc_pld_spec - and - - # XXX required for now (or maybe forever?) until - # we can dream up a way to allow parameterizing and/or - # custom overrides to the `Msg`-spec protocol itself? - ipc_msg_spec is None - ): - # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` - # for the decoder such that all sub-type msgs in our SCIPP - # will automatically decode to a type-"limited" payload (`Struct`) - # object (set). - ( - ipc_msg_spec, - msg_types, - ) = mk_msg_spec( - payload_type_union=ipc_pld_spec, - ) - assert len(ipc_msg_spec.__args__) == len(msg_types) - assert ipc_msg_spec - - else: - ipc_msg_spec = ipc_msg_spec or Any + # (manually) generate a msg-payload-spec for all relevant + # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # for the decoder such that all sub-type msgs in our SCIPP + # will automatically decode to a type-"limited" payload (`Struct`) + # object (set). + ( + ipc_msg_spec, + msg_types, + ) = mk_msg_spec( + payload_type_union=ipc_pld_spec, + ) + assert len(ipc_msg_spec.__args__) == len(msg_types) + assert ipc_msg_spec enc = msgpack.Encoder( enc_hook=enc_hook, @@ -418,8 +410,6 @@ def mk_codec( _enc=enc, _dec=dec, pld_spec=ipc_pld_spec, - # payload_msg_specs=payload_msg_specs, - # **kwargs, ) # sanity on expected backend support @@ -500,8 +490,16 @@ def apply_codec( - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py ''' + __tracebackhide__: bool = True orig: MsgCodec = _ctxvar_MsgCodec.get() assert orig is not codec + if codec.pld_spec is None: + breakpoint() + + log.info( + 'Applying new msg-spec codec\n\n' + f'{codec}\n' + ) token: RunVarToken = _ctxvar_MsgCodec.set(codec) # TODO: for TreeVar approach, see docs for @cm `.being()` API: @@ -518,7 +516,10 @@ def apply_codec( _ctxvar_MsgCodec.reset(token) assert _ctxvar_MsgCodec.get() is orig - + log.info( + 'Reverted to last msg-spec codec\n\n' + f'{orig}\n' + ) def current_codec() -> MsgCodec: ''' @@ -532,14 +533,15 @@ def current_codec() -> MsgCodec: @cm def limit_msg_spec( - payload_types: Union[Type[Struct]], + payload_spec: Union[Type[Struct]], # TODO: don't need this approach right? # -> related to the `MsgCodec._payload_decs` stuff above.. 
# tagged_structs: list[Struct]|None = None, **codec_kwargs, -): + +) -> MsgCodec: ''' Apply a `MsgCodec` that will natively decode the SC-msg set's `Msg.pld: Union[Type[Struct]]` payload fields using @@ -547,10 +549,37 @@ def limit_msg_spec( for all IPC contexts in use by the current `trio.Task`. ''' + __tracebackhide__: bool = True + curr_codec = current_codec() msgspec_codec: MsgCodec = mk_codec( - payload_types=payload_types, + ipc_pld_spec=payload_spec, **codec_kwargs, ) with apply_codec(msgspec_codec) as applied_codec: assert applied_codec is msgspec_codec yield msgspec_codec + + assert curr_codec is current_codec() + + +# XXX: msgspec won't allow this with non-struct custom types +# like `NamespacePath`!@! +# @cm +# def extend_msg_spec( +# payload_spec: Union[Type[Struct]], + +# ) -> MsgCodec: +# ''' +# Extend the current `MsgCodec.pld_spec` (type set) by extending +# the payload spec to **include** the types specified by +# `payload_spec`. + +# ''' +# codec: MsgCodec = current_codec() +# pld_spec: Union[Type] = codec.pld_spec +# extended_spec: Union[Type] = pld_spec|payload_spec + +# with limit_msg_spec(payload_types=extended_spec) as ext_codec: +# # import pdbp; pdbp.set_trace() +# assert ext_codec.pld_spec == extended_spec +# yield ext_codec -- 2.34.1 From 9e2133e3be69525aef75af5010f33be4d7aca07f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Apr 2024 21:24:02 -0400 Subject: [PATCH 043/190] Tweak some `pformat_boxed_tb()` indent inputs - add some `tb_str: str` indent-prefix args for diff indent levels for the body vs. the surrounding "ascii box". - ^-use it-^ from `RemoteActorError.__repr()__` obvi. - use new `msg.types.from_dict_msg()` in impl of `MsgTypeError.payload_msg`, handy for showing what the message "would have looked like in `Struct` form" had it not failed it's type constraints. --- tractor/_exceptions.py | 73 ++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index a31aa11e..31b7b36e 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -40,7 +40,7 @@ from tractor._state import current_actor from tractor.log import get_logger from tractor.msg import ( Error, - Msg, + MsgType, Stop, Yield, pretty_struct, @@ -130,7 +130,10 @@ def pformat_boxed_tb( tb_str: str, fields_str: str|None = None, field_prefix: str = ' |_', - indent: str = ' '*2 + + tb_box_indent: int|None = None, + tb_body_indent: int = 1, + ) -> str: if ( fields_str @@ -139,15 +142,19 @@ def pformat_boxed_tb( ): fields: str = textwrap.indent( fields_str, - # prefix=' '*2, - # prefix=' |_', prefix=field_prefix, ) else: fields = fields_str or '' - # body_indent: str = len(field_prefix) * ' ' - body: str = ( + tb_body = tb_str + if tb_body_indent: + tb_body: str = textwrap.indent( + tb_str, + prefix=tb_body_indent * ' ', + ) + + tb_box: str = ( # orig # f' |\n' @@ -158,21 +165,29 @@ def pformat_boxed_tb( f'|\n' f' ------ - ------\n\n' - f'{tb_str}\n' + # f'{tb_str}\n' + f'{tb_body}' f' ------ - ------\n' f'_|\n' ) - if len(indent): - body: str = textwrap.indent( - body, - # prefix=body_indent, - prefix=indent, + tb_box_indent: str = ( + tb_box_indent + or + 1 + + # (len(field_prefix)) + # ? ^-TODO-^ ? 
if you wanted another indent level + ) + if tb_box_indent > 0: + tb_box: str = textwrap.indent( + tb_box, + prefix=tb_box_indent * ' ', ) return ( fields + - body + tb_box ) @@ -316,7 +331,7 @@ class RemoteActorError(Exception): if self._ipc_msg is None: return None - msg_type: Msg = type(self._ipc_msg) + msg_type: MsgType = type(self._ipc_msg) fields: dict[str, Any] = { k: v for _, k, v in pretty_struct.iter_fields(self._ipc_msg) @@ -493,7 +508,10 @@ class RemoteActorError(Exception): tb_str=self.tb_str, fields_str=fields, field_prefix=' |_', - indent=' ', # no indent? + # ^- is so that it's placed like so, + # just after dict[str, Any]: ''' - If the underlying IPC `Msg` was received from a remote + If the underlying IPC `MsgType` was received from a remote actor but was unable to be decoded to a native `Yield`|`Started`|`Return` struct, the interchange backend native format decoder can be used to stash a `dict` @@ -643,22 +661,21 @@ class MsgTypeError( return self.msgdata.get('_msg_dict') @property - def payload_msg(self) -> Msg|None: + def payload_msg( + self, + ) -> MsgType|None: ''' Attempt to construct what would have been the original - `Msg`-with-payload subtype (i.e. an instance from the set + `MsgType`-with-payload subtype (i.e. an instance from the set of msgs in `.msg.types._payload_msgs`) which failed validation. ''' - msg_dict: dict = self.msg_dict.copy() - name: str = msg_dict.pop('msg_type') - msg_type: Msg = getattr( - msgtypes, - name, - Msg, - ) - return msg_type(**msg_dict) + if msg_dict := self.msg_dict.copy(): + return msgtypes.from_dict_msg( + dict_msg=msg_dict, + ) + return None @property def cid(self) -> str: @@ -908,7 +925,7 @@ def is_multi_cancelled(exc: BaseException) -> bool: def _raise_from_no_key_in_msg( ctx: Context, - msg: Msg, + msg: MsgType, src_err: KeyError, log: StackLevelAdapter, # caller specific `log` obj -- 2.34.1 From aa4a4be668fb65ec6efdd0851885191b0fe1555a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 12 Apr 2024 11:47:10 -0400 Subject: [PATCH 044/190] TOSQUASH 322e015d Fix `mk_codec()` input arg --- tractor/msg/_codec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e117457f..82fd2011 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -420,7 +420,7 @@ def mk_codec( # instance of the default `msgspec.msgpack` codec settings, i.e. # no custom structs, hooks or other special types. -_def_msgspec_codec: MsgCodec = mk_codec(ipc_msg_spec=Any) +_def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # The built-in IPC `Msg` spec. 
# Our composing "shuttle" protocol which allows `tractor`-app code -- 2.34.1 From 8690a88e50dbf9078884fe8084e687262ec56e55 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 12 Apr 2024 11:49:50 -0400 Subject: [PATCH 045/190] Extend recv-side `MsgTypeError` default message Display the new `MsgCodec.pld_spec_str` and format the incorrect field value to be placed entirely (txt block wise) right of the "type annot" part of the line: Iow if you had a bad `dict` value where something else should be it'd look something like this: MsgTypeError: + import textwrap + # `Channel.send()` case if src_validation_error is None: # send-side @@ -209,10 +211,24 @@ def _mk_msg_type_err( msg, _, maybe_field = msgspec_msg.rpartition('$.') obj = object() if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) message += ( f'{msg.rstrip("`")}\n\n' - f'{msg_type}\n' - f' |_.{maybe_field}: {codec.pld_spec_str} = {field_val!r}\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' ) msgtyperr = MsgTypeError.from_decode( @@ -338,7 +354,7 @@ class MsgpackTCPStream(MsgTransport): # self._task = task self._codec = codec log.runtime( - 'Using new codec in {self}.recv()\n' + f'Using new codec in {self}.recv()\n' f'codec: {self._codec}\n\n' f'msg_bytes: {msg_bytes}\n' ) @@ -420,7 +436,7 @@ class MsgpackTCPStream(MsgTransport): if self._codec.pld_spec != codec.pld_spec: self._codec = codec log.runtime( - 'Using new codec in {self}.send()\n' + f'Using new codec in {self}.send()\n' f'codec: {self._codec}\n\n' f'msg: {msg}\n' ) -- 2.34.1 From 26a3ff6b37a26c28510bc7460fb6ba0597d3a0d2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 13 Apr 2024 15:19:08 -0400 Subject: [PATCH 046/190] IPC ctx refinements around `MsgTypeError` awareness Add a bit of special handling for msg-type-errors with a dedicated log-msg detailing which `.side: str` is the sender/causer and avoiding a `._scope.cancel()` call in such cases since the local task might be written to handle and tolerate the badly (typed) IPC msg. As part of ^, change the ctx task-pair "side" semantics from "caller" -> "callee" to be "parent" -> "child" which better matches the cross-process SC-linked-task supervision hierarchy, and `trio.Nursery.parent_task`; in `trio` the task that opens a nursery is also named the "parent". Impl deats / fixes around the `.side` semantics: - ensure that `._portal: Portal` is set ASAP after `Actor.start_remote_task()` such that if the `Started` transaction fails, the parent-vs.-child sides are still denoted correctly (since `._portal` being set is the predicate for that). - add a helper func `Context.peer_side(side: str) -> str:` which inverts from "child" to "parent" and vice versa, useful for logging info. Other tweaks: - make `_drain_to_final_msg()` return a tuple of a maybe-`Return` and the list of other `pre_result_drained: list[MsgType]` such that we don't ever have to warn about the return msg getting captured as a pre-"result" msg. - Add some strictness flags to `.started()` which allow for toggling whether to error or warn log about mismatching roundtripped `Started` msgs prior to IPC transit. 
--- tractor/_context.py | 179 ++++++++++++++++++++++++++++++++------------ 1 file changed, 132 insertions(+), 47 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 1d0f67f0..2d9e6363 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -47,6 +47,7 @@ import trio from ._exceptions import ( ContextCancelled, InternalError, + MsgTypeError, RemoteActorError, StreamOverrun, pack_from_raise, @@ -59,12 +60,14 @@ from .msg import ( MsgType, MsgCodec, NamespacePath, + PayloadT, Return, Started, Stop, Yield, current_codec, pretty_struct, + types as msgtypes, ) from ._ipc import Channel from ._streaming import MsgStream @@ -88,7 +91,10 @@ async def _drain_to_final_msg( hide_tb: bool = True, msg_limit: int = 6, -) -> list[dict]: +) -> tuple[ + Return|None, + list[MsgType] +]: ''' Drain IPC msgs delivered to the underlying rx-mem-chan `Context._recv_chan` from the runtime in search for a final @@ -109,6 +115,7 @@ async def _drain_to_final_msg( # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. pre_result_drained: list[MsgType] = [] + return_msg: Return|None = None while not ( ctx.maybe_error and not ctx._final_result_is_set() @@ -169,8 +176,6 @@ async def _drain_to_final_msg( # pray to the `trio` gawds that we're corrent with this # msg: dict = await ctx._recv_chan.receive() msg: MsgType = await ctx._recv_chan.receive() - # always capture unexpected/non-result msgs - pre_result_drained.append(msg) # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -207,11 +212,13 @@ async def _drain_to_final_msg( # if ctx._recv_chan: # await ctx._recv_chan.aclose() # TODO: ^ we don't need it right? + return_msg = msg break # far end task is still streaming to us so discard # and report depending on local ctx state. case Yield(): + pre_result_drained.append(msg) if ( (ctx._stream.closed and (reason := 'stream was already closed') @@ -236,7 +243,10 @@ async def _drain_to_final_msg( f'{pformat(msg)}\n' ) - return pre_result_drained + return ( + return_msg, + pre_result_drained, + ) # drain up to the `msg_limit` hoping to get # a final result or error/ctxc. @@ -260,6 +270,7 @@ async def _drain_to_final_msg( # -[ ] should be a runtime error if a stream is open right? # Stop() case Stop(): + pre_result_drained.append(msg) log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' f'{pformat(msg)}\n' @@ -269,7 +280,6 @@ async def _drain_to_final_msg( # remote error msg, likely already handled inside # `Context._deliver_msg()` case Error(): - # TODO: can we replace this with `ctx.maybe_raise()`? # -[ ] would this be handier for this case maybe? # async with maybe_raise_on_exit() as raises: @@ -336,6 +346,7 @@ async def _drain_to_final_msg( # XXX should pretty much never get here unless someone # overrides the default `MsgType` spec. case _: + pre_result_drained.append(msg) # It's definitely an internal error if any other # msg type without a`'cid'` field arrives here! if not msg.cid: @@ -352,7 +363,10 @@ async def _drain_to_final_msg( f'{ctx.outcome}\n' ) - return pre_result_drained + return ( + return_msg, + pre_result_drained, + ) class Unresolved: @@ -719,21 +733,36 @@ class Context: Return string indicating which task this instance is wrapping. ''' - return 'caller' if self._portal else 'callee' + return 'parent' if self._portal else 'child' + @staticmethod + def peer_side(side: str) -> str: + match side: + case 'child': + return 'parent' + case 'parent': + return 'child' + + # TODO: remove stat! 
+ # -[ ] re-implement the `.experiemental._pubsub` stuff + # with `MsgStream` and that should be last usage? + # -[ ] remove from `tests/legacy_one_way_streaming.py`! async def send_yield( self, data: Any, - ) -> None: + ''' + Deprecated method for what now is implemented in `MsgStream`. + We need to rework / remove some stuff tho, see above. + + ''' warnings.warn( "`Context.send_yield()` is now deprecated. " "Use ``MessageStream.send()``. ", DeprecationWarning, stacklevel=2, ) - # await self.chan.send({'yield': data, 'cid': self.cid}) await self.chan.send( Yield( cid=self.cid, @@ -742,12 +771,11 @@ class Context: ) async def send_stop(self) -> None: - # await pause() - # await self.chan.send({ - # # Stop( - # 'stop': True, - # 'cid': self.cid - # }) + ''' + Terminate a `MsgStream` dialog-phase by sending the IPC + equiv of a `StopIteration`. + + ''' await self.chan.send( Stop(cid=self.cid) ) @@ -843,6 +871,7 @@ class Context: # self-cancel (ack) or, # peer propagated remote cancellation. + msgtyperr: bool = False if isinstance(error, ContextCancelled): whom: str = ( @@ -854,6 +883,16 @@ class Context: f'{error}' ) + elif isinstance(error, MsgTypeError): + msgtyperr = True + peer_side: str = self.peer_side(self.side) + log.error( + f'IPC dialog error due to msg-type caused by {peer_side!r} side\n\n' + + f'{error}\n' + f'{pformat(self)}\n' + ) + else: log.error( f'Remote context error:\n\n' @@ -894,9 +933,9 @@ class Context: # if `._cancel_called` then `.cancel_acked and .cancel_called` # always should be set. and not self._is_self_cancelled() - and not cs.cancel_called and not cs.cancelled_caught + and not msgtyperr ): # TODO: it'd sure be handy to inject our own # `trio.Cancelled` subtype here ;) @@ -1004,7 +1043,7 @@ class Context: # when the runtime finally receives it during teardown # (normally in `.result()` called from # `Portal.open_context().__aexit__()`) - if side == 'caller': + if side == 'parent': if not self._portal: raise InternalError( 'No portal found!?\n' @@ -1426,7 +1465,10 @@ class Context: # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" # msgs still in transit from the far end. - drained_msgs: list[dict] = await _drain_to_final_msg( + ( + return_msg, + drained_msgs, + ) = await _drain_to_final_msg( ctx=self, hide_tb=hide_tb, ) @@ -1444,7 +1486,10 @@ class Context: log.cancel( 'Ctx drained pre-result msgs:\n' - f'{pformat(drained_msgs)}' + f'{pformat(drained_msgs)}\n\n' + + f'Final return msg:\n' + f'{return_msg}\n' ) self.maybe_raise( @@ -1611,7 +1656,13 @@ class Context: async def started( self, - value: Any | None = None + + # TODO: how to type this so that it's the + # same as the payload type? Is this enough? + value: PayloadT|None = None, + + strict_parity: bool = False, + complain_no_parity: bool = True, ) -> None: ''' @@ -1632,7 +1683,7 @@ class Context: f'called `.started()` twice on context with {self.chan.uid}' ) - started = Started( + started_msg = Started( cid=self.cid, pld=value, ) @@ -1653,28 +1704,54 @@ class Context: # https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern # codec: MsgCodec = current_codec() - msg_bytes: bytes = codec.encode(started) + msg_bytes: bytes = codec.encode(started_msg) try: # be a "cheap" dialog (see above!) - rt_started = codec.decode(msg_bytes) - if rt_started != started: + if ( + strict_parity + or + complain_no_parity + ): + rt_started: Started = codec.decode(msg_bytes) - # TODO: break these methods out from the struct subtype? 
- diff = pretty_struct.Struct.__sub__(rt_started, started) + # XXX something is prolly totes cucked with the + # codec state! + if isinstance(rt_started, dict): + rt_started = msgtypes.from_dict_msg( + dict_msg=rt_started, + ) + raise RuntimeError( + 'Failed to roundtrip `Started` msg?\n' + f'{pformat(rt_started)}\n' + ) - complaint: str = ( - 'Started value does not match after codec rountrip?\n\n' - f'{diff}' - ) - # TODO: rn this will pretty much always fail with - # any other sequence type embeded in the - # payload... - if self._strict_started: - raise ValueError(complaint) - else: - log.warning(complaint) + if rt_started != started_msg: + # TODO: break these methods out from the struct subtype? - await self.chan.send(rt_started) + diff = pretty_struct.Struct.__sub__( + rt_started, + started_msg, + ) + complaint: str = ( + 'Started value does not match after codec rountrip?\n\n' + f'{diff}' + ) + + # TODO: rn this will pretty much always fail with + # any other sequence type embeded in the + # payload... + if ( + self._strict_started + or + strict_parity + ): + raise ValueError(complaint) + else: + log.warning(complaint) + + # started_msg = rt_started + + await self.chan.send(started_msg) # raise any msg type error NO MATTER WHAT! except msgspec.ValidationError as verr: @@ -1685,7 +1762,7 @@ class Context: src_validation_error=verr, verb_header='Trying to send payload' # > 'invalid `Started IPC msgs\n' - ) + ) from verr self._started_called = True @@ -1786,13 +1863,17 @@ class Context: else: log_meth = log.runtime - log_meth( - f'Delivering error-msg to caller\n\n' + side: str = self.side - f'<= peer: {from_uid}\n' + peer_side: str = self.peer_side(side) + + log_meth( + f'Delivering IPC ctx error from {peer_side!r} to {side!r} task\n\n' + + f'<= peer {peer_side!r}: {from_uid}\n' f' |_ {nsf}()\n\n' - f'=> cid: {cid}\n' + f'=> {side!r} cid: {cid}\n' f' |_{self._task}\n\n' f'{pformat(re)}\n' @@ -1807,6 +1888,7 @@ class Context: self._maybe_cancel_and_set_remote_error(re) # XXX only case where returning early is fine! + structfmt = pretty_struct.Struct.pformat if self._in_overrun: log.warning( f'Queueing OVERRUN msg on caller task:\n' @@ -1816,7 +1898,7 @@ class Context: f'=> cid: {cid}\n' f' |_{self._task}\n\n' - f'{pformat(msg)}\n' + f'{structfmt(msg)}\n' ) self._overflow_q.append(msg) return False @@ -1830,7 +1912,7 @@ class Context: f'=> {self._task}\n' f' |_cid={self.cid}\n\n' - f'{pformat(msg)}\n' + f'{structfmt(msg)}\n' ) # NOTE: if an error is deteced we should always still @@ -2050,6 +2132,9 @@ async def open_context_from_portal( # place.. allow_overruns=allow_overruns, ) + # ASAP, so that `Context.side: str` can be determined for + # logging / tracing / debug! + ctx._portal: Portal = portal assert ctx._remote_func_type == 'context' msg: Started = await ctx._recv_chan.receive() @@ -2068,10 +2153,10 @@ async def open_context_from_portal( msg=msg, src_err=src_error, log=log, - expect_key='started', + expect_msg=Started, + # expect_key='started', ) - ctx._portal: Portal = portal uid: tuple = portal.channel.uid cid: str = ctx.cid -- 2.34.1 From 97b9d417d22a005ad1b18902618e9fd97ce056b5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 16:29:21 -0400 Subject: [PATCH 047/190] Add `from_dict_msg(user_pretty: bool)` flag Allows for optionally (and dynamically) constructing the "expected" `MsgType` from a `dict` into a `pretty_struct.Struct`, mostly for logging usage. 
--- tractor/msg/types.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 9787504b..f7654f62 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -451,7 +451,8 @@ def from_dict_msg( dict_msg: dict, msgT: MsgType|None = None, - tag_field: str = 'msg_type' + tag_field: str = 'msg_type', + use_pretty: bool = False, ) -> MsgType: ''' @@ -468,6 +469,19 @@ def from_dict_msg( # XXX ensure tag field is removed msgT_name: str = dict_msg.pop(msg_type_tag_field) msgT: MsgType = _msg_table[msgT_name] + if use_pretty: + msgT = defstruct( + name=msgT_name, + fields=[ + (key, fi.type) + for fi, key, _ + in pretty_struct.iter_fields(msgT) + ], + bases=( + pretty_struct.Struct, + msgT, + ), + ) return msgT(**dict_msg) # TODO: should be make a msg version of `ContextCancelled?` -- 2.34.1 From 71693ac3ddfc1a7b5a85f7e4caa4ed8ff859f97a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 16:32:18 -0400 Subject: [PATCH 048/190] Add `MsgTypeError.expected_msg_type` Which matches with renaming `.payload_msg` -> `.expected_msg` which is the value we attempt to construct from a vanilla-msgppack decode-to-`dict` and then construct manually into a `MsgType` using `.msg.types.from_dict_msg()`. Add a todo to use new `use_pretty` flag which currently conflicts with `._exceptions.pformat_boxed_type()` prefix formatting.. --- tractor/_exceptions.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 31b7b36e..259994a1 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -43,9 +43,12 @@ from tractor.msg import ( MsgType, Stop, Yield, - pretty_struct, types as msgtypes, ) +from tractor.msg.pretty_struct import ( + iter_fields, + Struct, +) if TYPE_CHECKING: from ._context import Context @@ -82,7 +85,7 @@ class InternalError(RuntimeError): _ipcmsg_keys: list[str] = [ fi.name for fi, k, v - in pretty_struct.iter_fields(Error) + in iter_fields(Error) ] @@ -321,7 +324,7 @@ class RemoteActorError(Exception): assert self.boxed_type is boxed_type @property - def ipc_msg(self) -> pretty_struct.Struct: + def ipc_msg(self) -> Struct: ''' Re-render the underlying `._ipc_msg: Msg` as a `pretty_struct.Struct` for introspection such that the @@ -334,12 +337,12 @@ class RemoteActorError(Exception): msg_type: MsgType = type(self._ipc_msg) fields: dict[str, Any] = { k: v for _, k, v in - pretty_struct.iter_fields(self._ipc_msg) + iter_fields(self._ipc_msg) } return defstruct( msg_type.__name__, fields=fields.keys(), - bases=(msg_type, pretty_struct.Struct), + bases=(msg_type, Struct), )(**fields) @property @@ -641,11 +644,11 @@ class MsgTypeError( ''' reprol_fields: list[str] = [ - 'payload_msg', + 'expected_msg_type', ] extra_body_fields: list[str] = [ 'cid', - 'payload_msg', + 'expected_msg', ] @property @@ -661,9 +664,7 @@ class MsgTypeError( return self.msgdata.get('_msg_dict') @property - def payload_msg( - self, - ) -> MsgType|None: + def expected_msg(self) -> MsgType|None: ''' Attempt to construct what would have been the original `MsgType`-with-payload subtype (i.e. an instance from the set @@ -674,9 +675,17 @@ class MsgTypeError( if msg_dict := self.msg_dict.copy(): return msgtypes.from_dict_msg( dict_msg=msg_dict, + # use_pretty=True, + # ^-TODO-^ would luv to use this BUT then the + # `field_prefix` in `pformat_boxed_tb()` cucks it + # all up.. 
XD ) return None + @property + def expected_msg_type(self) -> Type[MsgType]|None: + return type(self.expected_msg) + @property def cid(self) -> str: # pre-packed using `.from_decode()` constructor -- 2.34.1 From 203d0aceb4d789c4a27903e2ac3a7cad63b28b18 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 18:31:41 -0400 Subject: [PATCH 049/190] Use `_raise_from_no_key_in_msg(allow_msgs)` Instead of `allow_msg_keys` since we've fully flipped over to struct-types for msgs in the runtime. - drop the loop from `MsgStream.receive_nowait()` since `Yield/Return.pld` getting will handle both (instead of a loop of `dict`-key reads). --- tractor/_context.py | 1 - tractor/_exceptions.py | 3 +-- tractor/_streaming.py | 48 ++++++++++++++++-------------------------- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 2d9e6363..29fee0b1 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -2154,7 +2154,6 @@ async def open_context_from_portal( src_err=src_error, log=log, expect_msg=Started, - # expect_key='started', ) uid: tuple = portal.channel.uid diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 259994a1..65637fb5 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -938,7 +938,6 @@ def _raise_from_no_key_in_msg( src_err: KeyError, log: StackLevelAdapter, # caller specific `log` obj - expect_key: str = 'yield', expect_msg: str = Yield, stream: MsgStream | None = None, @@ -1053,7 +1052,7 @@ def _raise_from_no_key_in_msg( # is activated above. _type: str = 'Stream' if stream else 'Context' raise MessagingError( - f"{_type} was expecting a '{expect_key.upper()}' message" + f"{_type} was expecting a {expect_msg} message" " BUT received a non-error msg:\n" f'{pformat(msg)}' ) from src_err diff --git a/tractor/_streaming.py b/tractor/_streaming.py index fcf8dafc..ac4d482e 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -44,6 +44,7 @@ from .trionics import ( BroadcastReceiver, ) from tractor.msg import ( + Return, Stop, Yield, ) @@ -82,7 +83,7 @@ class MsgStream(trio.abc.Channel): self, ctx: Context, # typing: ignore # noqa rx_chan: trio.MemoryReceiveChannel, - _broadcaster: BroadcastReceiver | None = None, + _broadcaster: BroadcastReceiver|None = None, ) -> None: self._ctx = ctx @@ -96,36 +97,26 @@ class MsgStream(trio.abc.Channel): # delegate directly to underlying mem channel def receive_nowait( self, - allow_msg_keys: list[str] = ['yield'], + allow_msgs: list[str] = Yield, ): - # msg: dict = self._rx_chan.receive_nowait() msg: Yield|Stop = self._rx_chan.receive_nowait() - for ( - i, - key, - ) in enumerate(allow_msg_keys): - try: - # return msg[key] - return msg.pld - # except KeyError as kerr: - except AttributeError as attrerr: - if i < (len(allow_msg_keys) - 1): - continue - - _raise_from_no_key_in_msg( - ctx=self._ctx, - msg=msg, - # src_err=kerr, - src_err=attrerr, - log=log, - expect_key=key, - stream=self, - ) + # TODO: replace msg equiv of this or does the `.pld` + # interface read already satisfy it? I think so, yes? 
+ try: + return msg.pld + except AttributeError as attrerr: + _raise_from_no_key_in_msg( + ctx=self._ctx, + msg=msg, + src_err=attrerr, + log=log, + stream=self, + ) async def receive( self, - hide_tb: bool = True, + hide_tb: bool = False, ): ''' Receive a single msg from the IPC transport, the next in @@ -157,10 +148,9 @@ class MsgStream(trio.abc.Channel): try: try: msg: Yield = await self._rx_chan.receive() - # return msg['yield'] return msg.pld - # except KeyError as kerr: + # TODO: implement with match: instead? except AttributeError as attrerr: # src_err = kerr src_err = attrerr @@ -170,10 +160,8 @@ class MsgStream(trio.abc.Channel): _raise_from_no_key_in_msg( ctx=self._ctx, msg=msg, - # src_err=kerr, src_err=attrerr, log=log, - expect_key='yield', stream=self, ) @@ -304,7 +292,7 @@ class MsgStream(trio.abc.Channel): while not drained: try: maybe_final_msg = self.receive_nowait( - allow_msg_keys=['yield', 'return'], + allow_msgs=[Yield, Return], ) if maybe_final_msg: log.debug( -- 2.34.1 From 88686e22711d3c848304939045507e06777c2f94 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 19:31:50 -0400 Subject: [PATCH 050/190] Pass a `use_greenback: bool` runtime var to subs Such that the top level `maybe_enable_greenback` from `open_root_actor()` can toggle the entire actor tree's usage. Read the rtv in `._rpc` tasks and only enable if set. Also, rigor up the `._rpc.process_messages()` loop to handle `Error()` and `case _:` separately such that we now raise an explicit rte for unknown / invalid msgs. Use "parent" / "child" for side descriptions in loop comments and put a fat comment before the `StartAck` in `_invoke()`. --- tractor/_root.py | 12 +++++++-- tractor/_rpc.py | 68 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 58 insertions(+), 22 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index 4469f3ed..3209555e 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -117,6 +117,8 @@ async def open_root_actor( os.environ['PYTHONBREAKPOINT'] = ( 'tractor.devx._debug.pause_from_sync' ) + _state._runtime_vars['use_greenback'] = True + else: # TODO: disable `breakpoint()` by default (without # `greenback`) since it will break any multi-actor @@ -392,14 +394,20 @@ async def open_root_actor( _state._last_actor_terminated = actor # restore built-in `breakpoint()` hook state - if debug_mode: + if ( + debug_mode + and + maybe_enable_greenback + ): if builtin_bp_handler is not None: sys.breakpointhook = builtin_bp_handler + if orig_bp_path is not None: os.environ['PYTHONBREAKPOINT'] = orig_bp_path + else: # clear env back to having no entry - os.environ.pop('PYTHONBREAKPOINT') + os.environ.pop('PYTHONBREAKPOINT', None) logger.runtime("Root actor terminated") diff --git a/tractor/_rpc.py b/tractor/_rpc.py index c9754ebc..7e259c1e 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -41,7 +41,6 @@ from trio import ( TaskStatus, ) -from .msg import NamespacePath from ._ipc import Channel from ._context import ( Context, @@ -58,6 +57,11 @@ from ._exceptions import ( from .devx import _debug from . import _state from .log import get_logger +from .msg import ( + current_codec, + MsgCodec, + NamespacePath, +) from tractor.msg.types import ( CancelAck, Error, @@ -94,6 +98,7 @@ async def _invoke_non_context( Context | BaseException ] = trio.TASK_STATUS_IGNORED, ): + __tracebackhide__: bool = True # TODO: can we unify this with the `context=True` impl below? 
if inspect.isasyncgen(coro): @@ -394,7 +399,11 @@ async def _invoke( __tracebackhide__: bool = hide_tb treat_as_gen: bool = False - if _state.debug_mode(): + if ( + _state.debug_mode() + and + _state._runtime_vars['use_greenback'] + ): # XXX for .pause_from_sync()` usage we need to make sure # `greenback` is boostrapped in the subactor! await _debug.maybe_init_greenback() @@ -508,10 +517,22 @@ async def _invoke( # wrapper that calls `Context.started()` and then does # the `await coro()`? - # a "context" endpoint type is the most general and - # "least sugary" type of RPC ep with support for + # ------ - ------ + # a "context" endpoint is the most general and + # "least sugary" type of RPC with support for # bi-dir streaming B) - # StartAck + # + # the concurrency relation is simlar to a task nursery + # wherein a "parent" task (the one that enters + # `trio.open_nursery()` in some actor "opens" (via + # `Portal.open_context()`) an IPC ctx to another peer + # (which is maybe a sub-) actor who then schedules (aka + # `trio.Nursery.start()`s) a new "child" task to execute + # the `@context` annotated func; that is this func we're + # running directly below! + # ------ - ------ + # + # StartAck: respond immediately with endpoint info await chan.send( StartAck( cid=cid, @@ -520,11 +541,11 @@ async def _invoke( ) # TODO: should we also use an `.open_context()` equiv - # for this callee side by factoring the impl from + # for this child side by factoring the impl from # `Portal.open_context()` into a common helper? # # NOTE: there are many different ctx state details - # in a callee side instance according to current impl: + # in a child side instance according to current impl: # - `.cancelled_caught` can never be `True`. # -> the below scope is never exposed to the # `@context` marked RPC function. @@ -550,7 +571,7 @@ async def _invoke( # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, - # - *this* callee task manually calling `ctx.cancel()`. + # - *this* child task manually calling `ctx.cancel()`. # - the runtime calling `ctx._deliver_msg()` which # itself calls `ctx._maybe_cancel_and_set_remote_error()` # which cancels the scope presuming the input error @@ -627,10 +648,11 @@ async def _invoke( # f' |_{ctx}' ) - # task-contex was either cancelled by request using - # ``Portal.cancel_actor()`` or ``Context.cancel()`` - # on the far end, or it was cancelled by the local - # (callee) task, so relay this cancel signal to the + # task-contex was either cancelled by request + # using ``Portal.cancel_actor()`` or + # ``Context.cancel()`` on the far end, or it + # was cancelled by the local child (or callee) + # task, so relay this cancel signal to the # other side. ctxc = ContextCancelled( message=msg, @@ -651,7 +673,7 @@ async def _invoke( ) as scope_error: - # always set this (callee) side's exception as the + # always set this (child) side's exception as the # local error on the context ctx._local_error: BaseException = scope_error @@ -1020,9 +1042,8 @@ async def process_messages( trio.Event(), ) - # XXX remote (runtime scoped) error or uknown - # msg (type). - case Error() | _: + # runtime-scoped remote error (since no `.cid`) + case Error(): # NOTE: this is the non-rpc error case, # that is, an error **not** raised inside # a call to ``_invoke()`` (i.e. 
no cid was @@ -1030,10 +1051,6 @@ async def process_messages( # this error to all local channel # consumers (normally portals) by marking # the channel as errored - log.exception( - f'Unhandled IPC msg:\n\n' - f'{msg}\n' - ) # assert chan.uid chan._exc: Exception = unpack_error( msg, @@ -1041,6 +1058,17 @@ async def process_messages( ) raise chan._exc + # unknown/invalid msg type? + case _: + codec: MsgCodec = current_codec() + message: str = ( + f'Unhandled IPC msg for codec?\n\n' + f'|_{codec}\n\n' + f'{msg}\n' + ) + log.exception(message) + raise RuntimeError(message) + log.runtime( 'Waiting on next IPC msg from\n' f'peer: {chan.uid}\n' -- 2.34.1 From 1c01608c729c9d33435c82ed1f5cb9d1f2228b79 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 19:50:09 -0400 Subject: [PATCH 051/190] More msg-spec tests tidying - Drop `test_msg_spec_xor_pld_spec()` since we no longer support `ipc_msg_spec` arg to `mk_codec()`. - Expect `MsgTypeError`s around `.open_context()` calls when `add_codec_hooks == False`. - toss in some `.pause()` points in the subactor ctx body whilst hacking out a `.pld` protocol for debug mode TTY locking. --- tests/test_caps_based_msging.py | 65 ++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index f7cab2a5..9a73ba8d 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -14,19 +14,20 @@ from typing import ( from contextvars import ( Context, ) -# from inspect import Parameter from msgspec import ( structs, msgpack, - # defstruct, Struct, ValidationError, ) import pytest import tractor -from tractor import _state +from tractor import ( + _state, + MsgTypeError, +) from tractor.msg import ( _codec, _ctxvar_MsgCodec, @@ -47,21 +48,6 @@ from tractor.msg.types import ( import trio -def test_msg_spec_xor_pld_spec(): - ''' - If the `.msg.types.Msg`-set is overridden, we - can't also support a `Msg.pld` spec. - - ''' - # apply custom hooks and set a `Decoder` which only - # loads `NamespacePath` types. - with pytest.raises(RuntimeError): - mk_codec( - ipc_msg_spec=Any, - ipc_pld_spec=NamespacePath, - ) - - def mk_custom_codec( pld_spec: Union[Type]|Any, add_hooks: bool, @@ -134,7 +120,9 @@ def mk_custom_codec( f'{uid}\n' 'FAILED DECODE\n' f'type-> {obj_type}\n' - f'obj-arg-> `{obj}`: {type(obj)}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n\n' + f'current codec:\n' + f'{current_codec()}\n' ) # TODO: figure out the ignore subsys for this! # -[ ] option whether to defense-relay backc the msg @@ -409,7 +397,9 @@ async def send_back_values( pld_spec=ipc_pld_spec, add_hooks=add_hooks, ) - with apply_codec(nsp_codec) as codec: + with ( + apply_codec(nsp_codec) as codec, + ): chk_codec_applied( expect_codec=nsp_codec, enter_value=codec, @@ -459,7 +449,7 @@ async def send_back_values( # XXX NOTE XXX THIS WON'T WORK WITHOUT SPECIAL # `str` handling! or special debug mode IPC # msgs! - # await tractor.pause() + await tractor.pause() raise RuntimeError( f'NOT-EXPECTED able to roundtrip value given spec:\n' @@ -470,7 +460,8 @@ async def send_back_values( break # move on to streaming block.. 
except tractor.MsgTypeError: - # await tractor.pause() + await tractor.pause() + if expect_send: raise RuntimeError( f'EXPECTED to `.started()` value given spec:\n' @@ -652,12 +643,42 @@ def test_codec_hooks_mod( pld_spec_type_strs: list[str] = enc_type_union(ipc_pld_spec) + # XXX should raise an mte (`MsgTypeError`) + # when `add_codec_hooks == False` bc the input + # `expect_ipc_send` kwarg has a nsp which can't be + # serialized! + # + # TODO:can we ensure this happens from the + # `Return`-side (aka the sub) as well? + if not add_codec_hooks: + try: + async with p.open_context( + send_back_values, + expect_debug=debug_mode, + pld_spec_type_strs=pld_spec_type_strs, + add_hooks=add_codec_hooks, + started_msg_bytes=nsp_codec.encode(expected_started), + + # XXX NOTE bc we send a `NamespacePath` in this kwarg + expect_ipc_send=expect_ipc_send, + + ) as (ctx, first): + pytest.fail('ctx should fail to open without custom enc_hook!?') + + # this test passes bc we can go no further! + except MsgTypeError: + # teardown nursery + await p.cancel_actor() + return + # TODO: send the original nsp here and # test with `limit_msg_spec()` above? # await tractor.pause() print('PARENT opening IPC ctx!\n') async with ( + # XXX should raise an mte (`MsgTypeError`) + # when `add_codec_hooks == False`.. p.open_context( send_back_values, expect_debug=debug_mode, -- 2.34.1 From 8baaeb414fe874086d1b92cc89ec5502ad8fdfdc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 15 Apr 2024 15:20:00 -0400 Subject: [PATCH 052/190] Tweak a couple more log message fmts --- tractor/_context.py | 2 +- tractor/_streaming.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 29fee0b1..052c198d 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1905,7 +1905,7 @@ class Context: try: log.runtime( - f'Delivering msg from IPC ctx:\n' + f'Delivering msg from IPC ctx:\n\n' f'<= {from_uid}\n' f' |_ {nsf}()\n\n' diff --git a/tractor/_streaming.py b/tractor/_streaming.py index ac4d482e..16e32cea 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -377,14 +377,17 @@ class MsgStream(trio.abc.Channel): # await rx_chan.aclose() if not self._eoc: - log.cancel( - 'Stream closed by self before it received an EoC?\n' - 'Setting eoc manually..\n..' - ) - self._eoc: bool = trio.EndOfChannel( - f'Context stream closed by self({self._ctx.side})\n' + message: str = ( + f'Context stream closed by {self._ctx.side!r}\n' f'|_{self}\n' ) + log.cancel( + 'Stream self-closed before receiving EoC\n\n' + + + message + ) + self._eoc = trio.EndOfChannel(message) + # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? # => NO, DEFINITELY NOT! <= # if we're a bi-dir ``MsgStream`` BECAUSE this same -- 2.34.1 From 1d1c7cb3e856c1d3d7ff60037956e661b8f23364 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 16 Apr 2024 10:09:45 -0400 Subject: [PATCH 053/190] First draft, sub-msg-spec for debugger `Lock` sys Since it's totes possible to have a spec applied that won't permit `str`s, might as well formalize a small msg set for subactors to request the tree-wide TTY `Lock`. BTW, I'm prolly not going into every single change here in this first WIP since there's still a variety of broken stuff mostly to do with races on the codec apply being done in a `trio.lowleve.RunVar`; it should be re-done with a `ContextVar` such that each task does NOT mutate the global setting.. 
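The race is easy to demo in isolation: a `trio.lowlevel.RunVar` is
shared run-wide, so one task's `.set()` clobbers every other task's
setting, while a `contextvars.ContextVar` is copied per-task by
`trio`. A toy sketch (plain strings standing in for real `MsgCodec`
instances):

    import contextvars
    import trio

    _codec: contextvars.ContextVar[str] = contextvars.ContextVar(
        '_codec',
        default='default-codec',
    )

    async def apply_codec(name: str, codec: str) -> None:
        # every `trio` task runs in a copy of its parent's
        # `contextvars.Context`, so this `.set()` can never leak
        # into a sibling task (unlike a `RunVar.set()` which
        # mutates the one run-global value).
        _codec.set(codec)
        await trio.sleep(0.1)  # interleave with the other task
        assert _codec.get() == codec, 'task-local codec clobbered!?'
        print(f'{name!r} kept its codec: {_codec.get()!r}')

    async def main():
        async with trio.open_nursery() as tn:
            tn.start_soon(apply_codec, 'lock-request-task', 'debug-codec')
            tn.start_soon(apply_codec, 'app-task', 'app-codec')

    trio.run(main)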
New msg set and usage is simply:
- `LockStatus`, the response msg delivered from
  `lock_tty_for_child()`.
- `LockRelease`, a one-off request msg from the subactor to drop
  the `Lock` from a `MsgStream.send()`.
- use these msgs throughout the root and sub sides of the locking
  ctx funcs: `lock_tty_for_child()` &
  `wait_for_parent_stdin_hijack()` (see the standalone wire sketch
  just below).

The codec is now applied in both the root and sub `Lock` request tasks:
- for root, inside `lock_tty_for_child()` before the `.started()`.
- for subs, inside `wait_for_parent_stdin_hijack()` since we only
  want to affect the codec *for the locking task*.
- (hence the need for a ctx-var as mentioned above, but currently
  this can cause races which will break against other app tasks
  competing for the codec setting).
- add an `apply_debug_codec()` helper for use in both cases.
- add more detailed logging to both the root and sub side of `Lock`
  requesting funcs, including requiring that the sub-side task
  "uid" (a `tuple[str, int]` = `(trio.Task.name, id(trio.Task))`)
  be provided (more on this later).

A main issue discovered while proto-testing all this was the
ability of a sub to "double lock" (leading to self-deadlock) via an
error in `wait_for_parent_stdin_hijack()` which, for ex., can
happen in debug mode via crash handling of a `MsgTypeError`
received from the root during a codec-applied msg-spec race!
Originally I was attempting to solve this by making the SIGINT
override handler more resilient, but this case is somewhat
impossible to detect by an external root task other than checking
for duplicate ownership via the new `subactor_task_uid`.
=> SO NOW, we always stick the current task uid in the
`Lock._blocked: set` and raise an rte on a double request by the
same remote task.

Included is a variety of small refinements:
- finally figured out how to mark a variety of `.__exit__()` frames
  with `pdbp.hideframe()` to actually hide them B)
- add cls methods around managing `Lock._locking_task_cs` from root
  only.
- re-org all the `Lock` attrs into those only used in root vs.
  subactors and proto-prep a new `DebugStatus` actor-singleton to
  be used in subs.
- add a `Lock.repr()` to contextually print the current conc
  primitives.
- rename our `Pdb`-subtype to `PdbREPL`.
- rigor out the SIGINT handler a bit, originally to try and
  hack-solve the double-lock issue mentioned above, but now just
  with better logging and logic for most (all?) possible hang cases
  that should be hang-recoverable after enough ctrl-c mashing by
  the user.. well, hopefully.
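Wire-wise the new set is just a `msgspec` tagged union, so the lock
dialog gets type-checked at decode time. The struct defs here mirror
the ones added in this patch; the decoder/encoder usage is a minimal
standalone sketch:

    import msgspec

    class LockStatus(
        msgspec.Struct,
        tag=True,
        tag_field='msg_type',
    ):
        subactor_uid: tuple[str, str]
        cid: str
        locked: bool

    class LockRelease(
        msgspec.Struct,
        tag=True,
        tag_field='msg_type',
    ):
        subactor_uid: tuple[str, str]
        cid: str

    # decoder limited to JUST the lock-protocol msg-spec: any other
    # msg type fails with `msgspec.ValidationError` at decode time.
    dec = msgspec.msgpack.Decoder(LockStatus | LockRelease)
    enc = msgspec.msgpack.Encoder()

    wire: bytes = enc.encode(
        LockStatus(
            subactor_uid=('sub', 'uuid'),
            cid='1',
            locked=True,
        )
    )
    msg = dec.decode(wire)
    assert isinstance(msg, LockStatus) and msg.locked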
--- tractor/devx/__init__.py | 1 - tractor/devx/_debug.py | 1049 ++++++++++++++++++++++++++++---------- 2 files changed, 792 insertions(+), 258 deletions(-) diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index 75aec953..c1a93878 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -27,7 +27,6 @@ from ._debug import ( pause as pause, pause_from_sync as pause_from_sync, shield_sigint_handler as shield_sigint_handler, - MultiActorPdb as MultiActorPdb, open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, post_mortem as post_mortem, diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 26155b22..51e74379 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -25,6 +25,7 @@ from contextlib import ( asynccontextmanager as acm, contextmanager as cm, nullcontext, + _GeneratorContextManager, ) from functools import ( partial, @@ -33,6 +34,7 @@ from functools import ( import os import signal import sys +import textwrap import threading import traceback from typing import ( @@ -40,6 +42,7 @@ from typing import ( Callable, AsyncIterator, AsyncGenerator, + TypeAlias, TYPE_CHECKING, ) from types import ( @@ -47,17 +50,23 @@ from types import ( ModuleType, ) +from msgspec import Struct import pdbp import sniffio import tractor import trio -from trio.lowlevel import current_task +from trio.lowlevel import ( + current_task, + Task, +) from trio import ( TaskStatus, - # Task, ) from tractor.log import get_logger +from tractor.msg import ( + _codec, +) from tractor._state import ( current_actor, is_root_process, @@ -76,6 +85,36 @@ if TYPE_CHECKING: log = get_logger(__name__) +# XXX HACKZONE XXX +# hide exit stack frames on nurseries and cancel-scopes! +# |_ so avoid seeing it when the `pdbp` REPL is first engaged from +# inside a `trio.open_nursery()` scope (with no line after it +# in before the block end??). +# +# TODO: FINALLY got this workin originally with +# `@pdbp.hideframe` around the `wrapper()` def embedded inside +# `_ki_protection_decoratior()`.. which is in the module: +# /home/goodboy/.virtualenvs/tractor311/lib/python3.11/site-packages/trio/_core/_ki.py +# +# -[ ] make an issue and patch for `trio` core? maybe linked +# to the long outstanding `pdb` one below? +# |_ it's funny that there's frame hiding throughout `._run.py` +# but not where it matters on the below exit funcs.. +# +# -[ ] provide a patchset for the lonstanding +# |_ https://github.com/python-trio/trio/issues/1155 +# +# -[ ] make a linked issue to ^ and propose allowing all the +# `._core._run` code to have their `__tracebackhide__` value +# configurable by a `RunVar` to allow getting scheduler frames +# if desired through configuration? +# +# -[ ] maybe dig into the core `pdb` issue why the extra frame is shown +# at all? +# +pdbp.hideframe(trio._core._run.NurseryManager.__aexit__) +pdbp.hideframe(trio._core._run.CancelScope.__exit__) +pdbp.hideframe(_GeneratorContextManager.__exit__) __all__ = [ 'breakpoint', @@ -83,6 +122,28 @@ __all__ = [ ] +class LockStatus( + Struct, + tag=True, + tag_field='msg_type', +): + subactor_uid: tuple[str, str] + cid: str + locked: bool + + +class LockRelease( + Struct, + tag=True, + tag_field='msg_type', +): + subactor_uid: tuple[str, str] + cid: str + + +__msg_spec__: TypeAlias = LockStatus|LockRelease + + class Lock: ''' Actor global debug lock state. @@ -90,41 +151,111 @@ class Lock: Mostly to avoid a lot of ``global`` declarations for now XD. 
''' - repl: MultiActorPdb | None = None + # XXX local ref to the `Pbp` instance, ONLY set in the + # actor-process that currently has activated a REPL + # i.e. it will be `None` (unset) in any other actor-process + # that does not have this lock acquired in the root proc. + repl: PdbREPL|None = None + # placeholder for function to set a ``trio.Event`` on debugger exit # pdb_release_hook: Callable | None = None - _trio_handler: Callable[ - [int, FrameType | None], Any - ] | int | None = None + _trio_handler: ( + Callable[[int, FrameType|None], Any] + |int + | None + ) = None - # actor-wide variable pointing to current task name using debugger - local_task_in_debug: str | None = None + remote_task_in_debug: str|None = None - # NOTE: set by the current task waiting on the root tty lock from - # the CALLER side of the `lock_tty_for_child()` context entry-call - # and must be cancelled if this actor is cancelled via IPC - # request-message otherwise deadlocks with the parent actor may - # ensure + @staticmethod + def get_locking_task_cs() -> trio.CancelScope|None: + if is_root_process(): + return Lock._locking_task_cs + + raise RuntimeError( + '`Lock.locking_task_cs` is invalid in subactors!' + ) + + @staticmethod + def set_locking_task_cs( + cs: trio.CancelScope, + ) -> None: + if not is_root_process(): + raise RuntimeError( + '`Lock.locking_task_cs` is invalid in subactors!' + ) + + Lock._locking_task_cs = cs + + # SUBACTOR ONLY + # ------ - ------- + local_task_in_debug: Task|None = None _debugger_request_cs: trio.CancelScope|None = None + local_pdb_complete: trio.Event|None = None - # NOTE: set only in the root actor for the **local** root spawned task - # which has acquired the lock (i.e. this is on the callee side of - # the `lock_tty_for_child()` context entry). - _root_local_task_cs_in_debug: trio.CancelScope|None = None + # ROOT ONLY + # ------ - ------- + # the root-actor-ONLY singletons for, + # + # - the uid of the actor who's task is using a REPL + # - a literal task-lock, + # - a shielded-cancel-scope around the acquiring task*, + # - a broadcast event to signal no-actor using a REPL in tree, + # - a filter list to block subs-by-uid from locking. + # + # * in case it needs to be manually cancelled in root due to + # a stale lock condition (eg. IPC failure with the locking + # child + global_actor_in_debug: tuple[str, str]|None = None + no_remote_has_tty: trio.Event|None = None + _locking_task_cs: trio.CancelScope|None = None - # actor tree-wide actor uid that supposedly has the tty lock - global_actor_in_debug: tuple[str, str] = None - - local_pdb_complete: trio.Event | None = None - no_remote_has_tty: trio.Event | None = None - - # lock in root actor preventing multi-access to local tty _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() + _blocked: set[tuple[str, str]] = set() # `Actor.uid` block list + # TODO: should go on `PbpREPL`? 
_orig_sigint_handler: Callable | None = None - _blocked: set[tuple[str, str]] = set() + @classmethod + def repr(cls) -> str: + + # both root and subs + fields: str = ( + f'repl: {cls.repl}\n' + f'local_repl_task: {cls.local_task_in_debug}\n' + ) + + if is_root_process(): + lock_stats: trio.LockStatistics = cls._debug_lock.statistics() + fields += ( + f'global_actor_in_debug: {cls.global_actor_in_debug}\n' + f'no_remote_has_tty: {cls.no_remote_has_tty}\n' + f'remote_task_in_debug: {cls.remote_task_in_debug}\n' + f'_locking_task_cs: {cls.get_locking_task_cs()}\n' + f'_blocked: {cls._blocked}\n\n' + + f'_debug_lock: {cls._debug_lock}\n' + f'lock_stats: {lock_stats}\n' + ) + else: + fields += ( + f'local_task_in_debug: {cls.local_task_in_debug}\n' + f'local_pdb_complete: {cls.local_pdb_complete}\n' + f'_debugger_request_cs: {cls._debugger_request_cs}\n' + ) + + body: str = textwrap.indent( + fields, + prefix=' |_', + ) + return ( + f'<{cls.__name__}(\n' + f'{body}' + ')>' + ) + + # TODO: move to PdbREPL! @classmethod def shield_sigint(cls): ''' @@ -218,19 +349,35 @@ class Lock: else: cls._debug_lock.release() - except RuntimeError: + except RuntimeError as rte: # uhhh makes no sense but been seeing the non-owner # release error even though this is definitely the task # that locked? owner = cls._debug_lock.statistics().owner + # if ( + # owner + # and + # cls.remote_task_in_debug is None + # ): + # raise RuntimeError( + # 'Stale `Lock` detected, no remote task active!?\n' + # f'|_{owner}\n' + # # f'{Lock}' + # ) from rte + if owner: - raise + raise rte + + # OW suppress, can't member why tho .. XD + # something somethin corrupts a cancel-scope + # somewhere.. try: # sometimes the ``trio`` might already be terminated in # which case this call will raise. if cls.local_pdb_complete is not None: cls.local_pdb_complete.set() + finally: # restore original sigint handler cls.unshield_sigint() @@ -241,10 +388,33 @@ class Lock: cls.local_task_in_debug = None +# TODO: actually use this instead throughout for subs! +class DebugStatus: + ''' + Singleton-state for debugging machinery in a subactor. + + Composes conc primitives for syncing with a root actor to + acquire the tree-global (TTY) `Lock` such that only ever one + actor's task can have the REPL active at a given time. + + ''' + repl: PdbREPL|None = None + lock_status: LockStatus|None = None + + repl_task: Task|None = None + # local_task_in_debug: Task|None = None + + req_cs: trio.CancelScope|None = None + # _debugger_request_cs: trio.CancelScope|None = None + + repl_release: trio.Event|None = None + # local_pdb_complete: trio.Event|None = None + class TractorConfig(pdbp.DefaultConfig): ''' - Custom ``pdbp`` goodness :surfer: + Custom `pdbp` config which tries to use the best tradeoff + between pretty and minimal. ''' use_pygments: bool = True @@ -255,21 +425,41 @@ class TractorConfig(pdbp.DefaultConfig): # fixes line spacing issue when resizing terminal B) truncate_long_lines: bool = False + # ------ - ------ + # our own custom config vars mostly + # for syncing with the actor tree's singleton + # TTY `Lock`. -class MultiActorPdb(pdbp.Pdb): + +class PdbREPL(pdbp.Pdb): ''' - Add teardown hooks to the regular ``pdbp.Pdb``. + Add teardown hooks and local state describing any + ongoing TTY `Lock` request dialog. ''' # override the pdbp config with our coolio one + # NOTE: this is only loaded when no `~/.pdbrc` exists + # so we should prolly pass it into the .__init__() instead? + # i dunno, see the `DefaultFactory` and `pdb.Pdb` impls. 
DefaultConfig = TractorConfig + status = DebugStatus + # def preloop(self): # print('IN PRELOOP') # super().preloop() - # TODO: figure out how to disallow recursive .set_trace() entry - # since that'll cause deadlock for us. + # TODO: cleaner re-wrapping of all this? + # -[ ] figure out how to disallow recursive .set_trace() entry + # since that'll cause deadlock for us. + # -[ ] maybe a `@cm` to call `super().()`? + # -[ ] look at hooking into the `pp` hook specially with our + # own set of pretty-printers? + # * `.pretty_struct.Struct.pformat()` + # * `.pformat(MsgType.pld)` + # * `.pformat(Error.tb_str)`? + # * .. maybe more? + # def set_continue(self): try: super().set_continue() @@ -282,6 +472,17 @@ class MultiActorPdb(pdbp.Pdb): finally: Lock.release() + # TODO: special handling where we just want the next LOC and + # not to resume to the next pause/crash point? + # def set_next( + # self, + # frame: FrameType + # ) -> None: + # try: + # super().set_next(frame) + # finally: + # Lock.release() + # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch # my d$%&. @@ -314,7 +515,8 @@ class MultiActorPdb(pdbp.Pdb): @acm async def _acquire_debug_lock_from_root_task( - uid: tuple[str, str] + subactor_uid: tuple[str, str], + remote_task_uid: str, ) -> AsyncIterator[trio.StrictFIFOLock]: ''' @@ -326,16 +528,31 @@ async def _acquire_debug_lock_from_root_task( to the ``pdb`` repl. ''' - task_name: str = current_task().name + # task_name: str = current_task().name we_acquired: bool = False log.runtime( - f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}" + f'Attempting to acquire TTY lock for,\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' ) try: - log.runtime( - f"entering lock checkpoint, remote task: {task_name}:{uid}" + pre_msg: str = ( + f'Entering lock checkpoint for sub-actor\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' ) + stats = Lock._debug_lock.statistics() + if owner := stats.owner: + # and Lock.no_remote_has_tty is not None + pre_msg += ( + f'\n' + f'`Lock` already held by local task\n' + f'{owner}\n\n' + f'On behalf of remote task: {Lock.remote_task_in_debug!r}\n' + ) + log.runtime(pre_msg) + # NOTE: if the surrounding cancel scope from the # `lock_tty_for_child()` caller is cancelled, this line should # unblock and NOT leave us in some kind of @@ -349,9 +566,14 @@ async def _acquire_debug_lock_from_root_task( # can try to avoid clobbering any connection from a child # that's currently relying on it. Lock.no_remote_has_tty = trio.Event() + Lock.remote_task_in_debug = remote_task_uid - Lock.global_actor_in_debug = uid - log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}") + Lock.global_actor_in_debug = subactor_uid + log.runtime( + f'TTY lock acquired for,\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' + ) # NOTE: critical section: this yield is unshielded! @@ -368,7 +590,8 @@ async def _acquire_debug_lock_from_root_task( finally: if ( we_acquired - and Lock._debug_lock.locked() + and + Lock._debug_lock.locked() ): Lock._debug_lock.release() @@ -380,16 +603,37 @@ async def _acquire_debug_lock_from_root_task( stats = Lock._debug_lock.statistics() if ( not stats.owner + # and Lock.no_remote_has_tty is not None ): - log.runtime(f"No more tasks waiting on tty lock! 
says {uid}") + # log.runtime( + log.info( + f'No more child ctx tasks hold the TTY lock!\n' + f'last subactor: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' + ) if Lock.no_remote_has_tty is not None: + # set and release Lock.no_remote_has_tty.set() Lock.no_remote_has_tty = None + Lock.remote_task_in_debug = None + else: + log.warning( + 'Not signalling `Lock.no_remote_has_tty` since it has value:\n' + f'{Lock.no_remote_has_tty}\n' + ) + else: + log.info( + f'A child ctx tasks still holds the TTY lock ??\n' + f'last subactor: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' + f'current local owner task: {stats.owner}\n' + ) Lock.global_actor_in_debug = None - log.runtime( - f"TTY lock released, remote task: {task_name}:{uid}" + 'TTY lock released by child\n' + f'last subactor: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' ) @@ -397,9 +641,14 @@ async def _acquire_debug_lock_from_root_task( async def lock_tty_for_child( ctx: tractor.Context, - subactor_uid: tuple[str, str] -) -> str: + # TODO: when we finally get a `Start.params: ParamSpec` + # working it'd sure be nice to have `msgspec` auto-decode this + # to an actual tuple XD + subactor_uid: tuple[str, str], + subactor_task_uid: tuple[str, int], + +) -> LockStatus|LockRelease: ''' Lock the TTY in the root process of an actor tree in a new inter-actor-context-task such that the ``pdbp`` debugger console @@ -411,53 +660,141 @@ async def lock_tty_for_child( highly reliable at releasing the mutex complete! ''' - task_name: str = current_task().name + + req_task_uid: tuple = tuple(subactor_task_uid) + if req_task_uid in Lock._blocked: + raise RuntimeError( + f'The same remote task already has an active request for TTY lock ??\n\n' + f'task uid: {req_task_uid}\n' + f'subactor uid: {subactor_uid}\n' + ) + + Lock._blocked.add(req_task_uid) + + root_task_name: str = current_task().name if tuple(subactor_uid) in Lock._blocked: log.warning( - f'Actor {subactor_uid} is blocked from acquiring debug lock\n' - f"remote task: {task_name}:{subactor_uid}" + f'Subactor is blocked from acquiring debug lock..\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n' ) ctx._enter_debugger_on_cancel: bool = False await ctx.cancel(f'Debug lock blocked for {subactor_uid}') - return 'pdb_lock_blocked' + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) # TODO: when we get to true remote debugging # this will deliver stdin data? log.debug( - "Attempting to acquire TTY lock\n" - f"remote task: {task_name}:{subactor_uid}" + 'Subactor attempting to acquire TTY lock\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n' ) - - log.debug(f"Actor {subactor_uid} is WAITING on stdin hijack lock") Lock.shield_sigint() - try: with ( + # NOTE: though a cs is created for every subactor lock + # REQUEST in this ctx-child task, only the root-task + # holding the `Lock` (on behalf of the ctx parent task + # in a subactor) will set + # `Lock._locking_task_cs` such that if the + # lock holdingn task ever needs to be cancelled (since + # it's shielded by default) that global ref can be + # used to do so! trio.CancelScope(shield=True) as debug_lock_cs, + + _codec.limit_msg_spec( + payload_spec=__msg_spec__, + ) as codec, ): - Lock._root_local_task_cs_in_debug = debug_lock_cs - async with _acquire_debug_lock_from_root_task(subactor_uid): + # sanity? + # TODO: don't need the ref right? 
+ assert codec is _codec.current_codec() + + async with _acquire_debug_lock_from_root_task( + subactor_uid, + subactor_task_uid, + ): + # XXX SUPER IMPORTANT BELOW IS ON THIS LINE XXX + # without that the root cs might be, + # - set and then removed in the finally block by + # a task that never acquired the lock, leaving + # - the task that DID acquire the lock STUCK since + # it's original cs was GC-ed bc the first task + # already set the global ref to `None` + Lock.set_locking_task_cs(debug_lock_cs) # indicate to child that we've locked stdio - await ctx.started('Locked') - log.debug( - f"Actor {subactor_uid} acquired stdin hijack lock" + await ctx.started( + LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=True, + ) ) + log.debug( f'Actor {subactor_uid} acquired TTY lock') + # wait for unlock pdb by child async with ctx.open_stream() as stream: - assert await stream.receive() == 'pdb_unlock' + release_msg: LockRelease = await stream.receive() - return "pdb_unlock_complete" + # TODO: security around only releasing if + # these match? + log.pdb( + f'TTY lock released requested\n\n' + f'{release_msg}\n' + ) + assert release_msg.cid == ctx.cid + assert release_msg.subactor_uid == tuple(subactor_uid) + + log.debug(f'Actor {subactor_uid} released TTY lock') + + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) finally: - Lock._root_local_task_cs_in_debug = None + debug_lock_cs.cancel() + Lock.set_locking_task_cs(None) Lock.unshield_sigint() +@cm +def apply_debug_codec() -> _codec.MsgCodec: + ''' + Apply the subactor TTY `Lock`-ing protocol's msgspec temporarily + (only in the current task). + + ''' + with ( + _codec.limit_msg_spec( + payload_spec=__msg_spec__, + ) as debug_codec, + ): + assert debug_codec is _codec.current_codec() + log.pdb( + 'Applied `.devx._debug` msg-spec via codec\n' + f'{debug_codec}\n' + ) + yield debug_codec + + log.pdb( + 'REMOVED `.devx._debug` msg-spec via codec\n' + f'{debug_codec}\n' + ) + + async def wait_for_parent_stdin_hijack( actor_uid: tuple[str, str], + task_uid: tuple[str, int], task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED ): ''' @@ -476,25 +813,30 @@ async def wait_for_parent_stdin_hijack( ''' from .._discovery import get_root - with trio.CancelScope(shield=True) as cs: + with ( + trio.CancelScope(shield=True) as cs, + apply_debug_codec(), + ): Lock._debugger_request_cs = cs - try: + # TODO: merge into sync async with ? async with get_root() as portal: - # this syncs to child's ``Context.started()`` call. async with portal.open_context( lock_tty_for_child, subactor_uid=actor_uid, + subactor_task_uid=task_uid, - ) as (ctx, val): - - log.debug('locked context') - assert val == 'Locked' + ) as (ctx, resp): + log.pdb( + 'Subactor locked TTY per msg\n' + f'{resp}\n' + ) + assert resp.subactor_uid == actor_uid + assert resp.cid async with ctx.open_stream() as stream: - try: - # unblock local caller + try: # to unblock local caller assert Lock.local_pdb_complete task_status.started(cs) @@ -503,14 +845,22 @@ async def wait_for_parent_stdin_hijack( await Lock.local_pdb_complete.wait() finally: - # TODO: shielding currently can cause hangs... 
- # with trio.CancelScope(shield=True): - await stream.send('pdb_unlock') + await stream.send( + LockRelease( + subactor_uid=actor_uid, + cid=resp.cid, + ) + ) # sync with callee termination - assert await ctx.result() == "pdb_unlock_complete" + status: LockStatus = await ctx.result() + assert not status.locked - log.debug('exitting child side locking task context') + log.pdb( + 'TTY lock was released for subactor with msg\n\n' + f'{status}\n\n' + 'Exitting {ctx.side!r} side locking of locking ctx' + ) except ContextCancelled: log.warning('Root actor cancelled debug lock') @@ -518,12 +868,17 @@ async def wait_for_parent_stdin_hijack( finally: Lock.local_task_in_debug = None - log.debug('Exiting debugger from child') + log.debug('Exiting debugger TTY lock request func from child') -def mk_mpdb() -> MultiActorPdb: + log.cancel('Reverting SIGINT handler!') + Lock.unshield_sigint() + + + +def mk_mpdb() -> PdbREPL: ''' - Deliver a new `MultiActorPdb`: a multi-process safe `pdbp` + Deliver a new `PdbREPL`: a multi-process safe `pdbp` REPL using the magic of SC! Our `pdb.Pdb` subtype accomplishes multi-process safe debugging @@ -538,7 +893,7 @@ def mk_mpdb() -> MultiActorPdb: by either explicit requests in the runtime or ''' - pdb = MultiActorPdb() + pdb = PdbREPL() # Always shield out SIGINTs for subactors when REPL is active. # @@ -560,7 +915,6 @@ def mk_mpdb() -> MultiActorPdb: def shield_sigint_handler( signum: int, frame: 'frame', # type: ignore # noqa - # pdb_obj: MultiActorPdb | None = None, *args, ) -> None: @@ -577,6 +931,7 @@ def shield_sigint_handler( uid_in_debug: tuple[str, str]|None = Lock.global_actor_in_debug actor: Actor = current_actor() + case_handled: bool = False def do_cancel(): # If we haven't tried to cancel the runtime then do that instead @@ -586,107 +941,202 @@ def shield_sigint_handler( actor.cancel_soon() # If the runtime is already cancelled it likely means the user - # hit ctrl-c again because teardown didn't full take place in + # hit ctrl-c again because teardown didn't fully take place in # which case we do the "hard" raising of a local KBI. else: raise KeyboardInterrupt + # try to see if the supposed (sub)actor in debug still + # has an active connection to *this* actor, and if not + # it's likely they aren't using the TTY lock / debugger + # and we should propagate SIGINT normally. any_connected: bool = False - if uid_in_debug is not None: - # try to see if the supposed (sub)actor in debug still - # has an active connection to *this* actor, and if not - # it's likely they aren't using the TTY lock / debugger - # and we should propagate SIGINT normally. - chans: list[tractor.Channel] = actor._peers.get(tuple(uid_in_debug)) + chans: list[tractor.Channel] = actor._peers.get( + tuple(uid_in_debug) + ) if chans: any_connected = any(chan.connected() for chan in chans) if not any_connected: log.warning( 'A global actor reported to be in debug ' - 'but no connection exists for this child:\n' - f'{uid_in_debug}\n' + 'but no connection exists for this child!?\n' + f'subactor_uid: {uid_in_debug}\n\n' 'Allowing SIGINT propagation..' ) return do_cancel() # only set in the actor actually running the REPL - pdb_obj: MultiActorPdb|None = Lock.repl + repl: PdbREPL|None = Lock.repl + # TODO: maybe we should flatten out all these cases using + # a match/case? + # # root actor branch that reports whether or not a child # has locked debugger. 
- if ( - is_root_process() - and uid_in_debug is not None + if is_root_process(): + lock_cs: trio.CancelScope = Lock.get_locking_task_cs() - # XXX: only if there is an existing connection to the - # (sub-)actor in debug do we ignore SIGINT in this - # parent! Otherwise we may hang waiting for an actor - # which has already terminated to unlock. - and any_connected - ): - # we are root and some actor is in debug mode - # if uid_in_debug is not None: + log.warning( + f'root {actor.uid} handling SIGINT\n' + f'any_connected: {any_connected}\n\n' - if pdb_obj: - name = uid_in_debug[0] - if name != 'root': - log.pdb( - f"Ignoring SIGINT, child in debug mode: `{uid_in_debug}`" - ) + f'{Lock.repr()}\n' + ) + + maybe_stale_lock_cs: bool = ( + lock_cs is not None + # and not lock_cs.cancel_called + and uid_in_debug is None + ) + if maybe_stale_lock_cs: + log.warning( + 'Stale `Lock._locking_task_cs: CancelScope` DETECTED?\n' + f'|_{lock_cs}\n\n' + ) + lock_cs.cancel() + + if uid_in_debug: # "someone" is (ostensibly) using debug `Lock` + name_in_debug: str = uid_in_debug[0] + if ( + not repl # but it's NOT us, the root actor. + ): + # sanity: since no repl ref is set, we def shouldn't + # be the lock owner! + assert name_in_debug != 'root' + + # XXX: only if there is an existing connection to the + # (sub-)actor in debug do we ignore SIGINT in this + # parent! Otherwise we may hang waiting for an actor + # which has already terminated to unlock. + if any_connected: # there are subactors we can contact + # NOTE: don't emit this with `.pdb()` level in + # root without a higher level. + log.debug( + f'Ignoring SIGINT while debug REPL in use by child\n' + f'subactor: {uid_in_debug}\n' + ) + # returns here minus tail logic + case_handled = True + + else: + message: str = ( + f'Ignoring SIGINT while debug REPL SUPPOSEDLY in use by child\n' + f'subactor: {uid_in_debug}\n\n' + f'BUT, no child actors are contactable!?!?\n\n' + + # f'Reverting to def `trio` SIGINT handler..\n' + ) + + if maybe_stale_lock_cs: + lock_cs.cancel() + message += ( + 'Maybe `Lock._locking_task_cs: CancelScope` is stale?\n' + f'|_{lock_cs}\n\n' + ) + + log.warning(message) + Lock.unshield_sigint() + case_handled = True else: + assert name_in_debug == 'root' # we are the registered locker + assert repl # we have a pdb REPL engaged log.pdb( - "Ignoring SIGINT while in debug mode" + f'Ignoring SIGINT while debug REPL in use\n' + f'root actor: {uid_in_debug}\n' ) - elif ( - is_root_process() - ): - if pdb_obj: - log.pdb( - "Ignoring SIGINT since debug mode is enabled" + # returns here minus tail logic + case_handled = True + + # root actor still has this SIGINT handler active without + # an actor using the `Lock` (a bug state) ?? + # => so immediately cancel any stale lock cs and revert + # the handler! + else: + # XXX revert back to ``trio`` handler since this handler shouldn't + # be enabled withtout an actor using a debug REPL! + log.warning( + 'Ignoring SIGINT in root actor but no actor using a `pdb` REPL?\n' + 'Reverting SIGINT handler to `trio` default!\n' ) - if ( - Lock._root_local_task_cs_in_debug - and not Lock._root_local_task_cs_in_debug.cancel_called - ): - Lock._root_local_task_cs_in_debug.cancel() + if maybe_stale_lock_cs: + lock_cs.cancel() - # revert back to ``trio`` handler asap! 
Lock.unshield_sigint() + case_handled = True # child actor that has locked the debugger elif not is_root_process(): + log.warning( + f'Subactor {actor.uid} handling SIGINT\n\n' + f'{Lock.repr()}\n' + ) - chan: Channel = actor._parent_chan - if not chan or not chan.connected(): + rent_chan: Channel = actor._parent_chan + if ( + rent_chan is None + or + not rent_chan.connected() + ): log.warning( - 'A global actor reported to be in debug ' - 'but no connection exists for its parent:\n' + 'A global sub-actor reported to be in debug ' + 'but it has no connection to its parent ??\n' f'{uid_in_debug}\n' 'Allowing SIGINT propagation..' ) - return do_cancel() + Lock.unshield_sigint() + # do_cancel() + case_handled = True - task: str | None = Lock.local_task_in_debug + task: str|None = Lock.local_task_in_debug if ( task - and pdb_obj + and + repl ): + # if repl: log.pdb( - f"Ignoring SIGINT while task in debug mode: `{task}`" + f'Ignoring SIGINT while local task using debug REPL\n' + f'|_{task}\n' + f' |_{repl}\n' ) + case_handled = True + else: + msg: str = ( + 'SIGINT shield handler still active BUT, \n\n' + ) + if task is None: + msg += ( + f'- No local task claims to be in debug?\n' + f' |_{task}\n\n' + ) + + if repl is None: + msg += ( + f'- No local REPL is currently active?\n' + f' |_{repl}\n\n' + ) + + log.warning( + msg + + + 'Reverting handler to `trio` default!\n' + ) + Lock.unshield_sigint() + case_handled = True + + # XXX ensure that the reverted-to-handler actually is + # able to rx what should have been **this** KBI ;) + do_cancel() + # raise KeyboardInterrupt # TODO: how to handle the case of an intermediary-child actor # that **is not** marked in debug mode? See oustanding issue: # https://github.com/goodboy/tractor/issues/320 # elif debug_mode(): - else: # XXX: shouldn't ever get here? - raise RuntimeError("WTFWTFWTF") - # raise KeyboardInterrupt("WTFWTFWTF") - # NOTE: currently (at least on ``fancycompleter`` 0.9.2) # it looks to be that the last command that was run (eg. ll) # will be repeated by default. @@ -695,31 +1145,37 @@ def shield_sigint_handler( # we want to alert the user that more input is expect since # nothing has been done dur to ignoring sigint. if ( - pdb_obj # only when this actor has a REPL engaged + repl # only when this actor has a REPL engaged ): # XXX: yah, mega hack, but how else do we catch this madness XD - if pdb_obj.shname == 'xonsh': - pdb_obj.stdout.write(pdb_obj.prompt) + if repl.shname == 'xonsh': + repl.stdout.write(repl.prompt) - pdb_obj.stdout.flush() + repl.stdout.flush() # TODO: make this work like sticky mode where if there is output # detected as written to the tty we redraw this part underneath # and erase the past draw of this same bit above? - # pdb_obj.sticky = True - # pdb_obj._print_if_sticky() + # repl.sticky = True + # repl._print_if_sticky() # also see these links for an approach from ``ptk``: # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040 # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py + if not case_handled: + log.critical( + f'{actor.uid} UNHANDLED SIGINT !?!?\n' + # TODO: pprint for `Lock`? 
+ ) + _pause_msg: str = 'Attaching to pdb REPL in actor' def _set_trace( actor: tractor.Actor|None = None, - pdb: MultiActorPdb|None = None, + pdb: PdbREPL|None = None, shield: bool = False, extra_frames_up_when_async: int = 1, @@ -767,14 +1223,16 @@ def _set_trace( log.pdb( f'{msg}\n' '|\n' - f'|_ {actor.uid}\n' + # TODO: make an `Actor.__repr()__` + # f'|_ {current_task()} @ {actor.name}\n' + f'|_ {current_task()}\n' ) # no f!#$&* idea, but when we're in async land # we need 2x frames up? for i in range(extra_frames_up_when_async): frame: FrameType = frame.f_back log.debug( - f'Going up frame {i} -> {frame}\n' + f'Going up frame_{i}:\n|_{frame}\n' ) # engage ze REPL @@ -787,7 +1245,7 @@ async def _pause( debug_func: Callable = _set_trace, # NOTE: must be passed in the `.pause_from_sync()` case! - pdb: MultiActorPdb|None = None, + pdb: PdbREPL|None = None, # TODO: allow caller to pause despite task cancellation, # exactly the same as wrapping with: @@ -799,6 +1257,8 @@ async def _pause( # shield: bool = False, hide_tb: bool = True, + extra_frames_up_when_async: int = 4, + task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED ) -> None: @@ -813,7 +1273,9 @@ async def _pause( __tracebackhide__: bool = hide_tb actor: Actor = current_actor() try: - task_name: str = trio.lowlevel.current_task().name + # TODO: use the `Task` instance instead for `is` checks + # below! + task: Task = trio.lowlevel.current_task() except RuntimeError as rte: if actor.is_infected_aio(): raise RuntimeError( @@ -821,48 +1283,142 @@ async def _pause( 'for infected `asyncio` mode!' ) from rte + # task_name: str = task.name + if ( not Lock.local_pdb_complete - or Lock.local_pdb_complete.is_set() + or + Lock.local_pdb_complete.is_set() ): Lock.local_pdb_complete = trio.Event() if debug_func is not None: - debug_func = partial( - debug_func, - ) + debug_func = partial(debug_func) if pdb is None: - pdb: MultiActorPdb = mk_mpdb() + pdb: PdbREPL = mk_mpdb() + + def _enter_repl_sync( + debug_func: Callable, + ) -> None: + __tracebackhide__: bool = hide_tb + try: + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + # + if debug_func is None: + task_status.started(Lock) + else: + # block here one (at the appropriate frame *up*) where + # ``breakpoint()`` was awaited and begin handling stdio. + log.debug('Entering sync world of the `pdb` REPL..') + try: + # log.critical( + # f'stack len: {len(pdb.stack)}\n' + # ) + debug_func( + actor, + pdb, + extra_frames_up_when_async=extra_frames_up_when_async, + shield=shield, + ) + except BaseException: + log.exception( + 'Failed to invoke internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + raise + + except bdb.BdbQuit: + Lock.release() + raise + + except BaseException: + log.exception( + 'Failed to engage debugger via `_pause()` ??\n' + ) + raise + + if is_root_process(): + + # we also wait in the root-parent for any child that + # may have the tty locked prior + # TODO: wait, what about multiple root tasks acquiring it though? + if Lock.global_actor_in_debug == actor.uid: + # re-entrant root process already has it: noop. + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) + await trio.lowlevel.checkpoint() + return + + # XXX: since we need to enter pdb synchronously below, + # we have to release the lock manually from pdb completion + # callbacks. Can't think of a nicer way then this atm. 
+ if Lock._debug_lock.locked(): + log.warning( + 'attempting to shield-acquire active TTY lock' + f' owned by {Lock.global_actor_in_debug}' + ) + + # must shield here to avoid hitting a ``Cancelled`` and + # a child getting stuck bc we clobbered the tty + with trio.CancelScope(shield=True): + await Lock._debug_lock.acquire() + else: + # may be cancelled + await Lock._debug_lock.acquire() + + Lock.global_actor_in_debug = actor.uid + Lock.local_task_in_debug = task + Lock.repl = pdb + + # enter REPL from root, no TTY locking IPC ctx necessary + _enter_repl_sync(debug_func) + return # next branch is mutex and for subactors # TODO: need a more robust check for the "root" actor - if ( + elif ( not is_root_process() and actor._parent_chan # a connected child ): - if Lock.local_task_in_debug: # Recurrence entry case: this task already has the lock and # is likely recurrently entering a breakpoint - if Lock.local_task_in_debug == task_name: - # noop on recurrent entry case but we want to trigger - # a checkpoint to allow other actors error-propagate and - # potetially avoid infinite re-entries in some subactor. + # + # NOTE: noop on recurrent entry case but we want to trigger + # a checkpoint to allow other actors error-propagate and + # potetially avoid infinite re-entries in some + # subactor that would otherwise not bubble until the + # next checkpoint was hit. + if ( + (repl_task := Lock.local_task_in_debug) + and + repl_task is task + ): + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) await trio.lowlevel.checkpoint() return - # if **this** actor is already in debug mode block here - # waiting for the control to be released - this allows - # support for recursive entries to `tractor.breakpoint()` - log.warning(f"{actor.uid} already has a debug lock, waiting...") - + # if **this** actor is already in debug REPL we want + # to maintain actor-local-task mutex access, so block + # here waiting for the control to be released - this + # -> allows for recursive entries to `tractor.pause()` + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'waiting for release..' + ) await Lock.local_pdb_complete.wait() await trio.sleep(0.1) # mark local actor as "in debug mode" to avoid recurrent # entries/requests to the root process - Lock.local_task_in_debug = task_name + Lock.local_task_in_debug = task # this **must** be awaited by the caller and is done using the # root nursery so that the debugger can continue to run without @@ -875,91 +1431,54 @@ async def _pause( # actor._service_n.cancel_scope.shield = shield # ``` # but not entirely sure if that's a sane way to implement it? - try: - with trio.CancelScope(shield=True): - await actor._service_n.start( + + # NOTE: MUST it here bc multiple tasks are spawned by any + # one sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered here. SO ensure + # the codec is set before either are run! 
+ # + with ( + # _codec.limit_msg_spec( + # payload_spec=__msg_spec__, + # ) as debug_codec, + trio.CancelScope(shield=shield), + ): + # async with trio.open_nursery() as tn: + # tn.cancel_scope.shield = True + try: + # cs: trio.CancelScope = await tn.start( + cs: trio.CancelScope = await actor._service_n.start( wait_for_parent_stdin_hijack, actor.uid, + (task.name, id(task)), ) + # our locker task should be the one in ctx + # with the root actor + assert Lock._debugger_request_cs is cs + + # XXX used by the SIGINT handler to check if + # THIS actor is in REPL interaction Lock.repl = pdb - except RuntimeError: - Lock.release() + except RuntimeError: + Lock.release() - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. - return + if actor._cancel_called: + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. + return - raise - - elif is_root_process(): - - # we also wait in the root-parent for any child that - # may have the tty locked prior - # TODO: wait, what about multiple root tasks acquiring it though? - if Lock.global_actor_in_debug == actor.uid: - # re-entrant root process already has it: noop. - return - - # XXX: since we need to enter pdb synchronously below, - # we have to release the lock manually from pdb completion - # callbacks. Can't think of a nicer way then this atm. - if Lock._debug_lock.locked(): - log.warning( - 'Root actor attempting to shield-acquire active tty lock' - f' owned by {Lock.global_actor_in_debug}') - - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - with trio.CancelScope(shield=True): - await Lock._debug_lock.acquire() - else: - # may be cancelled - await Lock._debug_lock.acquire() - - Lock.global_actor_in_debug = actor.uid - Lock.local_task_in_debug = task_name - Lock.repl = pdb - - try: - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - # - if debug_func is None: - task_status.started(Lock) - - else: - # block here one (at the appropriate frame *up*) where - # ``breakpoint()`` was awaited and begin handling stdio. - log.debug('Entering sync world of the `pdb` REPL..') - try: - # log.critical( - # f'stack len: {len(pdb.stack)}\n' - # ) - debug_func( - actor, - pdb, - extra_frames_up_when_async=2, - shield=shield, - ) - except BaseException: - log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' - ) raise - except bdb.BdbQuit: - Lock.release() - raise + # enter REPL + + try: + _enter_repl_sync(debug_func) + finally: + Lock.unshield_sigint() - except BaseException: - log.exception( - 'Failed to engage debugger via `_pause()` ??\n' - ) - raise # XXX: apparently we can't do this without showing this frame # in the backtrace on first entry to the REPL? 
Seems like an odd @@ -1017,15 +1536,21 @@ async def pause( # __tracebackhide__: bool = True # super().__exit__(*args, **kwargs) - trio.CancelScope.__enter__.__tracebackhide__ = True - trio.CancelScope.__exit__.__tracebackhide__ = True + # trio.CancelScope.__enter__.__tracebackhide__ = True + # trio.CancelScope.__exit__.__tracebackhide__ = True # import types # with trio.CancelScope(shield=shield) as cs: # cs.__exit__ = types.MethodType(_exit, cs) # cs.__exit__.__tracebackhide__ = True - with trio.CancelScope(shield=shield) as cs: + # TODO: LOL, solved this with the `pdb.hideframe` stuff + # at top-of-mod.. so I guess we can just only use this + # block right? + with trio.CancelScope( + shield=shield, + ) as cs: + print(f'debug cs is {cs}\n') # setattr(cs.__exit__.__func__, '__tracebackhide__', True) # setattr(cs.__enter__.__func__, '__tracebackhide__', True) @@ -1135,7 +1660,7 @@ def pause_from_sync( # raises on not-found by default greenback: ModuleType = maybe_import_greenback() - mdb: MultiActorPdb = mk_mpdb() + mdb: PdbREPL = mk_mpdb() # run async task which will lock out the root proc's TTY. if not Lock.is_main_trio_thread(): @@ -1157,7 +1682,7 @@ def pause_from_sync( ) ) # TODO: maybe the `trio.current_task()` id/name if avail? - Lock.local_task_in_debug: str = str(threading.current_thread().name) + Lock.local_task_in_debug: str = str(threading.current_thread()) else: # we are presumably the `trio.run()` + main thread greenback.await_( @@ -1167,7 +1692,7 @@ def pause_from_sync( hide_tb=hide_tb, ) ) - Lock.local_task_in_debug: str = current_task().name + Lock.local_task_in_debug: str = current_task() # TODO: ensure we aggressively make the user aware about # entering the global ``breakpoint()`` built-in from sync @@ -1198,7 +1723,11 @@ async def breakpoint(**kwargs): '`tractor.breakpoint()` is deprecated!\n' 'Please use `tractor.pause()` instead!\n' ) - await pause(**kwargs) + __tracebackhide__: bool = True + await pause( + # extra_frames_up_when_async=6, + **kwargs + ) _crash_msg: str = ( @@ -1208,11 +1737,11 @@ _crash_msg: str = ( def _post_mortem( actor: tractor.Actor, - pdb: MultiActorPdb, + pdb: PdbREPL, shield: bool = False, # only for compat with `._set_trace()`.. - extra_frames_up_when_async=0, + extra_frames_up_when_async=1, ) -> None: ''' @@ -1225,7 +1754,11 @@ def _post_mortem( log.pdb( f'{_crash_msg}\n' '|\n' - f'|_ {actor.uid}\n' + f'|_ {current_task()}\n' + + # f'|_ @{actor.uid}\n' + # TODO: make an `Actor.__repr()__` + # f'|_ {current_task()} @ {actor.name}\n' ) # TODO: only replacing this to add the @@ -1278,9 +1811,12 @@ async def _maybe_enter_pm(err): @acm async def acquire_debug_lock( subactor_uid: tuple[str, str], -) -> AsyncGenerator[None, tuple]: +) -> AsyncGenerator[ + trio.CancelScope|None, + tuple, +]: ''' - Grab root's debug lock on entry, release on exit. + Request to acquire the TTY `Lock` in the root actor, release on exit. This helper is for actor's who don't actually need to acquired the debugger but want to wait until the lock is free in the @@ -1297,7 +1833,7 @@ async def acquire_debug_lock( wait_for_parent_stdin_hijack, subactor_uid, ) - yield None + yield cs cs.cancel() @@ -1328,7 +1864,6 @@ async def maybe_wait_for_debugger( # Instead try to wait for pdb to be released before # tearing down. 
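         #
         # (a rough sketch of the poll loop further below:) re-check
         # the `Lock.no_remote_has_tty` event up to `poll_steps`
         # times, shield-waiting on it whenever some sub-actor (the
         # `in_debug` uid) still holds the tty lock.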
in_debug: tuple[str, str]|None = Lock.global_actor_in_debug - debug_complete: trio.Event|None = Lock.no_remote_has_tty if in_debug == current_actor().uid: log.debug( @@ -1340,7 +1875,7 @@ async def maybe_wait_for_debugger( elif in_debug: msg += ( - f'Debug `Lock` in use by subactor: {in_debug}\n' + f'Debug `Lock` in use by subactor\n|\n|_{in_debug}\n' ) # TODO: could this make things more deterministic? # wait to see if a sub-actor task will be @@ -1358,17 +1893,17 @@ async def maybe_wait_for_debugger( for istep in range(poll_steps): if ( - debug_complete - and not debug_complete.is_set() + Lock.no_remote_has_tty is not None + and not Lock.no_remote_has_tty.is_set() and in_debug is not None ): log.pdb( msg + - 'Root is waiting on tty lock to release..\n' + '\nRoot is waiting on tty lock to release..\n' ) with trio.CancelScope(shield=True): - await debug_complete.wait() + await Lock.no_remote_has_tty.wait() log.pdb( f'Child subactor released debug lock\n' f'|_{in_debug}\n' @@ -1378,8 +1913,8 @@ async def maybe_wait_for_debugger( if ( in_debug is None and ( - debug_complete is None - or debug_complete.is_set() + Lock.no_remote_has_tty is None + or Lock.no_remote_has_tty.is_set() ) ): log.pdb( -- 2.34.1 From 846aff272482214fcc0176273749294480463748 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 17 Apr 2024 23:19:31 -0400 Subject: [PATCH 054/190] The src error to `_raise_from_no_key_in_msg()` is always an attr-error now! --- tractor/_exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 65637fb5..4ace626f 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -935,7 +935,7 @@ def is_multi_cancelled(exc: BaseException) -> bool: def _raise_from_no_key_in_msg( ctx: Context, msg: MsgType, - src_err: KeyError, + src_err: AttributeError, log: StackLevelAdapter, # caller specific `log` obj expect_msg: str = Yield, @@ -994,7 +994,7 @@ def _raise_from_no_key_in_msg( ctx.chan, hide_tb=hide_tb, - ) from None + ) from src_err # `MsgStream` termination msg. # TODO: does it make more sense to pack -- 2.34.1 From dc31f0dac927287d1a8255a3a07a31032ec0d3a1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 12:47:28 -0400 Subject: [PATCH 055/190] Use `DebugStatus` around subactor lock requests Breaks out all the (sub)actor local conc primitives from `Lock` (which is now only used in and by the root actor) such that there's an explicit distinction between a task that's "consuming" the `Lock` (remotely) vs. the root-side service tasks which do the actual acquire on behalf of the requesters. `DebugStatus` changeover deats: ------ - ------ - move all the actor-local vars over `DebugStatus` including: - move `_trio_handler` and `_orig_sigint_handler` - `local_task_in_debug` now `repl_task` - `_debugger_request_cs` now `req_cs` - `local_pdb_complete` now `repl_release` - drop all ^ fields from `Lock.repr()` obvi.. - move over the `.[un]shield_sigint()` and `.is_main_trio_thread()` methods. - add some new attrs/meths: - `DebugStatus.repl` for the currently running `Pdb` in-actor singleton. - `.repr()` for pprint of state (like `Lock`). - Note: that even when a root-actor task is in REPL, the `DebugStatus` is still used for certain actor-local state mgmt, such as SIGINT handler shielding. - obvi change all lock-requester code bits to now use a `DebugStatus` in their local actor-state instead of `Lock`, i.e. change usage from `Lock` in `._runtime` and `._root`. 
- use new `Lock.get_locking_task_cs()` API when checking for
  sub-in-debug from `._runtime.Actor._stream_handler()`.

Unrelated to topic-at-hand tweaks:
------ - ------
- drop the commented bits about hiding `@[a]cm` stack frames from
  `_debug.pause()` and simplify to only one block with the `shield`
  passthrough since we already solved the issue with cancel-scopes
  using `@pdbp.hideframe` B)
  - this includes all the extra logging about the extra frame for the
    user (good thing i put in that wasted effort back then eh..)
- put the `try/except BaseException` with `log.exception()` around the
  whole of `._pause()` to ensure we don't miss in-func errors which can
  cause hangs..
- allow passing in `portal: Portal` to `Actor.start_remote_task()` such
  that `Portal` task spawning methods are always denoted correctly in
  terms of `Context.side`.
- lotsa logging tweaks, decreasing a bit of noise from `.runtime()`s.
---
 tractor/_root.py       |   2 +-
 tractor/_runtime.py    | 101 ++++----
 tractor/devx/_debug.py | 573 +++++++++++++++++++----------------
 3 files changed, 322 insertions(+), 354 deletions(-)

diff --git a/tractor/_root.py b/tractor/_root.py
index 3209555e..377f494e 100644
--- a/tractor/_root.py
+++ b/tractor/_root.py
@@ -137,7 +137,7 @@ async def open_root_actor(
 
     # attempt to retreive ``trio``'s sigint handler and stash it
     # on our debugger lock state.
-    _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT)
+    _debug.DebugStatus._trio_handler = signal.getsignal(signal.SIGINT)
 
     # mark top most level process as root actor
     _state._runtime_vars['_is_root'] = True
diff --git a/tractor/_runtime.py b/tractor/_runtime.py
index f61ec80d..4d675716 100644
--- a/tractor/_runtime.py
+++ b/tractor/_runtime.py
@@ -263,10 +263,13 @@ class Actor:
         self._listeners: list[trio.abc.Listener] = []
         self._parent_chan: Channel|None = None
         self._forkserver_info: tuple|None = None
+
+        # track each child/sub-actor in its locally
+        # supervising nursery
         self._actoruid2nursery: dict[
-            tuple[str, str],
+            tuple[str, str],  # sub-`Actor.uid`
             ActorNursery|None,
-        ] = {}    # type: ignore  # noqa
+        ] = {}
 
         # when provided, init the registry addresses property from
         # input via the validator.
@@ -661,12 +664,18 @@ class Actor:
 
             # TODO: NEEEDS TO BE TESTED!
             # actually, no idea if this ever even enters.. XD
+            #
+            # XXX => YES IT DOES, when i was testing ctl-c
+            # from broken debug TTY locking due to
+            # msg-spec races on application using RunVar...
             pdb_user_uid: tuple = pdb_lock.global_actor_in_debug
             if (
                 pdb_user_uid
                 and local_nursery
             ):
-                entry: tuple|None = local_nursery._children.get(pdb_user_uid)
+                entry: tuple|None = local_nursery._children.get(
+                    tuple(pdb_user_uid)
+                )
                 if entry:
                     proc: trio.Process
                     _, proc, _ = entry
@@ -676,10 +685,10 @@ class Actor:
                         and poll() is None
                     ):
                         log.cancel(
-                            'Root actor reports no-more-peers, BUT '
+                            'Root actor reports no-more-peers, BUT\n'
                             'a DISCONNECTED child still has the debug '
-                            'lock!\n'
-                            f'root uid: {self.uid}\n'
+                            'lock!\n\n'
+                            # f'root uid: {self.uid}\n'
                             f'last disconnected child uid: {uid}\n'
                             f'locking child uid: {pdb_user_uid}\n'
                         )
@@ -705,9 +714,8 @@ class Actor:
             # if a now stale local task has the TTY lock still
             # we cancel it to allow servicing other requests for
             # the lock.
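             # (note: `Lock.get_locking_task_cs()` used below is the
             # new accessor API wrapping what was previously the raw
             # `._root_local_task_cs_in_debug` attr.)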
- db_cs: trio.CancelScope|None = pdb_lock._root_local_task_cs_in_debug if ( - db_cs + (db_cs := pdb_lock.get_locking_task_cs()) and not db_cs.cancel_called and uid == pdb_user_uid ): @@ -744,7 +752,7 @@ class Actor: except KeyError: log.warning( 'Ignoring invalid IPC ctx msg!\n\n' - f'<= sender: {uid}\n' + f'<= sender: {uid}\n\n' # XXX don't need right since it's always in msg? # f'=> cid: {cid}\n\n' @@ -798,7 +806,7 @@ class Actor: cid, # side, )] - log.runtime( + log.debug( f'Retreived cached IPC ctx for\n' f'peer: {chan.uid}\n' f'cid:{cid}\n' @@ -837,10 +845,14 @@ class Actor: nsf: NamespacePath, kwargs: dict, + # determines `Context.side: str` + portal: Portal|None = None, + # IPC channel config msg_buffer_size: int|None = None, allow_overruns: bool = False, load_nsf: bool = False, + ack_timeout: float = 3, ) -> Context: ''' @@ -865,10 +877,12 @@ class Actor: msg_buffer_size=msg_buffer_size, allow_overruns=allow_overruns, ) + ctx._portal = portal if ( 'self' in nsf - or not load_nsf + or + not load_nsf ): ns, _, func = nsf.partition(':') else: @@ -876,42 +890,29 @@ class Actor: # -[ ] but, how to do `self:`?? ns, func = nsf.to_tuple() + msg = msgtypes.Start( + ns=ns, + func=func, + kwargs=kwargs, + uid=self.uid, + cid=cid, + ) log.runtime( - 'Sending cmd to\n' - f'peer: {chan.uid} => \n' - '\n' - f'=> {ns}.{func}({kwargs})\n' + 'Sending RPC start msg\n\n' + f'=> peer: {chan.uid}\n' + f' |_ {ns}.{func}({kwargs})\n' ) - await chan.send( - msgtypes.Start( - ns=ns, - func=func, - kwargs=kwargs, - uid=self.uid, - cid=cid, - ) - ) - # {'cmd': ( - # ns, - # func, - # kwargs, - # self.uid, - # cid, - # )} - # ) + await chan.send(msg) - # Wait on first response msg and validate; this should be - # immediate. - # first_msg: dict = await ctx._recv_chan.receive() - # functype: str = first_msg.get('functype') - - first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() + # NOTE wait on first `StartAck` response msg and validate; + # this should be immediate and does not (yet) wait for the + # remote child task to sync via `Context.started()`. + with trio.fail_after(ack_timeout): + first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() try: functype: str = first_msg.functype except AttributeError: raise unpack_error(first_msg, chan) - # if 'error' in first_msg: - # raise unpack_error(first_msg, chan) if functype not in ( 'asyncfunc', @@ -919,7 +920,7 @@ class Actor: 'context', ): raise ValueError( - f'{first_msg} is an invalid response packet?' + f'Invalid `StartAck.functype: str = {first_msg!r}` ??' ) ctx._remote_func_type = functype @@ -1164,7 +1165,7 @@ class Actor: # kill any debugger request task to avoid deadlock # with the root actor in this tree - dbcs = _debug.Lock._debugger_request_cs + dbcs = _debug.DebugStatus.req_cs if dbcs is not None: msg += ( '>> Cancelling active debugger request..\n' @@ -1239,9 +1240,9 @@ class Actor: except KeyError: # NOTE: during msging race conditions this will often # emit, some examples: - # - callee returns a result before cancel-msg/ctxc-raised - # - callee self raises ctxc before caller send request, - # - callee errors prior to cancel req. + # - child returns a result before cancel-msg/ctxc-raised + # - child self raises ctxc before parent send request, + # - child errors prior to cancel req. 
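+            # e.g. (a sketch of) one such interleaving:
+            #   parent: sends a cancel request for the RPC task
+            #   child:  that task completes and is popped from the
+            #           task table first
+            #   => the request's task lookup then lands here as a
+            #      `KeyError`.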
log.cancel( 'Cancel request invalid, RPC task already completed?\n\n' f'<= canceller: {requesting_uid}\n\n' @@ -1304,15 +1305,15 @@ class Actor: flow_info: str = ( f'<= canceller: {requesting_uid}\n' f'=> ipc-parent: {parent_chan}\n' - f' |_{ctx}\n' + f'|_{ctx}\n' ) log.runtime( - 'Waiting on RPC task to cancel\n' + 'Waiting on RPC task to cancel\n\n' f'{flow_info}' ) await is_complete.wait() log.runtime( - f'Sucessfully cancelled RPC task\n' + f'Sucessfully cancelled RPC task\n\n' f'{flow_info}' ) return True @@ -1538,8 +1539,8 @@ async def async_main( ''' # attempt to retreive ``trio``'s sigint handler and stash it - # on our debugger lock state. - _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT) + # on our debugger state. + _debug.DebugStatus._trio_handler = signal.getsignal(signal.SIGINT) is_registered: bool = False try: diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 51e74379..e4ab7d83 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -160,12 +160,6 @@ class Lock: # placeholder for function to set a ``trio.Event`` on debugger exit # pdb_release_hook: Callable | None = None - _trio_handler: ( - Callable[[int, FrameType|None], Any] - |int - | None - ) = None - remote_task_in_debug: str|None = None @staticmethod @@ -188,12 +182,6 @@ class Lock: Lock._locking_task_cs = cs - # SUBACTOR ONLY - # ------ - ------- - local_task_in_debug: Task|None = None - _debugger_request_cs: trio.CancelScope|None = None - local_pdb_complete: trio.Event|None = None - # ROOT ONLY # ------ - ------- # the root-actor-ONLY singletons for, @@ -214,16 +202,12 @@ class Lock: _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() _blocked: set[tuple[str, str]] = set() # `Actor.uid` block list - # TODO: should go on `PbpREPL`? - _orig_sigint_handler: Callable | None = None - @classmethod def repr(cls) -> str: # both root and subs fields: str = ( f'repl: {cls.repl}\n' - f'local_repl_task: {cls.local_task_in_debug}\n' ) if is_root_process(): @@ -238,12 +222,6 @@ class Lock: f'_debug_lock: {cls._debug_lock}\n' f'lock_stats: {lock_stats}\n' ) - else: - fields += ( - f'local_task_in_debug: {cls.local_task_in_debug}\n' - f'local_pdb_complete: {cls.local_pdb_complete}\n' - f'_debugger_request_cs: {cls._debugger_request_cs}\n' - ) body: str = textwrap.indent( fields, @@ -255,7 +233,101 @@ class Lock: ')>' ) - # TODO: move to PdbREPL! + @classmethod + def release(cls): + try: + if not DebugStatus.is_main_trio_thread(): + trio.from_thread.run_sync( + cls._debug_lock.release + ) + else: + cls._debug_lock.release() + + except RuntimeError as rte: + # uhhh makes no sense but been seeing the non-owner + # release error even though this is definitely the task + # that locked? + owner = cls._debug_lock.statistics().owner + # if ( + # owner + # and + # cls.remote_task_in_debug is None + # ): + # raise RuntimeError( + # 'Stale `Lock` detected, no remote task active!?\n' + # f'|_{owner}\n' + # # f'{Lock}' + # ) from rte + + if owner: + raise rte + + # OW suppress, can't member why tho .. XD + # something somethin corrupts a cancel-scope + # somewhere.. + + try: + # sometimes the ``trio`` might already be terminated in + # which case this call will raise. + if DebugStatus.repl_release is not None: + DebugStatus.repl_release.set() + + finally: + cls.repl = None + cls.global_actor_in_debug = None + + # restore original sigint handler + DebugStatus.unshield_sigint() + # actor-local state, irrelevant for non-root. 
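+            # (i.e. the `repl_task` field, which this patch renames
+            # from the old `Lock.local_task_in_debug`.)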
+ DebugStatus.repl_task = None + + +# TODO: actually use this instead throughout for subs! +class DebugStatus: + ''' + Singleton-state for debugging machinery in a subactor. + + Composes conc primitives for syncing with a root actor to + acquire the tree-global (TTY) `Lock` such that only ever one + actor's task can have the REPL active at a given time. + + Methods to shield the process' `SIGINT` handler are used + whenever a local task is an active REPL. + + ''' + repl: PdbREPL|None = None + repl_task: Task|None = None + req_cs: trio.CancelScope|None = None + repl_release: trio.Event|None = None + + lock_status: LockStatus|None = None + + _orig_sigint_handler: Callable | None = None + _trio_handler: ( + Callable[[int, FrameType|None], Any] + |int + | None + ) = None + + + @classmethod + def repr(cls) -> str: + fields: str = ( + f'repl: {cls.repl}\n' + f'repl_task: {cls.repl_task}\n' + f'repl_release: {cls.repl_release}\n' + f'req_cs: {cls.req_cs}\n' + ) + body: str = textwrap.indent( + fields, + prefix=' |_', + ) + return ( + f'<{cls.__name__}(\n' + f'{body}' + ')>' + ) + @classmethod def shield_sigint(cls): ''' @@ -339,77 +411,6 @@ class Lock: # is not threading.main_thread() # ) - @classmethod - def release(cls): - try: - if not cls.is_main_trio_thread(): - trio.from_thread.run_sync( - cls._debug_lock.release - ) - else: - cls._debug_lock.release() - - except RuntimeError as rte: - # uhhh makes no sense but been seeing the non-owner - # release error even though this is definitely the task - # that locked? - owner = cls._debug_lock.statistics().owner - # if ( - # owner - # and - # cls.remote_task_in_debug is None - # ): - # raise RuntimeError( - # 'Stale `Lock` detected, no remote task active!?\n' - # f'|_{owner}\n' - # # f'{Lock}' - # ) from rte - - if owner: - raise rte - - # OW suppress, can't member why tho .. XD - # something somethin corrupts a cancel-scope - # somewhere.. - - try: - # sometimes the ``trio`` might already be terminated in - # which case this call will raise. - if cls.local_pdb_complete is not None: - cls.local_pdb_complete.set() - - finally: - # restore original sigint handler - cls.unshield_sigint() - cls.repl = None - - # actor-local state, irrelevant for non-root. - cls.global_actor_in_debug = None - cls.local_task_in_debug = None - - -# TODO: actually use this instead throughout for subs! -class DebugStatus: - ''' - Singleton-state for debugging machinery in a subactor. - - Composes conc primitives for syncing with a root actor to - acquire the tree-global (TTY) `Lock` such that only ever one - actor's task can have the REPL active at a given time. - - ''' - repl: PdbREPL|None = None - lock_status: LockStatus|None = None - - repl_task: Task|None = None - # local_task_in_debug: Task|None = None - - req_cs: trio.CancelScope|None = None - # _debugger_request_cs: trio.CancelScope|None = None - - repl_release: trio.Event|None = None - # local_pdb_complete: trio.Event|None = None - class TractorConfig(pdbp.DefaultConfig): ''' @@ -445,6 +446,7 @@ class PdbREPL(pdbp.Pdb): status = DebugStatus + # def preloop(self): # print('IN PRELOOP') # super().preloop() @@ -660,16 +662,19 @@ async def lock_tty_for_child( highly reliable at releasing the mutex complete! 
''' - req_task_uid: tuple = tuple(subactor_task_uid) if req_task_uid in Lock._blocked: raise RuntimeError( + f'Double lock request!?\n' f'The same remote task already has an active request for TTY lock ??\n\n' f'task uid: {req_task_uid}\n' - f'subactor uid: {subactor_uid}\n' - ) + f'subactor uid: {subactor_uid}\n\n' - Lock._blocked.add(req_task_uid) + 'This might be mean that the requesting task ' + 'in `wait_for_parent_stdin_hijack()` may have crashed?\n' + 'Consider that an internal bug exists given the TTY ' + '`Lock`ing IPC dialog..\n' + ) root_task_name: str = current_task().name if tuple(subactor_uid) in Lock._blocked: @@ -695,8 +700,9 @@ async def lock_tty_for_child( f'subactor_uid: {subactor_uid}\n' f'remote task: {subactor_task_uid}\n' ) - Lock.shield_sigint() + DebugStatus.shield_sigint() try: + Lock._blocked.add(req_task_uid) with ( # NOTE: though a cs is created for every subactor lock # REQUEST in this ctx-child task, only the root-task @@ -708,6 +714,9 @@ async def lock_tty_for_child( # used to do so! trio.CancelScope(shield=True) as debug_lock_cs, + # TODO: make this ONLY limit the pld_spec such that we + # can on-error-decode-`.pld: Raw` fields in + # `Context._deliver_msg()`? _codec.limit_msg_spec( payload_spec=__msg_spec__, ) as codec, @@ -763,8 +772,9 @@ async def lock_tty_for_child( finally: debug_lock_cs.cancel() + Lock._blocked.remove(req_task_uid) Lock.set_locking_task_cs(None) - Lock.unshield_sigint() + DebugStatus.unshield_sigint() @cm @@ -817,7 +827,7 @@ async def wait_for_parent_stdin_hijack( trio.CancelScope(shield=True) as cs, apply_debug_codec(), ): - Lock._debugger_request_cs = cs + DebugStatus.req_cs = cs try: # TODO: merge into sync async with ? async with get_root() as portal: @@ -829,7 +839,7 @@ async def wait_for_parent_stdin_hijack( ) as (ctx, resp): log.pdb( - 'Subactor locked TTY per msg\n' + 'Subactor locked TTY with msg\n\n' f'{resp}\n' ) assert resp.subactor_uid == actor_uid @@ -837,12 +847,12 @@ async def wait_for_parent_stdin_hijack( async with ctx.open_stream() as stream: try: # to unblock local caller - assert Lock.local_pdb_complete + assert DebugStatus.repl_release task_status.started(cs) # wait for local task to exit and # release the REPL - await Lock.local_pdb_complete.wait() + await DebugStatus.repl_release.wait() finally: await stream.send( @@ -867,12 +877,12 @@ async def wait_for_parent_stdin_hijack( raise finally: - Lock.local_task_in_debug = None + DebugStatus.repl_task = None log.debug('Exiting debugger TTY lock request func from child') log.cancel('Reverting SIGINT handler!') - Lock.unshield_sigint() + DebugStatus.unshield_sigint() @@ -901,7 +911,7 @@ def mk_mpdb() -> PdbREPL: # in which case schedule the SIGINT shielding override # to in the main thread. # https://docs.python.org/3/library/signal.html#signals-and-threads - Lock.shield_sigint() + DebugStatus.shield_sigint() # XXX: These are the important flags mentioned in # https://github.com/python-trio/trio/issues/1155 @@ -1036,7 +1046,8 @@ def shield_sigint_handler( ) log.warning(message) - Lock.unshield_sigint() + # Lock.unshield_sigint() + DebugStatus.unshield_sigint() case_handled = True else: @@ -1064,7 +1075,7 @@ def shield_sigint_handler( if maybe_stale_lock_cs: lock_cs.cancel() - Lock.unshield_sigint() + DebugStatus.unshield_sigint() case_handled = True # child actor that has locked the debugger @@ -1086,11 +1097,11 @@ def shield_sigint_handler( f'{uid_in_debug}\n' 'Allowing SIGINT propagation..' 
) - Lock.unshield_sigint() + DebugStatus.unshield_sigint() # do_cancel() case_handled = True - task: str|None = Lock.local_task_in_debug + task: str|None = DebugStatus.repl_task if ( task and @@ -1124,7 +1135,7 @@ def shield_sigint_handler( + 'Reverting handler to `trio` default!\n' ) - Lock.unshield_sigint() + DebugStatus.unshield_sigint() case_handled = True # XXX ensure that the reverted-to-handler actually is @@ -1200,32 +1211,15 @@ def _set_trace( pdb and actor is not None ) - # or shield ): - msg: str = _pause_msg - if shield: - # log.warning( - msg = ( - '\n\n' - ' ------ - ------\n' - 'Debugger invoked with `shield=True` so an extra\n' - '`trio.CancelScope.__exit__()` frame is shown..\n' - '\n' - 'Try going up one frame to see your pause point!\n' - '\n' - ' SORRY we need to fix this!\n' - ' ------ - ------\n\n' - ) + msg - - # pdbp.set_trace() # TODO: maybe print the actor supervion tree up to the # root here? Bo + log.pdb( - f'{msg}\n' + f'{_pause_msg}\n' '|\n' # TODO: make an `Actor.__repr()__` - # f'|_ {current_task()} @ {actor.name}\n' - f'|_ {current_task()}\n' + f'|_ {current_task()} @ {actor.uid}\n' ) # no f!#$&* idea, but when we're in async land # we need 2x frames up? @@ -1286,11 +1280,11 @@ async def _pause( # task_name: str = task.name if ( - not Lock.local_pdb_complete + not DebugStatus.repl_release or - Lock.local_pdb_complete.is_set() + DebugStatus.repl_release.is_set() ): - Lock.local_pdb_complete = trio.Event() + DebugStatus.repl_release = trio.Event() if debug_func is not None: debug_func = partial(debug_func) @@ -1333,71 +1327,14 @@ async def _pause( Lock.release() raise - except BaseException: - log.exception( - 'Failed to engage debugger via `_pause()` ??\n' - ) - raise + try: + if is_root_process(): - if is_root_process(): - - # we also wait in the root-parent for any child that - # may have the tty locked prior - # TODO: wait, what about multiple root tasks acquiring it though? - if Lock.global_actor_in_debug == actor.uid: - # re-entrant root process already has it: noop. - log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'ignoring..' - ) - await trio.lowlevel.checkpoint() - return - - # XXX: since we need to enter pdb synchronously below, - # we have to release the lock manually from pdb completion - # callbacks. Can't think of a nicer way then this atm. - if Lock._debug_lock.locked(): - log.warning( - 'attempting to shield-acquire active TTY lock' - f' owned by {Lock.global_actor_in_debug}' - ) - - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - with trio.CancelScope(shield=True): - await Lock._debug_lock.acquire() - else: - # may be cancelled - await Lock._debug_lock.acquire() - - Lock.global_actor_in_debug = actor.uid - Lock.local_task_in_debug = task - Lock.repl = pdb - - # enter REPL from root, no TTY locking IPC ctx necessary - _enter_repl_sync(debug_func) - return # next branch is mutex and for subactors - - # TODO: need a more robust check for the "root" actor - elif ( - not is_root_process() - and actor._parent_chan # a connected child - ): - if Lock.local_task_in_debug: - - # Recurrence entry case: this task already has the lock and - # is likely recurrently entering a breakpoint - # - # NOTE: noop on recurrent entry case but we want to trigger - # a checkpoint to allow other actors error-propagate and - # potetially avoid infinite re-entries in some - # subactor that would otherwise not bubble until the - # next checkpoint was hit. 
- if ( - (repl_task := Lock.local_task_in_debug) - and - repl_task is task - ): + # we also wait in the root-parent for any child that + # may have the tty locked prior + # TODO: wait, what about multiple root tasks acquiring it though? + if Lock.global_actor_in_debug == actor.uid: + # re-entrant root process already has it: noop. log.warning( f'{task.name}@{actor.uid} already has TTY lock\n' f'ignoring..' @@ -1405,79 +1342,137 @@ async def _pause( await trio.lowlevel.checkpoint() return - # if **this** actor is already in debug REPL we want - # to maintain actor-local-task mutex access, so block - # here waiting for the control to be released - this - # -> allows for recursive entries to `tractor.pause()` - log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'waiting for release..' - ) - await Lock.local_pdb_complete.wait() - await trio.sleep(0.1) - - # mark local actor as "in debug mode" to avoid recurrent - # entries/requests to the root process - Lock.local_task_in_debug = task - - # this **must** be awaited by the caller and is done using the - # root nursery so that the debugger can continue to run without - # being restricted by the scope of a new task nursery. - - # TODO: if we want to debug a trio.Cancelled triggered exception - # we have to figure out how to avoid having the service nursery - # cancel on this task start? I *think* this works below: - # ```python - # actor._service_n.cancel_scope.shield = shield - # ``` - # but not entirely sure if that's a sane way to implement it? - - # NOTE: MUST it here bc multiple tasks are spawned by any - # one sub-actor AND there will be a race between when the - # root locking task delivers the `Started(pld=LockStatus)` - # and when the REPL is actually entered here. SO ensure - # the codec is set before either are run! - # - with ( - # _codec.limit_msg_spec( - # payload_spec=__msg_spec__, - # ) as debug_codec, - trio.CancelScope(shield=shield), - ): - # async with trio.open_nursery() as tn: - # tn.cancel_scope.shield = True - try: - # cs: trio.CancelScope = await tn.start( - cs: trio.CancelScope = await actor._service_n.start( - wait_for_parent_stdin_hijack, - actor.uid, - (task.name, id(task)), + # XXX: since we need to enter pdb synchronously below, + # we have to release the lock manually from pdb completion + # callbacks. Can't think of a nicer way then this atm. + if Lock._debug_lock.locked(): + log.warning( + 'attempting to shield-acquire active TTY lock' + f' owned by {Lock.global_actor_in_debug}' ) - # our locker task should be the one in ctx - # with the root actor - assert Lock._debugger_request_cs is cs - # XXX used by the SIGINT handler to check if - # THIS actor is in REPL interaction - Lock.repl = pdb + # must shield here to avoid hitting a ``Cancelled`` and + # a child getting stuck bc we clobbered the tty + with trio.CancelScope(shield=True): + await Lock._debug_lock.acquire() + else: + # may be cancelled + await Lock._debug_lock.acquire() - except RuntimeError: - Lock.release() + Lock.global_actor_in_debug = actor.uid + DebugStatus.repl_task = task + DebugStatus.repl = Lock.repl = pdb - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. 
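+            # (root fast path: the `Lock._debug_lock` was acquired
+            # directly above, so unlike a subactor, no TTY-locking
+            # IPC ctx with a parent is needed.)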
+ # enter REPL from root, no TTY locking IPC ctx necessary + _enter_repl_sync(debug_func) + return # next branch is mutex and for subactors + + # TODO: need a more robust check for the "root" actor + elif ( + not is_root_process() + and actor._parent_chan # a connected child + ): + if DebugStatus.repl_task: + + # Recurrence entry case: this task already has the lock and + # is likely recurrently entering a breakpoint + # + # NOTE: noop on recurrent entry case but we want to trigger + # a checkpoint to allow other actors error-propagate and + # potetially avoid infinite re-entries in some + # subactor that would otherwise not bubble until the + # next checkpoint was hit. + if ( + (repl_task := DebugStatus.repl_task) + and + repl_task is task + ): + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) + await trio.lowlevel.checkpoint() return - raise + # if **this** actor is already in debug REPL we want + # to maintain actor-local-task mutex access, so block + # here waiting for the control to be released - this + # -> allows for recursive entries to `tractor.pause()` + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'waiting for release..' + ) + await DebugStatus.repl_release.wait() + await trio.sleep(0.1) - # enter REPL + # mark local actor as "in debug mode" to avoid recurrent + # entries/requests to the root process + DebugStatus.repl_task = task - try: - _enter_repl_sync(debug_func) - finally: - Lock.unshield_sigint() + # this **must** be awaited by the caller and is done using the + # root nursery so that the debugger can continue to run without + # being restricted by the scope of a new task nursery. + + # TODO: if we want to debug a trio.Cancelled triggered exception + # we have to figure out how to avoid having the service nursery + # cancel on this task start? I *think* this works below: + # ```python + # actor._service_n.cancel_scope.shield = shield + # ``` + # but not entirely sure if that's a sane way to implement it? + + # NOTE: MUST it here bc multiple tasks are spawned by any + # one sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered here. SO ensure + # the codec is set before either are run! + # + with ( + # _codec.limit_msg_spec( + # payload_spec=__msg_spec__, + # ) as debug_codec, + trio.CancelScope(shield=shield), + ): + # async with trio.open_nursery() as tn: + # tn.cancel_scope.shield = True + try: + # cs: trio.CancelScope = await tn.start( + cs: trio.CancelScope = await actor._service_n.start( + wait_for_parent_stdin_hijack, + actor.uid, + (task.name, id(task)), + ) + # our locker task should be the one in ctx + # with the root actor + assert DebugStatus.req_cs is cs + + # XXX used by the SIGINT handler to check if + # THIS actor is in REPL interaction + Lock.repl = pdb + + except RuntimeError: + Lock.release() + + if actor._cancel_called: + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. 
+ return + + raise + + # enter REPL + + try: + _enter_repl_sync(debug_func) + finally: + DebugStatus.unshield_sigint() + + except BaseException: + log.exception( + 'Failed to engage debugger via `_pause()` ??\n' + ) + raise # XXX: apparently we can't do this without showing this frame @@ -1527,45 +1522,16 @@ async def pause( ''' __tracebackhide__: bool = True - if shield: - # NOTE XXX: even hard coding this inside the `class CancelScope:` - # doesn't seem to work for me!? - # ^ XXX ^ + with trio.CancelScope( + shield=shield, + ) as cs: - # def _exit(self, *args, **kwargs): - # __tracebackhide__: bool = True - # super().__exit__(*args, **kwargs) - - # trio.CancelScope.__enter__.__tracebackhide__ = True - # trio.CancelScope.__exit__.__tracebackhide__ = True - - # import types - # with trio.CancelScope(shield=shield) as cs: - # cs.__exit__ = types.MethodType(_exit, cs) - # cs.__exit__.__tracebackhide__ = True - - # TODO: LOL, solved this with the `pdb.hideframe` stuff - # at top-of-mod.. so I guess we can just only use this - # block right? - with trio.CancelScope( - shield=shield, - ) as cs: - print(f'debug cs is {cs}\n') - # setattr(cs.__exit__.__func__, '__tracebackhide__', True) - # setattr(cs.__enter__.__func__, '__tracebackhide__', True) - - # NOTE: so the caller can always cancel even if shielded - task_status.started(cs) - return await _pause( - debug_func=debug_func, - shield=True, - task_status=task_status, - **_pause_kwargs - ) - else: + # NOTE: so the caller can always manually cancel even + # if shielded! + task_status.started(cs) return await _pause( debug_func=debug_func, - shield=False, + shield=shield, task_status=task_status, **_pause_kwargs ) @@ -1682,7 +1648,7 @@ def pause_from_sync( ) ) # TODO: maybe the `trio.current_task()` id/name if avail? - Lock.local_task_in_debug: str = str(threading.current_thread()) + DebugStatus.repl_task: str = str(threading.current_thread()) else: # we are presumably the `trio.run()` + main thread greenback.await_( @@ -1692,7 +1658,7 @@ def pause_from_sync( hide_tb=hide_tb, ) ) - Lock.local_task_in_debug: str = current_task() + DebugStatus.repl_task: str = current_task() # TODO: ensure we aggressively make the user aware about # entering the global ``breakpoint()`` built-in from sync @@ -1754,7 +1720,8 @@ def _post_mortem( log.pdb( f'{_crash_msg}\n' '|\n' - f'|_ {current_task()}\n' + # f'|_ {current_task()}\n' + f'|_ {current_task()} @ {actor.uid}\n' # f'|_ @{actor.uid}\n' # TODO: make an `Actor.__repr()__` -- 2.34.1 From 94b735ed967bdcd58b3b9be667fbfb6ef4a28a8b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:10:23 -0400 Subject: [PATCH 056/190] `NamespacePath._mk_fqnp()` handle `__mod__` for methods Need to use `__self__.__mod__` in the method case i guess.. --- tractor/msg/ptr.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tractor/msg/ptr.py b/tractor/msg/ptr.py index 4d089c3e..abe5406e 100644 --- a/tractor/msg/ptr.py +++ b/tractor/msg/ptr.py @@ -76,9 +76,11 @@ class NamespacePath(str): return self._ref @staticmethod - def _mk_fqnp(ref: type | object) -> tuple[str, str]: + def _mk_fqnp( + ref: type|object, + ) -> tuple[str, str]: ''' - Generate a minial ``str`` pair which describes a python + Generate a minial `str` pair which describes a python object's namespace path and object/type name. 
In more precise terms something like: @@ -87,10 +89,9 @@ class NamespacePath(str): of THIS type XD ''' - if ( - isfunction(ref) - ): + if isfunction(ref): name: str = getattr(ref, '__name__') + mod_name: str = ref.__module__ elif ismethod(ref): # build out the path manually i guess..? @@ -99,15 +100,19 @@ class NamespacePath(str): type(ref.__self__).__name__, ref.__func__.__name__, ]) + mod_name: str = ref.__self__.__module__ else: # object or other? # isinstance(ref, object) # and not isfunction(ref) name: str = type(ref).__name__ + mod_name: str = ref.__module__ + # TODO: return static value direactly? + # # fully qualified namespace path, tuple. fqnp: tuple[str, str] = ( - ref.__module__, + mod_name, name, ) return fqnp @@ -115,7 +120,7 @@ class NamespacePath(str): @classmethod def from_ref( cls, - ref: type | object, + ref: type|object, ) -> NamespacePath: -- 2.34.1 From 2801ccf2294d36bb683dc688f17ed80f479eb183 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:17:50 -0400 Subject: [PATCH 057/190] Annotate nursery and portal methods for `CallerInfo` scanning --- tractor/_portal.py | 27 +++++++++++++++++++++------ tractor/_supervise.py | 9 ++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index e4db93a6..052dd8ef 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -161,17 +161,18 @@ class Portal: self._expect_result = await self.actor.start_remote_task( self.channel, nsf=NamespacePath(f'{ns}:{func}'), - kwargs=kwargs + kwargs=kwargs, + portal=self, ) async def _return_once( self, ctx: Context, - ) -> dict[str, Any]: + ) -> Return: assert ctx._remote_func_type == 'asyncfunc' # single response - msg: dict = await ctx._recv_chan.receive() + msg: Return = await ctx._recv_chan.receive() return msg async def result(self) -> Any: @@ -247,6 +248,8 @@ class Portal: purpose. ''' + __runtimeframe__: int = 1 # noqa + chan: Channel = self.channel if not chan.connected(): log.runtime( @@ -324,16 +327,18 @@ class Portal: internals! ''' + __runtimeframe__: int = 1 # noqa nsf = NamespacePath( f'{namespace_path}:{function_name}' ) - ctx = await self.actor.start_remote_task( + ctx: Context = await self.actor.start_remote_task( chan=self.channel, nsf=nsf, kwargs=kwargs, + portal=self, ) - ctx._portal = self - msg = await self._return_once(ctx) + ctx._portal: Portal = self + msg: Return = await self._return_once(ctx) return _unwrap_msg( msg, self.channel, @@ -384,6 +389,7 @@ class Portal: self.channel, nsf=nsf, kwargs=kwargs, + portal=self, ) ctx._portal = self return _unwrap_msg( @@ -398,6 +404,14 @@ class Portal: **kwargs, ) -> AsyncGenerator[MsgStream, None]: + ''' + Legacy one-way streaming API. + + TODO: re-impl on top `Portal.open_context()` + an async gen + around `Context.open_stream()`. + + ''' + __runtimeframe__: int = 1 # noqa if not inspect.isasyncgenfunction(async_gen_func): if not ( @@ -411,6 +425,7 @@ class Portal: self.channel, nsf=NamespacePath.from_ref(async_gen_func), kwargs=kwargs, + portal=self, ) ctx._portal = self diff --git a/tractor/_supervise.py b/tractor/_supervise.py index be81e4e6..dc65cc65 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -131,7 +131,12 @@ class ActorNursery: "main task" besides the runtime. 
''' - loglevel = loglevel or self._actor.loglevel or get_loglevel() + __runtimeframe__: int = 1 # noqa + loglevel: str = ( + loglevel + or self._actor.loglevel + or get_loglevel() + ) # configure and pass runtime state _rtv = _state._runtime_vars.copy() @@ -209,6 +214,7 @@ class ActorNursery: the actor is terminated. ''' + __runtimeframe__: int = 1 # noqa mod_path: str = fn.__module__ if name is None: @@ -257,6 +263,7 @@ class ActorNursery: directly without any far end graceful ``trio`` cancellation. ''' + __runtimeframe__: int = 1 # noqa self.cancelled = True # TODO: impl a repr for spawn more compact -- 2.34.1 From 34b26862ad6c21e1249b177334b9749441cfc650 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:18:29 -0400 Subject: [PATCH 058/190] TOSQUASH 77a15eb use `DebugStatus` in `._rpc` --- tractor/_rpc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 7e259c1e..5970a101 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -810,7 +810,7 @@ async def process_messages( # should use it? # https://github.com/python-trio/trio/issues/467 log.runtime( - 'Entering IPC msg loop:\n' + 'Entering RPC msg loop:\n' f'peer: {chan.uid}\n' f'|_{chan}\n' ) @@ -872,7 +872,7 @@ async def process_messages( # XXX NOTE XXX don't start entire actor # runtime cancellation if this actor is # currently in debug mode! - pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete + pdb_complete: trio.Event|None = _debug.DebugStatus.repl_release if pdb_complete: await pdb_complete.wait() @@ -1069,7 +1069,7 @@ async def process_messages( log.exception(message) raise RuntimeError(message) - log.runtime( + log.transport( 'Waiting on next IPC msg from\n' f'peer: {chan.uid}\n' f'|_{chan}\n' -- 2.34.1 From 958e91962bd535357c7a5b25bcf2b9a77d745be6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:40:26 -0400 Subject: [PATCH 059/190] Add some `bytes` annots --- tractor/_ipc.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index a5b44a4e..f76d4ef5 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -314,8 +314,7 @@ class MsgpackTCPStream(MsgTransport): while True: try: - header = await self.recv_stream.receive_exactly(4) - + header: bytes = await self.recv_stream.receive_exactly(4) except ( ValueError, ConnectionResetError, @@ -337,8 +336,7 @@ class MsgpackTCPStream(MsgTransport): size, = struct.unpack(" Date: Thu, 18 Apr 2024 15:41:06 -0400 Subject: [PATCH 060/190] Tweak `current_actor()` failure msg --- tractor/_state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tractor/_state.py b/tractor/_state.py index b76e8ac9..30346a6a 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -66,7 +66,7 @@ def current_actor( err_on_no_runtime and _current_actor is None ): - msg: str = 'No local actor has been initialized yet' + msg: str = 'No local actor has been initialized yet?\n' from ._exceptions import NoRuntime if last := last_actor(): @@ -79,8 +79,8 @@ def current_actor( # this process. 
else: msg += ( - 'No last actor found?\n' - 'Did you forget to open one of:\n\n' + # 'No last actor found?\n' + '\nDid you forget to call one of,\n' '- `tractor.open_root_actor()`\n' '- `tractor.open_nursery()`\n' ) -- 2.34.1 From cc69d86baf621d1310c628d3b5bf57a38eaaafcb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:53:34 -0400 Subject: [PATCH 061/190] Proto in new `Context` refinements As per some newly added features and APIs: - pass `portal: Portal` to `Actor.start_remote_task()` from `open_context_from_portal()` marking `Portal.open_context()` as always being the "parent" task side. - add caller tracing via `.devx._code.CallerInfo/.find_caller_info()` called in `mk_context()` and (for now) a `__runtimeframe__: int = 2` inside `open_context_from_portal()` such that any enter-er of `Portal.open_context()` will be reported. - pass in a new `._caller_info` attr which is used in 2 new meths: - `.repr_caller: str` for showing the name of the app-code-func. - `.repr_api: str` for showing the API ep, which for now we just hardcode to `Portal.open_context()` since ow its gonna show the mod func name `open_context_from_portal()`. - use those new props ^ in the `._deliver_msg()` flow body log msg content for much clearer msg-flow tracing Bo - add `Context._cancel_on_msgerr: bool` to toggle whether a delivered `MsgTypeError` should trigger a `._scope.cancel()` call. - also (temporarily) add separate `.cancel()` emissions for both cases as i work through hacking out the maybe `MsgType.pld: Raw` support. --- tractor/_context.py | 147 +++++++++++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 36 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 052c198d..9e5fe6c1 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -26,6 +26,7 @@ disjoint, parallel executing tasks in separate actors. from __future__ import annotations from collections import deque from contextlib import asynccontextmanager as acm +from contextvars import ContextVar from dataclasses import ( dataclass, field, @@ -56,6 +57,7 @@ from ._exceptions import ( ) from .log import get_logger from .msg import ( + _codec, Error, MsgType, MsgCodec, @@ -80,6 +82,9 @@ if TYPE_CHECKING: from ._portal import Portal from ._runtime import Actor from ._ipc import MsgTransport + from .devx._code import ( + CallerInfo, + ) log = get_logger(__name__) @@ -499,6 +504,18 @@ class Context: _started_called: bool = False _stream_opened: bool = False _stream: MsgStream|None = None + _pld_codec_var: ContextVar[MsgCodec] = ContextVar( + 'pld_codec', + default=_codec._def_msgspec_codec, # i.e. `Any`-payloads + ) + + @property + def pld_codec(self) -> MsgCodec|None: + return self._pld_codec_var.get() + + # caller of `Portal.open_context()` for + # logging purposes mostly + _caller_info: CallerInfo|None = None # overrun handling machinery # NOTE: none of this provides "backpressure" to the remote @@ -525,6 +542,7 @@ class Context: # TODO: figure out how we can enforce this without losing our minds.. _strict_started: bool = False + _cancel_on_msgerr: bool = True def __str__(self) -> str: ds: str = '=' @@ -857,6 +875,7 @@ class Context: # TODO: never do this right? # if self._remote_error: # return + peer_side: str = self.peer_side(self.side) # XXX: denote and set the remote side's error so that # after we cancel whatever task is the opener of this @@ -864,14 +883,15 @@ class Context: # appropriately. 
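        # (a sketch of the delivery flow:)
        #   error rx-ed -> `._remote_error` set
        #   -> maybe `._scope.cancel()` (further below)
        #   -> the ctx-opener task then raises the boxed error
        #      appropriately on exit.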
log.runtime( 'Setting remote error for ctx\n\n' - f'<= remote ctx uid: {self.chan.uid}\n' - f'=>{error}' + f'<= {peer_side!r}: {self.chan.uid}\n' + f'=> {self.side!r}\n\n' + f'{error}' ) self._remote_error: BaseException = error # self-cancel (ack) or, # peer propagated remote cancellation. - msgtyperr: bool = False + msgerr: bool = False if isinstance(error, ContextCancelled): whom: str = ( @@ -884,7 +904,7 @@ class Context: ) elif isinstance(error, MsgTypeError): - msgtyperr = True + msgerr = True peer_side: str = self.peer_side(self.side) log.error( f'IPC dialog error due to msg-type caused by {peer_side!r} side\n\n' @@ -935,13 +955,24 @@ class Context: and not self._is_self_cancelled() and not cs.cancel_called and not cs.cancelled_caught - and not msgtyperr + and ( + msgerr + and + # NOTE: allow user to config not cancelling the + # local scope on `MsgTypeError`s + self._cancel_on_msgerr + ) ): # TODO: it'd sure be handy to inject our own # `trio.Cancelled` subtype here ;) # https://github.com/goodboy/tractor/issues/368 + log.cancel('Cancelling local `.open_context()` scope!') self._scope.cancel() + else: + log.cancel('NOT cancelling local `.open_context()` scope!') + + # TODO: maybe we should also call `._res_scope.cancel()` if it # exists to support cancelling any drain loop hangs? # NOTE: this usage actually works here B) @@ -969,9 +1000,7 @@ class Context: dmaddr = dst_maddr @property - def repr_rpc( - self, - ) -> str: + def repr_rpc(self) -> str: # TODO: how to show the transport interchange fmt? # codec: str = self.chan.transport.codec_key outcome_str: str = self.repr_outcome( @@ -983,6 +1012,27 @@ class Context: f'{self._nsf}() -> {outcome_str}:' ) + @property + def repr_caller(self) -> str: + ci: CallerInfo|None = self._caller_info + if ci: + return ( + f'{ci.caller_nsp}()' + # f'|_api: {ci.api_nsp}' + ) + + return '' + + @property + def repr_api(self) -> str: + # ci: CallerInfo|None = self._caller_info + # if ci: + # return ( + # f'{ci.api_nsp}()\n' + # ) + + return 'Portal.open_context()' + async def cancel( self, timeout: float = 0.616, @@ -1187,8 +1237,9 @@ class Context: ) # NOTE: in one way streaming this only happens on the - # caller side inside `Actor.start_remote_task()` so if you try - # to send a stop from the caller to the callee in the + # parent-ctx-task side (on the side that calls + # `Actor.start_remote_task()`) so if you try to send + # a stop from the caller to the callee in the # single-direction-stream case you'll get a lookup error # currently. ctx: Context = actor.get_context( @@ -1853,6 +1904,19 @@ class Context: send_chan: trio.MemorySendChannel = self._send_chan nsf: NamespacePath = self._nsf + side: str = self.side + if side == 'child': + assert not self._portal + peer_side: str = self.peer_side(side) + + flow_body: str = ( + f'<= peer {peer_side!r}: {from_uid}\n' + f' |_<{nsf}()>\n\n' + + f'=> {side!r}: {self._task}\n' + f' |_<{self.repr_api} @ {self.repr_caller}>\n\n' + ) + re: Exception|None if re := unpack_error( msg, @@ -1863,18 +1927,10 @@ class Context: else: log_meth = log.runtime - side: str = self.side - - peer_side: str = self.peer_side(side) - log_meth( f'Delivering IPC ctx error from {peer_side!r} to {side!r} task\n\n' - f'<= peer {peer_side!r}: {from_uid}\n' - f' |_ {nsf}()\n\n' - - f'=> {side!r} cid: {cid}\n' - f' |_{self._task}\n\n' + f'{flow_body}' f'{pformat(re)}\n' ) @@ -1887,30 +1943,27 @@ class Context: # or `RemoteActorError`). self._maybe_cancel_and_set_remote_error(re) - # XXX only case where returning early is fine! 
+ # TODO: expose as mod func instead! structfmt = pretty_struct.Struct.pformat if self._in_overrun: log.warning( - f'Queueing OVERRUN msg on caller task:\n' - f'<= peer: {from_uid}\n' - f' |_ {nsf}()\n\n' + f'Queueing OVERRUN msg on caller task:\n\n' - f'=> cid: {cid}\n' - f' |_{self._task}\n\n' + f'{flow_body}' f'{structfmt(msg)}\n' ) self._overflow_q.append(msg) + + # XXX NOTE XXX + # overrun is the ONLY case where returning early is fine! return False try: log.runtime( f'Delivering msg from IPC ctx:\n\n' - f'<= {from_uid}\n' - f' |_ {nsf}()\n\n' - f'=> {self._task}\n' - f' |_cid={self.cid}\n\n' + f'{flow_body}' f'{structfmt(msg)}\n' ) @@ -1942,6 +1995,7 @@ class Context: f'cid: {self.cid}\n' 'Failed to deliver msg:\n' f'send_chan: {send_chan}\n\n' + f'{pformat(msg)}\n' ) return False @@ -2095,6 +2149,12 @@ async def open_context_from_portal( ''' __tracebackhide__: bool = hide_tb + # denote this frame as a "runtime frame" for stack + # introspection where we report the caller code in logging + # and error message content. + # NOTE: 2 bc of the wrapping `@acm` + __runtimeframe__: int = 2 # noqa + # conduct target func method structural checks if not inspect.iscoroutinefunction(func) and ( getattr(func, '_tractor_contex_function', False) @@ -2122,6 +2182,8 @@ async def open_context_from_portal( nsf=nsf, kwargs=kwargs, + portal=portal, + # NOTE: it's imporant to expose this since you might # get the case where the parent who opened the context does # not open a stream until after some slow startup/init @@ -2132,13 +2194,17 @@ async def open_context_from_portal( # place.. allow_overruns=allow_overruns, ) - # ASAP, so that `Context.side: str` can be determined for - # logging / tracing / debug! - ctx._portal: Portal = portal - assert ctx._remote_func_type == 'context' - msg: Started = await ctx._recv_chan.receive() + assert ctx._caller_info + # XXX NOTE since `._scope` is NOT set BEFORE we retreive the + # `Started`-msg any cancellation triggered + # in `._maybe_cancel_and_set_remote_error()` will + # NOT actually cancel the below line! + # -> it's expected that if there is an error in this phase of + # the dialog, the `Error` msg should be raised from the `msg` + # handling block below. + msg: Started = await ctx._recv_chan.receive() try: # the "first" value here is delivered by the callee's # ``Context.started()`` call. @@ -2148,6 +2214,7 @@ async def open_context_from_portal( # except KeyError as src_error: except AttributeError as src_error: + log.exception('Raising from unexpected msg!\n') _raise_from_no_key_in_msg( ctx=ctx, msg=msg, @@ -2573,7 +2640,6 @@ async def open_context_from_portal( None, ) - def mk_context( chan: Channel, cid: str, @@ -2595,6 +2661,10 @@ def mk_context( recv_chan: trio.MemoryReceiveChannel send_chan, recv_chan = trio.open_memory_channel(msg_buffer_size) + # TODO: only scan caller-info if log level so high! + from .devx._code import find_caller_info + caller_info: CallerInfo|None = find_caller_info() + ctx = Context( chan=chan, cid=cid, @@ -2603,6 +2673,7 @@ def mk_context( _recv_chan=recv_chan, _nsf=nsf, _task=trio.lowlevel.current_task(), + _caller_info=caller_info, **kwargs, ) # TODO: we can drop the old placeholder yah? @@ -2613,7 +2684,11 @@ def mk_context( def context(func: Callable) -> Callable: ''' - Mark an async function as a streaming routine with ``@context``. + Mark an (async) function as an SC-supervised, inter-`Actor`, + child-`trio.Task`, IPC endpoint otherwise known more + colloquially as a (RPC) "context". 
+
+    Functions annotated this way are the fundamental IPC endpoint type offered by `tractor`.

     '''
     # TODO: apply whatever solution ``mypy`` ends up picking for this:
-- 
2.34.1


From ca1d7c28ea645e0b95977a03c7395488159c5092 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 18 Apr 2024 16:24:59 -0400
Subject: [PATCH 062/190] Go back to `ContextVar` for codec mgmt

Turns out we do want per-task inheritance particularly if there's to be
per `Context` dynamic mutation of the spec; we don't want mutation in
some task to affect any parent/global setting.

Turns out since we use a common "feeder task" in the rpc loop, we need
to offer a per `Context` payload decoder sys anyway in order to enable
per-task controls for inter-actor multi-task-ctx scenarios.
---
 tractor/msg/_codec.py | 126 +++++++++++++++++++++++++-----------------
 1 file changed, 75 insertions(+), 51 deletions(-)

diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index 82fd2011..766a297a 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -33,25 +33,29 @@ from __future__ import annotations
 from contextlib import (
     contextmanager as cm,
 )
-# from contextvars import (
-#     ContextVar,
-#     Token,
-# )
+from contextvars import (
+    ContextVar,
+    Token,
+)
 import textwrap
 from typing import (
     Any,
     Callable,
     Type,
+    TYPE_CHECKING,
     Union,
 )
 from types import ModuleType

 import msgspec
-from msgspec import msgpack
-from trio.lowlevel import (
-    RunVar,
-    RunVarToken,
+from msgspec import (
+    msgpack,
+    # Raw,
 )
+# from trio.lowlevel import (
+#     RunVar,
+#     RunVarToken,
+# )

 # TODO: see notes below from @mikenerone..
 # from tricycle import TreeVar
@@ -62,6 +66,9 @@ from tractor.msg.types import (
 )
 from tractor.log import get_logger

+if TYPE_CHECKING:
+    from tractor._context import Context
+
 log = get_logger(__name__)

 # TODO: overall IPC msg-spec features (i.e. in this mod)!
@@ -157,24 +164,6 @@ class MsgCodec(Struct):

     lib: ModuleType = msgspec

-    # TODO: a sub-decoder system as well?
-    # payload_msg_specs: Union[Type[Struct]] = Any
-    # see related comments in `.msg.types`
-    # _payload_decs: (
-    #     dict[
-    #         str,
-    #         msgpack.Decoder,
-    #     ]
-    #     |None
-    # ) = None
-    # OR
-    # ) = {
-    #     # pre-seed decoders for std-py-type-set for use when
-    #     # `MsgType.pld == None|Any`.
-    #     None: msgpack.Decoder(Any),
-    #     Any: msgpack.Decoder(Any),
-    # }
-
     # TODO: use `functools.cached_property` for these ?
     # https://docs.python.org/3/library/functools.html#functools.cached_property
     @property
@@ -210,7 +199,25 @@ class MsgCodec(Struct):
         # https://jcristharif.com/msgspec/usage.html#typed-decoding
         return self._dec.decode(msg)

-    # TODO: do we still want to try and support the sub-decoder with
+    # TODO: a sub-decoder system as well?
+    # payload_msg_specs: Union[Type[Struct]] = Any
+    # see related comments in `.msg.types`
+    # _payload_decs: (
+    #     dict[
+    #         str,
+    #         msgpack.Decoder,
+    #     ]
+    #     |None
+    # ) = None
+    # OR
+    # ) = {
+    #     # pre-seed decoders for std-py-type-set for use when
+    #     # `MsgType.pld == None|Any`.
+    #     None: msgpack.Decoder(Any),
+    #     Any: msgpack.Decoder(Any),
+    # }
+    #
+    # -[ ] do we still want to try and support the sub-decoder with
     # `.Raw` technique in the case that the `Generic` approach gives
     # future grief?
     #
@@ -429,6 +436,9 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any)
 #
 _def_tractor_codec: MsgCodec = mk_codec(
     ipc_pld_spec=Any,
+
+    # TODO: use this for debug mode locking prot?
+ # ipc_pld_spec=Raw, ) # TODO: IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing @@ -462,11 +472,9 @@ _def_tractor_codec: MsgCodec = mk_codec( # TODO: STOP USING THIS, since it's basically a global and won't # allow sub-IPC-ctxs to limit the msg-spec however desired.. -_ctxvar_MsgCodec: MsgCodec = RunVar( +# _ctxvar_MsgCodec: MsgCodec = RunVar( +_ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( 'msgspec_codec', - - # TODO: move this to our new `Msg`-spec! - # default=_def_msgspec_codec, default=_def_tractor_codec, ) @@ -475,23 +483,36 @@ _ctxvar_MsgCodec: MsgCodec = RunVar( def apply_codec( codec: MsgCodec, + ctx: Context|None = None, + ) -> MsgCodec: ''' - Dynamically apply a `MsgCodec` to the current task's - runtime context such that all IPC msgs are processed - with it for that task. + Dynamically apply a `MsgCodec` to the current task's runtime + context such that all (of a certain class of payload + containing i.e. `MsgType.pld: PayloadT`) IPC msgs are + processed with it for that task. + + Uses a `contextvars.ContextVar` to ensure the scope of any + codec setting matches the current `Context` or + `._rpc.process_messages()` feeder task's prior setting without + mutating any surrounding scope. + + When a `ctx` is supplied, only mod its `Context.pld_codec`. - Uses a `tricycle.TreeVar` to ensure the scope of the codec matches the `@cm` block and DOES NOT change to the original (default) value in new tasks (as it does for `ContextVar`). - See the docs: - - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables - - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py - ''' __tracebackhide__: bool = True - orig: MsgCodec = _ctxvar_MsgCodec.get() + + if ctx is not None: + var: ContextVar = ctx._var_pld_codec + else: + # use IPC channel-connection "global" codec + var: ContextVar = _ctxvar_MsgCodec + + orig: MsgCodec = var.get() + assert orig is not codec if codec.pld_spec is None: breakpoint() @@ -500,22 +521,25 @@ def apply_codec( 'Applying new msg-spec codec\n\n' f'{codec}\n' ) - token: RunVarToken = _ctxvar_MsgCodec.set(codec) + token: Token = var.set(codec) - # TODO: for TreeVar approach, see docs for @cm `.being()` API: - # https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables - # try: - # with _ctxvar_MsgCodec.being(codec): - # new = _ctxvar_MsgCodec.get() - # assert new is codec - # yield codec + # ?TODO? 
for TreeVar approach which copies from the + # cancel-scope of the prior value, NOT the prior task + # See the docs: + # - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + # - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py + # ^- see docs for @cm `.being()` API + # with _ctxvar_MsgCodec.being(codec): + # new = _ctxvar_MsgCodec.get() + # assert new is codec + # yield codec try: - yield _ctxvar_MsgCodec.get() + yield var.get() finally: - _ctxvar_MsgCodec.reset(token) + var.reset(token) - assert _ctxvar_MsgCodec.get() is orig + assert var.get() is orig log.info( 'Reverted to last msg-spec codec\n\n' f'{orig}\n' -- 2.34.1 From cc9af5758dc99682879e77ae2ac20e3e9e24e1fd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 21 Apr 2024 17:02:39 -0400 Subject: [PATCH 063/190] Mark `.pld` msgs as also taking `msgspec.Raw` --- tractor/msg/types.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index f7654f62..59ec2a4e 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -38,6 +38,7 @@ from typing import ( from msgspec import ( defstruct, # field, + Raw, Struct, # UNSET, # UnsetType, @@ -105,7 +106,7 @@ class Msg( # TODO: could also be set to `msgspec.Raw` if the sub-decoders # approach is preferred over the generic parameterization # approach as take by `mk_msg_spec()` below. - pld: PayloadT + pld: PayloadT|Raw class Aid( @@ -332,7 +333,7 @@ class Started( decorated IPC endpoint. ''' - pld: PayloadT + pld: PayloadT|Raw # TODO: instead of using our existing `Start` @@ -349,7 +350,7 @@ class Yield( Per IPC transmission of a value from `await MsgStream.send()`. ''' - pld: PayloadT + pld: PayloadT|Raw class Stop( @@ -377,7 +378,7 @@ class Return( func-as-`trio.Task`. ''' - pld: PayloadT + pld: PayloadT|Raw class CancelAck( @@ -710,7 +711,9 @@ def mk_msg_spec( ) return ( ipc_spec, - msgtypes_table[spec_build_method] + ipc_msg_types, + msgtypes_table[spec_build_method] + + + ipc_msg_types, ) -- 2.34.1 From 65e918298b897683428f75b2b6011ae025ea6bb4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 21 Apr 2024 17:08:27 -0400 Subject: [PATCH 064/190] Drop more `dict`-msg cruft from `._exceptions` --- tractor/_exceptions.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 4ace626f..90163241 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -170,7 +170,7 @@ def pformat_boxed_tb( f' ------ - ------\n\n' # f'{tb_str}\n' f'{tb_body}' - f' ------ - ------\n' + f' ------ - ------\n' f'_|\n' ) tb_box_indent: str = ( @@ -972,8 +972,6 @@ def _raise_from_no_key_in_msg( # an internal error should never get here try: cid: str = msg.cid - # cid: str = msg['cid'] - # except KeyError as src_err: except AttributeError as src_err: raise MessagingError( f'IPC `Context` rx-ed msg without a ctx-id (cid)!?\n' @@ -985,7 +983,6 @@ def _raise_from_no_key_in_msg( # TODO: test that shows stream raising an expected error!!! # raise the error message in a boxed exception type! - # if msg.get('error'): if isinstance(msg, Error): # match msg: # case Error(): @@ -1001,7 +998,6 @@ def _raise_from_no_key_in_msg( # the stream._eoc outside this in the calleer always? 
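# The `pld: PayloadT|Raw` union (introduced in the `types.py` hunk
# above) is what enables lazy payload decoding: validate only the
# msg "envelope" on rx and defer `.pld` checks until a (per-task)
# payload-spec is applied; a minimal demo using `msgspec` directly:
import msgspec
from msgspec import msgpack, Raw

class WireMsg(msgspec.Struct):
    cid: str
    pld: Raw  # left as raw bytes on the first decode pass

wire: bytes = msgpack.encode({'cid': '1', 'pld': 420})
msg = msgpack.Decoder(WireMsg).decode(wire)  # envelope only
assert msgpack.Decoder(int).decode(msg.pld) == 420  # lazy + typed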
# case Stop(): elif ( - # msg.get('stop') isinstance(msg, Stop) or ( stream -- 2.34.1 From dfc92352b301cb1a6e12cf562be7c1ac64aea4ca Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 22 Apr 2024 18:01:09 -0400 Subject: [PATCH 065/190] Move `MsgTypeError` maker func to `._exceptions` Since it's going to be used from the IPC primitive APIs (`Context`/`MsgStream`) for similarly handling payload type spec validation errors and bc it's really not well situation in the IPC module XD Summary of (impl) tweaks: - obvi move `_mk_msg_type_err()` and import and use it in `._ipc`; ends up avoiding a lot of ad-hoc imports we had from `._exceptions` anyway! - mask out "new codec" runtime log emission from `MsgpackTCPStream`. - allow passing a (coming in next commit) `codec: MsgDec` (message decoder) which supports the same required `.pld_spec_str: str` attr. - for send side logging use existing `MsgCodec..pformat_msg_spec()`. - rename `_raise_from_no_key_in_msg()` to the now more appropriate `_raise_from_unexpected_msg()`, but leaving alias for now. --- tractor/_exceptions.py | 136 ++++++++++++++++++++++++++++++++++++++++- tractor/_ipc.py | 130 ++------------------------------------- 2 files changed, 138 insertions(+), 128 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 90163241..b2ba6e84 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -24,6 +24,7 @@ import importlib from pprint import pformat from typing import ( Any, + Callable, Type, TYPE_CHECKING, ) @@ -32,8 +33,11 @@ import traceback import trio from msgspec import ( - structs, defstruct, + msgpack, + Raw, + structs, + ValidationError, ) from tractor._state import current_actor @@ -44,6 +48,8 @@ from tractor.msg import ( Stop, Yield, types as msgtypes, + MsgCodec, + MsgDec, ) from tractor.msg.pretty_struct import ( iter_fields, @@ -932,7 +938,7 @@ def is_multi_cancelled(exc: BaseException) -> bool: return False -def _raise_from_no_key_in_msg( +def _raise_from_unexpected_msg( ctx: Context, msg: MsgType, src_err: AttributeError, @@ -1032,7 +1038,6 @@ def _raise_from_no_key_in_msg( # that arrived which is probably the source of this stream # closure ctx.maybe_raise() - raise eoc from src_err if ( @@ -1052,3 +1057,128 @@ def _raise_from_no_key_in_msg( " BUT received a non-error msg:\n" f'{pformat(msg)}' ) from src_err + + +_raise_from_no_key_in_msg = _raise_from_unexpected_msg + + +def _mk_msg_type_err( + msg: Any|bytes|Raw, + codec: MsgCodec|MsgDec, + + message: str|None = None, + verb_header: str = '', + + src_validation_error: ValidationError|None = None, + src_type_error: TypeError|None = None, + +) -> MsgTypeError: + ''' + Compose a `MsgTypeError` from an input runtime context. + + ''' + # `Channel.send()` case + if src_validation_error is None: + + if isinstance(codec, MsgDec): + raise RuntimeError( + '`codec` must be a `MsgCodec` for send-side errors?' + ) + + # no src error from `msgspec.msgpack.Decoder.decode()` so + # prolly a manual type-check on our part. 
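# NB: the rx-side branch of this error-builder leans on the format
# of `msgspec`'s validation-error text, which embeds the offending
# field path after a `$.` prefix; roughly:
import msgspec
from msgspec import msgpack

class DemoStarted(msgspec.Struct, tag=True):
    cid: str
    pld: int

try:
    msgpack.Decoder(DemoStarted).decode(
        msgpack.encode({'type': 'DemoStarted', 'cid': '1', 'pld': 'nope'})
    )
except msgspec.ValidationError as ve:
    # -> "Expected `int`, got `str` - at `$.pld`"
    assert '$.pld' in str(ve)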
+ if message is None: + fmt_spec: str = codec.pformat_msg_spec() + fmt_stack: str = ( + '\n'.join(traceback.format_stack(limit=3)) + ) + tb_fmt: str = pformat_boxed_tb( + tb_str=fmt_stack, + # fields_str=header, + field_prefix=' ', + indent='', + ) + message: str = ( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + # f' ------ - ------\n' + f'{fmt_spec}\n', + ) + elif src_type_error: + src_message: str = str(src_type_error) + patt: str = 'type ' + type_idx: int = src_message.find('type ') + invalid_type: str = src_message[type_idx + len(patt):].split()[0] + + enc_hook: Callable|None = codec.enc.enc_hook + if enc_hook is None: + message += ( + '\n\n' + + f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" + f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' + + f'Check the `msgspec` docs for ad-hoc type extending:\n' + '|_ https://jcristharif.com/msgspec/extending.html\n' + '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' + ) + + + msgtyperr = MsgTypeError( + message=message, + ipc_msg=msg, + ) + # ya, might be `None` + msgtyperr.__cause__ = src_type_error + return msgtyperr + + # `Channel.recv()` case + else: + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. + msg_dict: dict = msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + message: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' + ) + if verb_header: + message = f'{verb_header} ' + message + + # XXX see if we can determine the exact invalid field + # such that we can comprehensively report the + # specific field's type problem + msgspec_msg: str = src_validation_error.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) + message += ( + f'{msg.rstrip("`")}\n\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' + ) + + msgtyperr = MsgTypeError.from_decode( + message=message, + msgdict=msg_dict, + ) + msgtyperr.__cause__ = src_validation_error + return msgtyperr diff --git a/tractor/_ipc.py b/tractor/_ipc.py index f76d4ef5..70774bed 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -49,6 +49,7 @@ from tractor._exceptions import ( MsgTypeError, pack_from_raise, TransportClosed, + _mk_msg_type_err, ) from tractor.msg import ( _ctxvar_MsgCodec, @@ -118,127 +119,6 @@ class MsgTransport(Protocol[MsgType]): ... -def _mk_msg_type_err( - msg: Any|bytes, - codec: MsgCodec, - - message: str|None = None, - verb_header: str = '', - - src_validation_error: msgspec.ValidationError|None = None, - src_type_error: TypeError|None = None, - -) -> MsgTypeError: - - import textwrap - - # `Channel.send()` case - if src_validation_error is None: # send-side - - # no src error from `msgspec.msgpack.Decoder.decode()` so - # prolly a manual type-check on our part. 
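# The enc-hook suggestion in the error text above refers to
# `msgspec`'s standard custom-type extension mechanism, e.g.:
from decimal import Decimal
from msgspec import msgpack

def enc_hook(obj: object) -> str:
    if isinstance(obj, Decimal):
        return str(obj)  # wire-encode as a str
    raise NotImplementedError(f'No encoding for {type(obj)}')

def dec_hook(typ: type, obj: object) -> object:
    if typ is Decimal:
        return Decimal(obj)
    raise NotImplementedError(f'No decoding for {typ}')

enc = msgpack.Encoder(enc_hook=enc_hook)
dec = msgpack.Decoder(Decimal, dec_hook=dec_hook)
assert dec.decode(enc.encode(Decimal('1.337'))) == Decimal('1.337')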
- if message is None: - import traceback - from tractor._exceptions import pformat_boxed_tb - - fmt_spec: str = '\n'.join( - map(str, codec.msg_spec.__args__) - ) - fmt_stack: str = ( - '\n'.join(traceback.format_stack(limit=3)) - ) - tb_fmt: str = pformat_boxed_tb( - tb_str=fmt_stack, - # fields_str=header, - field_prefix=' ', - indent='', - ) - message: str = ( - f'invalid msg -> {msg}: {type(msg)}\n\n' - f'{tb_fmt}\n' - f'Valid IPC msgs are:\n\n' - # f' ------ - ------\n' - f'{fmt_spec}\n', - ) - elif src_type_error: - src_message: str = str(src_type_error) - patt: str = 'type ' - type_idx: int = src_message.find('type ') - invalid_type: str = src_message[type_idx + len(patt):].split()[0] - - enc_hook: Callable|None = codec.enc.enc_hook - if enc_hook is None: - message += ( - '\n\n' - - f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" - f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' - - f'Check the `msgspec` docs for ad-hoc type extending:\n' - '|_ https://jcristharif.com/msgspec/extending.html\n' - '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' - ) - - - msgtyperr = MsgTypeError( - message=message, - ipc_msg=msg, - ) - # ya, might be `None` - msgtyperr.__cause__ = src_type_error - return msgtyperr - - # `Channel.recv()` case - else: - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. - msg_dict: dict = msgspec.msgpack.decode(msg) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(msgtypes, msg_type_name) - message: str = ( - f'invalid `{msg_type_name}` IPC msg\n\n' - ) - if verb_header: - message = f'{verb_header} ' + message - - # XXX see if we can determine the exact invalid field - # such that we can comprehensively report the - # specific field's type problem - msgspec_msg: str = src_validation_error.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - obj = object() - if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_name_expr: str = ( - f' |_{maybe_field}: {codec.pld_spec_str} = ' - ) - fmt_val_lines: list[str] = pformat(field_val).splitlines() - fmt_val: str = ( - f'{fmt_val_lines[0]}\n' - + - textwrap.indent( - '\n'.join(fmt_val_lines[1:]), - prefix=' '*len(field_name_expr), - ) - ) - message += ( - f'{msg.rstrip("`")}\n\n' - f'<{msg_type.__qualname__}(\n' - # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' - f'{field_name_expr}{fmt_val}\n' - f')>' - ) - - msgtyperr = MsgTypeError.from_decode( - message=message, - msgdict=msg_dict, - ) - msgtyperr.__cause__ = src_validation_error - return msgtyperr - - # TODO: not sure why we have to inherit here, but it seems to be an # issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; # probably should make a `mypy` issue? @@ -299,10 +179,10 @@ class MsgpackTCPStream(MsgTransport): _codec._ctxvar_MsgCodec.get() ) # TODO: mask out before release? 
- log.runtime( - f'New {self} created with codec\n' - f'codec: {self._codec}\n' - ) + # log.runtime( + # f'New {self} created with codec\n' + # f'codec: {self._codec}\n' + # ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' -- 2.34.1 From fee20103c62c7511f5ce1f4bdd72b9d7b2f94449 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 22 Apr 2024 18:24:42 -0400 Subject: [PATCH 066/190] Add a `MsgDec` for receive-only decoding In prep for a "payload receiver" abstraction that will wrap `MsgType.pld`-IO delivery from `Context` and `MsgStream`, adds a small `msgspec.msgpack.Decoder` shim which delegates an API similar to `MsgCodec` and is offered via a `.msg._codec.mk_dec()` factory. Detalles: - move over the TODOs/comments from `.msg.types.Start` to to `MsgDec.spec` since it's probably the ideal spot to start thinking about it from a consumer code PoV. - move codec reversion assert and log emit into `finally:` block. - flip default `.types._tractor_codec = mk_codec_ipc_pld(ipc_pld_spec=Raw)` in prep for always doing payload-delayed decodes. - make `MsgCodec._dec` private with public property getter. - change `CancelAck` to NOT derive from `Return` so it's mutex in `match/case:` handling. --- tractor/msg/__init__.py | 2 + tractor/msg/_codec.py | 156 ++++++++++++++++++++++++++++++++++------ tractor/msg/types.py | 33 +-------- 3 files changed, 141 insertions(+), 50 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 8f13f5f8..d968f6cf 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -34,6 +34,7 @@ from ._codec import ( apply_codec as apply_codec, mk_codec as mk_codec, MsgCodec as MsgCodec, + MsgDec as MsgDec, current_codec as current_codec, ) @@ -50,6 +51,7 @@ from .types import ( Yield as Yield, Stop as Stop, Return as Return, + CancelAck as CancelAck, Error as Error, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 766a297a..104f7d99 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -50,7 +50,7 @@ from types import ModuleType import msgspec from msgspec import ( msgpack, - # Raw, + Raw, ) # from trio.lowlevel import ( # RunVar, @@ -71,6 +71,108 @@ if TYPE_CHECKING: log = get_logger(__name__) + +# TODO: unify with `MsgCodec` by making `._dec` part this? +class MsgDec(Struct): + ''' + An IPC msg decoder. + + Normally used to decode only a payload: `MsgType.pld: + PayloadT` field before delivery to IPC consumer code. + + ''' + _dec: msgpack.Decoder + + @property + def dec(self) -> msgpack.Decoder: + return self._dec + + # struct type unions + # https://jcristharif.com/msgspec/structs.html#tagged-unions + # + # ^-TODO-^: make a wrapper type for this such that alt + # backends can be represented easily without a `Union` needed, + # AND so that we have better support for wire transport. + # + # -[ ] maybe `FieldSpec` is a good name since msg-spec + # better applies to a `MsgType[FieldSpec]`? + # + # -[ ] both as part of the `.open_context()` call AND as part of the + # immediate ack-reponse (see similar below) + # we should do spec matching and fail if anything is awry? + # + # -[ ] eventually spec should be generated/parsed from the + # type-annots as # desired in GH issue: + # https://github.com/goodboy/tractor/issues/365 + # + # -[ ] semantics of the mismatch case + # - when caller-callee specs we should raise + # a `MsgTypeError` or `MsgSpecError` or similar? + # + # -[ ] wrapper types for both spec types such that we can easily + # IPC transport them? 
+ # - `TypeSpec: Union[Type]` + # * also a `.__contains__()` for doing `None in + # TypeSpec[None|int]` since rn you need to do it on + # `.__args__` for unions.. + # - `MsgSpec: Union[Type[Msg]] + # + # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo + # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params + # |_ historical pep 695: https://peps.python.org/pep-0695/ + # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/ + # |_ on annotation scopes: + # https://docs.python.org/3/reference/executionmodel.html#annotation-scopes + # |_ 3.13 will have subscriptable funcs Bo + # https://peps.python.org/pep-0718/ + @property + def spec(self) -> Union[Type[Struct]]: + # NOTE: defined and applied inside `mk_codec()` + return self._dec.type + + # no difference, as compared to a `MsgCodec` which defines the + # `MsgType.pld: PayloadT` part of its spec separately + pld_spec = spec + + # TODO: would get moved into `FieldSpec.__str__()` right? + @property + def spec_str(self) -> str: + + # TODO: could also use match: instead? + spec: Union[Type]|Type = self.spec + + # `typing.Union` case + if getattr(spec, '__args__', False): + return str(spec) + + # just a single type + else: + return spec.__name__ + + pld_spec_str = spec_str + + def decode( + self, + raw: Raw|bytes, + ) -> Any: + return self._dec.decode(raw) + + @property + def hook(self) -> Callable|None: + return self._dec.dec_hook + + +def mk_dec( + spec: Union[Type[Struct]]|Any = Any, + dec_hook: Callable|None = None, + +) -> MsgDec: + + return msgpack.Decoder( + type=spec, # like `Msg[Any]` + dec_hook=dec_hook, + ) + # TODO: overall IPC msg-spec features (i.e. in this mod)! # # -[ ] API changes towards being interchange lib agnostic! @@ -94,8 +196,7 @@ class MsgCodec(Struct): ''' _enc: msgpack.Encoder _dec: msgpack.Decoder - - pld_spec: Union[Type[Struct]]|None + _pld_spec: Type[Struct]|Raw|Any def __repr__(self) -> str: speclines: str = textwrap.indent( @@ -118,14 +219,21 @@ class MsgCodec(Struct): ')>' ) + @property + def pld_spec(self) -> Type[Struct]|Raw|Any: + return self._pld_spec + @property def pld_spec_str(self) -> str: - spec: Union[Type]|Type = self.pld_spec # TODO: could also use match: instead? + spec: Union[Type]|Type = self.pld_spec + + # `typing.Union` case if getattr(spec, '__args__', False): - # `typing.Union` case return str(spec) + + # just a single type else: return spec.__name__ @@ -133,6 +241,7 @@ class MsgCodec(Struct): # https://jcristharif.com/msgspec/structs.html#tagged-unions @property def msg_spec(self) -> Union[Type[Struct]]: + # NOTE: defined and applied inside `mk_codec()` return self._dec.type def msg_spec_items( @@ -157,8 +266,9 @@ class MsgCodec(Struct): def pformat_msg_spec( self, msg: MsgType|None = None, + join_char: str = '\n', ) -> str: - return '\n'.join( + return join_char.join( self.msg_spec_items(msg=msg).values() ) @@ -405,18 +515,25 @@ def mk_codec( assert len(ipc_msg_spec.__args__) == len(msg_types) assert ipc_msg_spec + # TODO: use this shim instead? + # bc.. unification, err somethin? 
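# i.e. the decoder is parameterized by a (union) spec such that
# only matching payloads validate; the standalone equivalent:
from typing import Union
import msgspec
from msgspec import msgpack

dec = msgpack.Decoder(Union[int, str])
assert dec.decode(msgpack.encode(42)) == 42
try:
    dec.decode(msgpack.encode([1, 2]))  # outside the spec
except msgspec.ValidationError:
    pass  # rejected, as desired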
+ # dec: MsgDec = mk_dec( + # spec=ipc_msg_spec, + # dec_hook=dec_hook, + # ) + + dec = msgpack.Decoder( + type=ipc_msg_spec, + dec_hook=dec_hook, + ) enc = msgpack.Encoder( enc_hook=enc_hook, ) - dec = msgpack.Decoder( - type=ipc_msg_spec, # like `Msg[Any]` - dec_hook=dec_hook, - ) codec = MsgCodec( _enc=enc, _dec=dec, - pld_spec=ipc_pld_spec, + _pld_spec=ipc_pld_spec, ) # sanity on expected backend support @@ -435,10 +552,9 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # https://jcristharif.com/msgspec/supported-types.html # _def_tractor_codec: MsgCodec = mk_codec( - ipc_pld_spec=Any, - # TODO: use this for debug mode locking prot? - # ipc_pld_spec=Raw, + # ipc_pld_spec=Any, + ipc_pld_spec=Raw, ) # TODO: IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing @@ -538,12 +654,12 @@ def apply_codec( yield var.get() finally: var.reset(token) + log.info( + 'Reverted to last msg-spec codec\n\n' + f'{orig}\n' + ) + assert var.get() is orig - assert var.get() is orig - log.info( - 'Reverted to last msg-spec codec\n\n' - f'{orig}\n' - ) def current_codec() -> MsgCodec: ''' @@ -574,7 +690,7 @@ def limit_msg_spec( ''' __tracebackhide__: bool = True - curr_codec = current_codec() + curr_codec: MsgCodec = current_codec() msgspec_codec: MsgCodec = mk_codec( ipc_pld_spec=payload_spec, **codec_kwargs, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 59ec2a4e..cb124324 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -266,35 +266,7 @@ class Start( # TODO: enforcing a msg-spec in terms `Msg.pld` # parameterizable msgs to be used in the appls IPC dialog. - # - # -[ ] both as part of the `.open_context()` call AND as part of the - # immediate ack-reponse (see similar below) - # we should do spec matching and fail if anything is awry? - # - # -[ ] eventually spec should be generated/parsed from the - # type-annots as # desired in GH issue: - # https://github.com/goodboy/tractor/issues/365 - # - # -[ ] semantics of the mismatch case - # - when caller-callee specs we should raise - # a `MsgTypeError` or `MsgSpecError` or similar? - # - # -[ ] wrapper types for both spec types such that we can easily - # IPC transport them? - # - `TypeSpec: Union[Type]` - # * also a `.__contains__()` for doing `None in - # TypeSpec[None|int]` since rn you need to do it on - # `.__args__` for unions.. 
- # - `MsgSpec: Union[Type[Msg]] - # - # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo - # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params - # |_ historical pep 695: https://peps.python.org/pep-0695/ - # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/ - # |_ on annotation scopes: - # https://docs.python.org/3/reference/executionmodel.html#annotation-scopes - # |_ 3.13 will have subscriptable funcs Bo - # https://peps.python.org/pep-0718/ + # => SEE `._codec.MsgDec` for more <= pld_spec: str = str(Any) @@ -382,7 +354,8 @@ class Return( class CancelAck( - Return, + Msg, + Generic[PayloadT], ): ''' Deliver the `bool` return-value from a cancellation `Actor` -- 2.34.1 From e4e04c516f029834c315cf8df7eaa24e21167aa0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 23 Apr 2024 17:43:45 -0400 Subject: [PATCH 067/190] First draft "payload receiver in a new `.msg._ops` As per much tinkering, re-designs and preceding rubber-ducking via many "commit msg novelas", **finally** this adds the (hopefully) final missing layer for typed msg safety: `tractor.msg._ops.PldRx` (or `PayloadReceiver`? haven't decided how verbose to go..) Design justification summary: ------ - ------ - need a way to be as-close-as-possible to the `tractor`-application such that when `MsgType.pld: PayloadT` validation takes place, it is straightforward and obvious how user code can decide to handle any resulting `MsgTypeError`. - there should be a common and optional-yet-modular way to modify **how** data delivered via IPC (possibly embedded as user defined, type-constrained `.pld: msgspec.Struct`s) can be handled and processed during fault conditions and/or IPC "msg attacks". - support for nested type constraints within a `MsgType.pld` field should be simple to define, implement and understand at runtime. - a layer between the app-level IPC primitive APIs (`Context`/`MsgStream`) and application-task code (consumer code of those APIs) should be easily customized and prove-to-be-as-such through demonstrably rigorous internal (sub-sys) use! -> eg. via seemless runtime RPC eps support like `Actor.cancel()` -> by correctly implementing our `.devx._debug.Lock` REPL TTY mgmt dialog prot, via a dead simple payload-as-ctl-msg-spec. There are some fairly detailed doc strings included so I won't duplicate that content, the majority of the work here is actually somewhat of a factoring of many similar blocks that are doing more or less the same `msg = await Context._rx_chan.receive()` with boilerplate for `Error`/`Stop` handling via `_raise_from_no_key_in_msg()`. The new `PldRx` basically provides a shim layer for this common "receive msg, decode its payload, yield it up to the consuming app task" by pairing the RPC feeder mem-chan with a msg-payload decoder and expecting IPC API internals to use **one** API instead of re-implementing the same pattern all over the place XD `PldRx` breakdown ------ - ------ - for now only expects a `._msgdec: MsgDec` which allows for override-able `MsgType.pld` validation and most obviously used in the impl of `.dec_msg()`, the decode message method. - provides multiple mem-chan receive options including: |_ `.recv_pld()` which does the e2e operation of receiving a payload item. |_ a sync `.recv_pld_nowait()` version. |_ a `.recv_msg_w_pld()` which optionally allows retreiving both the shuttling `MsgType` as well as it's `.pld` body for use cases where info on both is important (eg. draining a `MsgStream`). 
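As a condensed, self-contained sketch of the core idea (where the
`WireMsg`/`MiniPldRx` names are purely illustrative, NOT the actual
impl):

import trio
from msgspec import msgpack, Raw, Struct

class WireMsg(Struct):
    cid: str
    pld: Raw  # envelope decoded eagerly, payload lazily

class MiniPldRx:
    '''
    Pair a ctx's feeder mem-chan with a payload-spec decoder.

    '''
    def __init__(self, pld_spec: type = int):
        self._dec = msgpack.Decoder(pld_spec)

    async def recv_pld(
        self,
        rx_chan: trio.MemoryReceiveChannel,
    ):
        msg: WireMsg = await rx_chan.receive()
        # per-task payload validation applied only on delivery
        return self._dec.decode(msg.pld)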
Dirty internal changeover/implementation deatz: ------ - ------ - obvi move over all the IPC "primitives" that previously had the duplicate recv-n-yield logic: - `MsgStream.receive[_nowait]()` delegating instead to the equivalent `PldRx.recv_pld[_nowait]()`. - add `Context._pld_rx: PldRx`, created and passed in by `mk_context()`; use it for the `.started()` -> `first: Started` retrieval inside `open_context_from_portal()`. - all the relevant `Portal` invocation methods: `.result()`, `.run_from_ns()`, `.run()`; also allows for dropping `_unwrap_msg()` and `.Portal_return_once()` outright Bo - rename `Context.ctx._recv_chan` -> `._rx_chan`. - add detailed `Context._scope` info for logging whether or not it's cancelled inside `_maybe_cancel_and_set_remote_error()`. - move `._context._drain_to_final_msg()` -> `._ops.drain_to_final_msg()` since it's really not necessarily ctx specific per say, and it does kinda fit with "msg operations" more abstractly ;) --- tractor/_context.py | 438 +++++++------------------------- tractor/_portal.py | 127 +++++----- tractor/_runtime.py | 6 +- tractor/_streaming.py | 71 ++---- tractor/msg/_ops.py | 563 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 738 insertions(+), 467 deletions(-) create mode 100644 tractor/msg/_ops.py diff --git a/tractor/_context.py b/tractor/_context.py index 9e5fe6c1..f0fc966e 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -25,26 +25,31 @@ disjoint, parallel executing tasks in separate actors. ''' from __future__ import annotations from collections import deque -from contextlib import asynccontextmanager as acm -from contextvars import ContextVar +from contextlib import ( + asynccontextmanager as acm, +) from dataclasses import ( dataclass, field, ) from functools import partial import inspect -import msgspec from pprint import pformat from typing import ( Any, Callable, AsyncGenerator, + Type, TYPE_CHECKING, + Union, ) import warnings - +# ------ - ------ import trio - +from msgspec import ( + ValidationError, +) +# ------ - ------ from ._exceptions import ( ContextCancelled, InternalError, @@ -53,7 +58,6 @@ from ._exceptions import ( StreamOverrun, pack_from_raise, unpack_error, - _raise_from_no_key_in_msg, ) from .log import get_logger from .msg import ( @@ -70,8 +74,12 @@ from .msg import ( current_codec, pretty_struct, types as msgtypes, + _ops as msgops, +) +from ._ipc import ( + Channel, + _mk_msg_type_err, ) -from ._ipc import Channel from ._streaming import MsgStream from ._state import ( current_actor, @@ -86,294 +94,9 @@ if TYPE_CHECKING: CallerInfo, ) - log = get_logger(__name__) -async def _drain_to_final_msg( - ctx: Context, - - hide_tb: bool = True, - msg_limit: int = 6, - -) -> tuple[ - Return|None, - list[MsgType] -]: - ''' - Drain IPC msgs delivered to the underlying rx-mem-chan - `Context._recv_chan` from the runtime in search for a final - result or error msg. - - The motivation here is to ideally capture errors during ctxc - conditions where a canc-request/or local error is sent but the - local task also excepts and enters the - `Portal.open_context().__aexit__()` block wherein we prefer to - capture and raise any remote error or ctxc-ack as part of the - `ctx.result()` cleanup and teardown sequence. - - ''' - __tracebackhide__: bool = hide_tb - raise_overrun: bool = not ctx._allow_overruns - - # wait for a final context result by collecting (but - # basically ignoring) any bi-dir-stream msgs still in transit - # from the far end. 
- pre_result_drained: list[MsgType] = [] - return_msg: Return|None = None - while not ( - ctx.maybe_error - and not ctx._final_result_is_set() - ): - try: - # TODO: can remove? - # await trio.lowlevel.checkpoint() - - # NOTE: this REPL usage actually works here dawg! Bo - # from .devx._debug import pause - # await pause() - - # TODO: bad idea? - # -[ ] wrap final outcome channel wait in a scope so - # it can be cancelled out of band if needed? - # - # with trio.CancelScope() as res_cs: - # ctx._res_scope = res_cs - # msg: dict = await ctx._recv_chan.receive() - # if res_cs.cancelled_caught: - - # TODO: ensure there's no more hangs, debugging the - # runtime pretty preaase! - # from .devx._debug import pause - # await pause() - - # TODO: can remove this finally? - # we have no more need for the sync draining right - # since we're can kinda guarantee the async - # `.receive()` below will never block yah? - # - # if ( - # ctx._cancel_called and ( - # ctx.cancel_acked - # # or ctx.chan._cancel_called - # ) - # # or not ctx._final_result_is_set() - # # ctx.outcome is not - # # or ctx.chan._closed - # ): - # try: - # msg: dict = await ctx._recv_chan.receive_nowait()() - # except trio.WouldBlock: - # log.warning( - # 'When draining already `.cancel_called` ctx!\n' - # 'No final msg arrived..\n' - # ) - # break - # else: - # msg: dict = await ctx._recv_chan.receive() - - # TODO: don't need it right jefe? - # with trio.move_on_after(1) as cs: - # if cs.cancelled_caught: - # from .devx._debug import pause - # await pause() - - # pray to the `trio` gawds that we're corrent with this - # msg: dict = await ctx._recv_chan.receive() - msg: MsgType = await ctx._recv_chan.receive() - - # NOTE: we get here if the far end was - # `ContextCancelled` in 2 cases: - # 1. we requested the cancellation and thus - # SHOULD NOT raise that far end error, - # 2. WE DID NOT REQUEST that cancel and thus - # SHOULD RAISE HERE! - except trio.Cancelled: - - # CASE 2: mask the local cancelled-error(s) - # only when we are sure the remote error is - # the source cause of this local task's - # cancellation. - ctx.maybe_raise() - - # CASE 1: we DID request the cancel we simply - # continue to bubble up as normal. - raise - - match msg: - - # final result arrived! - case Return( - # cid=cid, - pld=res, - ): - ctx._result: Any = res - log.runtime( - 'Context delivered final draining msg:\n' - f'{pformat(msg)}' - ) - # XXX: only close the rx mem chan AFTER - # a final result is retreived. - # if ctx._recv_chan: - # await ctx._recv_chan.aclose() - # TODO: ^ we don't need it right? - return_msg = msg - break - - # far end task is still streaming to us so discard - # and report depending on local ctx state. - case Yield(): - pre_result_drained.append(msg) - if ( - (ctx._stream.closed - and (reason := 'stream was already closed') - ) - or (ctx.cancel_acked - and (reason := 'ctx cancelled other side') - ) - or (ctx._cancel_called - and (reason := 'ctx called `.cancel()`') - ) - or (len(pre_result_drained) > msg_limit - and (reason := f'"yield" limit={msg_limit}') - ) - ): - log.cancel( - 'Cancelling `MsgStream` drain since ' - f'{reason}\n\n' - f'<= {ctx.chan.uid}\n' - f' |_{ctx._nsf}()\n\n' - f'=> {ctx._task}\n' - f' |_{ctx._stream}\n\n' - - f'{pformat(msg)}\n' - ) - return ( - return_msg, - pre_result_drained, - ) - - # drain up to the `msg_limit` hoping to get - # a final result or error/ctxc. 
- else: - log.warning( - 'Ignoring "yield" msg during `ctx.result()` drain..\n' - f'<= {ctx.chan.uid}\n' - f' |_{ctx._nsf}()\n\n' - f'=> {ctx._task}\n' - f' |_{ctx._stream}\n\n' - - f'{pformat(msg)}\n' - ) - continue - - # stream terminated, but no result yet.. - # - # TODO: work out edge cases here where - # a stream is open but the task also calls - # this? - # -[ ] should be a runtime error if a stream is open right? - # Stop() - case Stop(): - pre_result_drained.append(msg) - log.cancel( - 'Remote stream terminated due to "stop" msg:\n\n' - f'{pformat(msg)}\n' - ) - continue - - # remote error msg, likely already handled inside - # `Context._deliver_msg()` - case Error(): - # TODO: can we replace this with `ctx.maybe_raise()`? - # -[ ] would this be handier for this case maybe? - # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') - # - re: Exception|None = ctx._remote_error - if re: - assert msg is ctx._cancel_msg - # NOTE: this solved a super duper edge case XD - # this was THE super duper edge case of: - # - local task opens a remote task, - # - requests remote cancellation of far end - # ctx/tasks, - # - needs to wait for the cancel ack msg - # (ctxc) or some result in the race case - # where the other side's task returns - # before the cancel request msg is ever - # rxed and processed, - # - here this surrounding drain loop (which - # iterates all ipc msgs until the ack or - # an early result arrives) was NOT exiting - # since we are the edge case: local task - # does not re-raise any ctxc it receives - # IFF **it** was the cancellation - # requester.. - # - # XXX will raise if necessary but ow break - # from loop presuming any supressed error - # (ctxc) should terminate the context! - ctx._maybe_raise_remote_err( - re, - # NOTE: obvi we don't care if we - # overran the far end if we're already - # waiting on a final result (msg). - # raise_overrun_from_self=False, - raise_overrun_from_self=raise_overrun, - ) - - break # OOOOOF, yeah obvi we need this.. - - # XXX we should never really get here - # right! since `._deliver_msg()` should - # always have detected an {'error': ..} - # msg and already called this right!?! - elif error := unpack_error( - msg=msg, - chan=ctx._portal.channel, - hide_tb=False, - ): - log.critical('SHOULD NEVER GET HERE!?') - assert msg is ctx._cancel_msg - assert error.msgdata == ctx._remote_error.msgdata - assert error.ipc_msg == ctx._remote_error.ipc_msg - from .devx._debug import pause - await pause() - ctx._maybe_cancel_and_set_remote_error(error) - ctx._maybe_raise_remote_err(error) - - else: - # bubble the original src key error - raise - - # XXX should pretty much never get here unless someone - # overrides the default `MsgType` spec. - case _: - pre_result_drained.append(msg) - # It's definitely an internal error if any other - # msg type without a`'cid'` field arrives here! - if not msg.cid: - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' - ) - - raise RuntimeError('Unknown msg type: {msg}') - - else: - log.cancel( - 'Skipping `MsgStream` drain since final outcome is set\n\n' - f'{ctx.outcome}\n' - ) - - return ( - return_msg, - pre_result_drained, - ) - - class Unresolved: ''' Placeholder value for `Context._result` until @@ -423,9 +146,12 @@ class Context: # the "feeder" channels for delivering message values to the # local task from the runtime's msg processing loop. 
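# NB: each ctx's "feeder" pair is just a plain `trio` mem-chan as
# allocated in `mk_context()`; a toy equivalent of the
# msg-loop-to-app-task delivery:
import trio

async def demo() -> None:
    send_chan, recv_chan = trio.open_memory_channel(2**6)
    async with trio.open_nursery() as tn:
        tn.start_soon(send_chan.send, {'cid': '1', 'pld': 420})
        msg: dict = await recv_chan.receive()
        assert msg['pld'] == 420

trio.run(demo)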
- _recv_chan: trio.MemoryReceiveChannel + _rx_chan: trio.MemoryReceiveChannel _send_chan: trio.MemorySendChannel + # payload receiver + _pld_rx: msgops.PldRx + # full "namespace-path" to target RPC function _nsf: NamespacePath @@ -447,7 +173,7 @@ class Context: _task: trio.lowlevel.Task|None = None # TODO: cs around result waiting so we can cancel any - # permanently blocking `._recv_chan.receive()` call in + # permanently blocking `._rx_chan.receive()` call in # a drain loop? # _res_scope: trio.CancelScope|None = None @@ -504,14 +230,6 @@ class Context: _started_called: bool = False _stream_opened: bool = False _stream: MsgStream|None = None - _pld_codec_var: ContextVar[MsgCodec] = ContextVar( - 'pld_codec', - default=_codec._def_msgspec_codec, # i.e. `Any`-payloads - ) - - @property - def pld_codec(self) -> MsgCodec|None: - return self._pld_codec_var.get() # caller of `Portal.open_context()` for # logging purposes mostly @@ -916,9 +634,8 @@ class Context: else: log.error( f'Remote context error:\n\n' - + # f'{pformat(self)}\n' f'{error}\n' - f'{pformat(self)}\n' ) # always record the cancelling actor's uid since its @@ -955,24 +672,49 @@ class Context: and not self._is_self_cancelled() and not cs.cancel_called and not cs.cancelled_caught - and ( - msgerr - and - # NOTE: allow user to config not cancelling the - # local scope on `MsgTypeError`s - self._cancel_on_msgerr - ) ): - # TODO: it'd sure be handy to inject our own - # `trio.Cancelled` subtype here ;) - # https://github.com/goodboy/tractor/issues/368 - log.cancel('Cancelling local `.open_context()` scope!') - self._scope.cancel() + if not ( + msgerr + # NOTE: we allow user to config not cancelling the + # local scope on `MsgTypeError`s + and not self._cancel_on_msgerr + ): + # TODO: it'd sure be handy to inject our own + # `trio.Cancelled` subtype here ;) + # https://github.com/goodboy/tractor/issues/368 + message: str = 'Cancelling `Context._scope` !\n\n' + self._scope.cancel() + + else: + message: str = ( + 'NOT Cancelling `Context._scope` since,\n' + f'Context._cancel_on_msgerr = {self._cancel_on_msgerr}\n\n' + f'AND we got a msg-type-error!\n' + f'{error}\n' + ) else: - log.cancel('NOT cancelling local `.open_context()` scope!') + message: str = 'NOT cancelling `Context._scope` !\n\n' + scope_info: str = 'No `self._scope: CancelScope` was set/used ?' + if cs: + scope_info: str = ( + f'self._scope: {cs}\n' + f'|_ .cancel_called: {cs.cancel_called}\n' + f'|_ .cancelled_caught: {cs.cancelled_caught}\n' + f'|_ ._cancel_status: {cs._cancel_status}\n\n' + f'{self}\n' + f'|_ ._is_self_cancelled(): {self._is_self_cancelled()}\n' + f'|_ ._cancel_on_msgerr: {self._cancel_on_msgerr}\n\n' + + f'msgerr: {msgerr}\n' + ) + log.cancel( + message + + + f'{scope_info}' + ) # TODO: maybe we should also call `._res_scope.cancel()` if it # exists to support cancelling any drain loop hangs? # NOTE: this usage actually works here B) @@ -1259,7 +1001,7 @@ class Context: # a ``.open_stream()`` block prior or there was some other # unanticipated error or cancellation from ``trio``. - if ctx._recv_chan._closed: + if ctx._rx_chan._closed: raise trio.ClosedResourceError( 'The underlying channel for this stream was already closed!\n' ) @@ -1279,7 +1021,7 @@ class Context: # stream WAS NOT just closed normally/gracefully. async with MsgStream( ctx=self, - rx_chan=ctx._recv_chan, + rx_chan=ctx._rx_chan, ) as stream: # NOTE: we track all existing streams per portal for @@ -1430,13 +1172,12 @@ class Context: # boxed `StreamOverrun`. 
This is mostly useful for # supressing such faults during # cancellation/error/final-result handling inside - # `_drain_to_final_msg()` such that we do not + # `msg._ops.drain_to_final_msg()` such that we do not # raise such errors particularly in the case where # `._cancel_called == True`. not raise_overrun_from_self and isinstance(remote_error, RemoteActorError) - - and remote_error.boxed_type_str == 'StreamOverrun' + and remote_error.boxed_type is StreamOverrun # and tuple(remote_error.msgdata['sender']) == our_uid and tuple(remote_error.sender) == our_uid @@ -1506,12 +1247,12 @@ class Context: if self._final_result_is_set(): return self._result - assert self._recv_chan + assert self._rx_chan raise_overrun: bool = not self._allow_overruns if ( self.maybe_error is None and - not self._recv_chan._closed # type: ignore + not self._rx_chan._closed # type: ignore ): # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" @@ -1519,7 +1260,7 @@ class Context: ( return_msg, drained_msgs, - ) = await _drain_to_final_msg( + ) = await msgops.drain_to_final_msg( ctx=self, hide_tb=hide_tb, ) @@ -1805,8 +1546,7 @@ class Context: await self.chan.send(started_msg) # raise any msg type error NO MATTER WHAT! - except msgspec.ValidationError as verr: - from tractor._ipc import _mk_msg_type_err + except ValidationError as verr: raise _mk_msg_type_err( msg=msg_bytes, codec=codec, @@ -1893,7 +1633,7 @@ class Context: - NEVER `return` early before delivering the msg! bc if the error is a ctxc and there is a task waiting on `.result()` we need the msg to be - `send_chan.send_nowait()`-ed over the `._recv_chan` so + `send_chan.send_nowait()`-ed over the `._rx_chan` so that the error is relayed to that waiter task and thus raised in user code! @@ -2204,24 +1944,11 @@ async def open_context_from_portal( # -> it's expected that if there is an error in this phase of # the dialog, the `Error` msg should be raised from the `msg` # handling block below. - msg: Started = await ctx._recv_chan.receive() - try: - # the "first" value here is delivered by the callee's - # ``Context.started()`` call. - # first: Any = msg['started'] - first: Any = msg.pld - ctx._started_called: bool = True - - # except KeyError as src_error: - except AttributeError as src_error: - log.exception('Raising from unexpected msg!\n') - _raise_from_no_key_in_msg( - ctx=ctx, - msg=msg, - src_err=src_error, - log=log, - expect_msg=Started, - ) + first: Any = await ctx._pld_rx.recv_pld( + ctx=ctx, + expect_msg=Started, + ) + ctx._started_called: bool = True uid: tuple = portal.channel.uid cid: str = ctx.cid @@ -2543,7 +2270,7 @@ async def open_context_from_portal( # we tear down the runtime feeder chan last # to avoid premature stream clobbers. if ( - (rxchan := ctx._recv_chan) + (rxchan := ctx._rx_chan) # maybe TODO: yes i know the below check is # touching `trio` memchan internals..BUT, there are @@ -2586,7 +2313,7 @@ async def open_context_from_portal( # underlying feeder channel is # once-and-only-CLOSED! with trio.CancelScope(shield=True): - await ctx._recv_chan.aclose() + await ctx._rx_chan.aclose() # XXX: we always raise remote errors locally and # generally speaking mask runtime-machinery related @@ -2631,9 +2358,9 @@ async def open_context_from_portal( # FINALLY, remove the context from runtime tracking and # exit! 
log.runtime( - 'Removing IPC ctx opened with peer\n' - f'{uid}\n' - f'|_{ctx}\n' + 'De-allocating IPC ctx opened with {ctx.side!r} peer \n' + f'uid: {uid}\n' + f'cid: {ctx.cid}\n' ) portal.actor._contexts.pop( (uid, cid), @@ -2646,6 +2373,7 @@ def mk_context( nsf: NamespacePath, msg_buffer_size: int = 2**6, + pld_spec: Union[Type] = Any, **kwargs, @@ -2665,12 +2393,18 @@ def mk_context( from .devx._code import find_caller_info caller_info: CallerInfo|None = find_caller_info() + pld_rx = msgops.PldRx( + # _rx_mc=recv_chan, + _msgdec=_codec.mk_dec(spec=pld_spec) + ) + ctx = Context( chan=chan, cid=cid, _actor=current_actor(), _send_chan=send_chan, - _recv_chan=recv_chan, + _rx_chan=recv_chan, + _pld_rx=pld_rx, _nsf=nsf, _task=trio.lowlevel.current_task(), _caller_info=caller_info, diff --git a/tractor/_portal.py b/tractor/_portal.py index 052dd8ef..97268972 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -31,7 +31,7 @@ from typing import ( Any, Callable, AsyncGenerator, - # Type, + TYPE_CHECKING, ) from functools import partial from dataclasses import dataclass @@ -46,12 +46,12 @@ from ._state import ( from ._ipc import Channel from .log import get_logger from .msg import ( - Error, + # Error, NamespacePath, Return, ) from ._exceptions import ( - unpack_error, + # unpack_error, NoResult, ) from ._context import ( @@ -62,42 +62,44 @@ from ._streaming import ( MsgStream, ) +if TYPE_CHECKING: + from ._runtime import Actor log = get_logger(__name__) -# TODO: rename to `unwrap_result()` and use -# `._raise_from_no_key_in_msg()` (after tweak to -# accept a `chan: Channel` arg) in key block! -def _unwrap_msg( - msg: Return|Error, - channel: Channel, +# TODO: remove and/or rework? +# -[ ] rename to `unwrap_result()` and use +# `._raise_from_unexpected_msg()` (after tweak to accept a `chan: +# Channel` arg) in key block?? +# -[ ] pretty sure this is entirely covered by +# `_exceptions._raise_from_unexpected_msg()` so REMOVE! +# def _unwrap_msg( +# msg: Return|Error, +# ctx: Context, - hide_tb: bool = True, +# hide_tb: bool = True, -) -> Any: - ''' - Unwrap a final result from a `{return: }` IPC msg. +# ) -> Any: +# ''' +# Unwrap a final result from a `{return: }` IPC msg. - ''' - __tracebackhide__: bool = hide_tb +# ''' +# __tracebackhide__: bool = hide_tb +# try: +# return msg.pld +# except AttributeError as err: - try: - return msg.pld - # return msg['return'] - # except KeyError as ke: - except AttributeError as err: +# # internal error should never get here +# # assert msg.get('cid'), ( +# assert msg.cid, ( +# "Received internal error at portal?" +# ) - # internal error should never get here - # assert msg.get('cid'), ( - assert msg.cid, ( - "Received internal error at portal?" - ) - - raise unpack_error( - msg, - channel - ) from err +# raise unpack_error( +# msg, +# ctx.chan, +# ) from err class Portal: @@ -123,17 +125,21 @@ class Portal: # connected (peer) actors. cancel_timeout: float = 0.5 - def __init__(self, channel: Channel) -> None: + def __init__( + self, + channel: Channel, + ) -> None: + self.chan = channel # during the portal's lifetime - self._result_msg: dict|None = None + self._final_result: Any|None = None # When set to a ``Context`` (when _submit_for_result is called) # it is expected that ``result()`` will be awaited at some # point. 
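# e.g. the canonical one-shot result flow this machinery backs
# (sketched per the public API; `add()` is just a stand-in):
import trio
import tractor

async def add(a: int, b: int) -> int:
    return a + b

async def main() -> None:
    async with tractor.open_nursery() as an:
        portal = await an.run_in_actor(add, a=1, b=2)
        assert await portal.result() == 3

if __name__ == '__main__':
    trio.run(main)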
- self._expect_result: Context | None = None + self._expect_result_ctx: Context|None = None self._streams: set[MsgStream] = set() - self.actor = current_actor() + self.actor: Actor = current_actor() @property def channel(self) -> Channel: @@ -147,6 +153,7 @@ class Portal: ) return self.chan + # TODO: factor this out into an `ActorNursery` wrapper async def _submit_for_result( self, ns: str, @@ -154,27 +161,18 @@ class Portal: **kwargs ) -> None: - assert self._expect_result is None, ( - "A pending main result has already been submitted" - ) + if self._expect_result_ctx is not None: + raise RuntimeError( + 'A pending main result has already been submitted' + ) - self._expect_result = await self.actor.start_remote_task( + self._expect_result_ctx = await self.actor.start_remote_task( self.channel, nsf=NamespacePath(f'{ns}:{func}'), kwargs=kwargs, portal=self, ) - async def _return_once( - self, - ctx: Context, - - ) -> Return: - - assert ctx._remote_func_type == 'asyncfunc' # single response - msg: Return = await ctx._recv_chan.receive() - return msg - async def result(self) -> Any: ''' Return the result(s) from the remote actor's "main" task. @@ -188,7 +186,7 @@ class Portal: raise exc # not expecting a "main" result - if self._expect_result is None: + if self._expect_result_ctx is None: log.warning( f"Portal for {self.channel.uid} not expecting a final" " result?\nresult() should only be called if subactor" @@ -196,17 +194,15 @@ class Portal: return NoResult # expecting a "main" result - assert self._expect_result + assert self._expect_result_ctx - if self._result_msg is None: - self._result_msg = await self._return_once( - self._expect_result + if self._final_result is None: + self._final_result: Any = await self._expect_result_ctx._pld_rx.recv_pld( + ctx=self._expect_result_ctx, + expect_msg=Return, ) - return _unwrap_msg( - self._result_msg, - self.channel, - ) + return self._final_result async def _cancel_streams(self): # terminate all locally running async generator @@ -337,11 +333,9 @@ class Portal: kwargs=kwargs, portal=self, ) - ctx._portal: Portal = self - msg: Return = await self._return_once(ctx) - return _unwrap_msg( - msg, - self.channel, + return await ctx._pld_rx.recv_pld( + ctx=ctx, + expect_msg=Return, ) async def run( @@ -391,10 +385,9 @@ class Portal: kwargs=kwargs, portal=self, ) - ctx._portal = self - return _unwrap_msg( - await self._return_once(ctx), - self.channel, + return await ctx._pld_rx.recv_pld( + ctx=ctx, + expect_msg=Return, ) @acm @@ -436,7 +429,7 @@ class Portal: # deliver receive only stream async with MsgStream( ctx=ctx, - rx_chan=ctx._recv_chan, + rx_chan=ctx._rx_chan, ) as rchan: self._streams.add(rchan) yield rchan diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 4d675716..7ab14ab9 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -819,8 +819,8 @@ class Actor: state.max_buffer_size = msg_buffer_size except KeyError: - log.runtime( - f'Creating NEW IPC ctx for\n' + log.debug( + f'Allocate new IPC ctx for\n' f'peer: {chan.uid}\n' f'cid: {cid}\n' ) @@ -908,7 +908,7 @@ class Actor: # this should be immediate and does not (yet) wait for the # remote child task to sync via `Context.started()`. 
with trio.fail_after(ack_timeout): - first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() + first_msg: msgtypes.StartAck = await ctx._rx_chan.receive() try: functype: str = first_msg.functype except AttributeError: diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 16e32cea..764b7c1e 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -35,7 +35,7 @@ import warnings import trio from ._exceptions import ( - _raise_from_no_key_in_msg, + # _raise_from_no_key_in_msg, ContextCancelled, ) from .log import get_logger @@ -44,8 +44,9 @@ from .trionics import ( BroadcastReceiver, ) from tractor.msg import ( - Return, - Stop, + # Return, + # Stop, + MsgType, Yield, ) @@ -94,24 +95,23 @@ class MsgStream(trio.abc.Channel): self._eoc: bool|trio.EndOfChannel = False self._closed: bool|trio.ClosedResourceError = False + # TODO: could we make this a direct method bind to `PldRx`? + # -> receive_nowait = PldRx.recv_pld + # |_ means latter would have to accept `MsgStream`-as-`self`? + # => should be fine as long as, + # -[ ] both define `._rx_chan` + # -[ ] .ctx is bound into `PldRx` using a `@cm`? + # # delegate directly to underlying mem channel def receive_nowait( self, - allow_msgs: list[str] = Yield, + expect_msg: MsgType = Yield, ): - msg: Yield|Stop = self._rx_chan.receive_nowait() - # TODO: replace msg equiv of this or does the `.pld` - # interface read already satisfy it? I think so, yes? - try: - return msg.pld - except AttributeError as attrerr: - _raise_from_no_key_in_msg( - ctx=self._ctx, - msg=msg, - src_err=attrerr, - log=log, - stream=self, - ) + ctx: Context = self._ctx + return ctx._pld_rx.recv_pld_nowait( + ctx=ctx, + expect_msg=expect_msg, + ) async def receive( self, @@ -146,24 +146,9 @@ class MsgStream(trio.abc.Channel): src_err: Exception|None = None # orig tb try: - try: - msg: Yield = await self._rx_chan.receive() - return msg.pld - # TODO: implement with match: instead? - except AttributeError as attrerr: - # src_err = kerr - src_err = attrerr - - # NOTE: may raise any of the below error types - # includg EoC when a 'stop' msg is found. - _raise_from_no_key_in_msg( - ctx=self._ctx, - msg=msg, - src_err=attrerr, - log=log, - stream=self, - ) + ctx: Context = self._ctx + return await ctx._pld_rx.recv_pld(ctx=ctx) # XXX: the stream terminates on either of: # - via `self._rx_chan.receive()` raising after manual closure @@ -228,7 +213,7 @@ class MsgStream(trio.abc.Channel): # probably want to instead raise the remote error # over the end-of-stream connection error since likely # the remote error was the source cause? - ctx: Context = self._ctx + # ctx: Context = self._ctx ctx.maybe_raise( raise_ctxc_from_self_call=True, ) @@ -292,7 +277,8 @@ class MsgStream(trio.abc.Channel): while not drained: try: maybe_final_msg = self.receive_nowait( - allow_msgs=[Yield, Return], + # allow_msgs=[Yield, Return], + expect_msg=Yield, ) if maybe_final_msg: log.debug( @@ -472,6 +458,9 @@ class MsgStream(trio.abc.Channel): self, # use memory channel size by default self._rx_chan._state.max_buffer_size, # type: ignore + + # TODO: can remove this kwarg right since + # by default behaviour is to do this anyway? 
receive_afunc=self.receive, ) @@ -517,19 +506,11 @@ class MsgStream(trio.abc.Channel): raise self._closed try: - # await self._ctx.chan.send( - # payload={ - # 'yield': data, - # 'cid': self._ctx.cid, - # }, - # # hide_tb=hide_tb, - # ) await self._ctx.chan.send( payload=Yield( cid=self._ctx.cid, pld=data, ), - # hide_tb=hide_tb, ) except ( trio.ClosedResourceError, @@ -562,7 +543,7 @@ def stream(func: Callable) -> Callable: ''' # TODO: apply whatever solution ``mypy`` ends up picking for this: # https://github.com/python/mypy/issues/2087#issuecomment-769266912 - func._tractor_stream_function = True # type: ignore + func._tractor_stream_function: bool = True # type: ignore sig = inspect.signature(func) params = sig.parameters diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py new file mode 100644 index 00000000..e78b79a4 --- /dev/null +++ b/tractor/msg/_ops.py @@ -0,0 +1,563 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Near-application abstractions for `MsgType.pld: PayloadT|Raw` +delivery, filtering and type checking as well as generic +operational helpers for processing transaction flows. + +''' +from __future__ import annotations +from contextlib import ( + # asynccontextmanager as acm, + contextmanager as cm, +) +from pprint import pformat +from typing import ( + Any, + Type, + TYPE_CHECKING, + # Union, +) +# ------ - ------ +from msgspec import ( + msgpack, + Raw, + Struct, + ValidationError, +) +import trio +# ------ - ------ +from tractor.log import get_logger +from tractor._exceptions import ( + MessagingError, + InternalError, + _raise_from_unexpected_msg, + MsgTypeError, + _mk_msg_type_err, + pack_from_raise, +) +from ._codec import ( + mk_dec, + MsgDec, +) +from .types import ( + CancelAck, + Error, + MsgType, + PayloadT, + Return, + Started, + Stop, + Yield, + # pretty_struct, +) + + +if TYPE_CHECKING: + from tractor._context import Context + from tractor._streaming import MsgStream + + +log = get_logger(__name__) + + +class PldRx(Struct): + ''' + A "msg payload receiver". + + The pairing of a "feeder" `trio.abc.ReceiveChannel` and an + interchange-specific (eg. msgpack) payload field decoder. The + validation/type-filtering rules are runtime mutable and allow + type constraining the set of `MsgType.pld: Raw|PayloadT` + values at runtime, per IPC task-context. + + This abstraction, being just below "user application code", + allows for the equivalent of our `MsgCodec` (used for + typer-filtering IPC dialog protocol msgs against a msg-spec) + but with granular control around payload delivery (i.e. the + data-values user code actually sees and uses (the blobs that + are "shuttled" by the wrapping dialog prot) such that invalid + `.pld: Raw` can be decoded and handled by IPC-primitive user + code (i.e. 
+    that operates on `Context` and `MsgStream` APIs)
+    without knowledge of the lower level `Channel`/`MsgTransport`
+    primitives nor the `MsgCodec` in use. Further, lazily decoding
+    payload blobs allows for topical (and maybe intentionally
+    "partial") encryption of msg field subsets.
+
+    '''
+    # TODO: better to bind it here?
+    # _rx_mc: trio.MemoryReceiveChannel
+    _msgdec: MsgDec = mk_dec(spec=Any)
+
+    _ipc: Context|MsgStream|None = None
+
+    @cm
+    def apply_to_ipc(
+        self,
+        ipc_prim: Context|MsgStream,
+
+    ) -> PldRx:
+        '''
+        Apply this payload receiver to an IPC primitive type, one
+        of `Context` or `MsgStream`.
+
+        '''
+        self._ipc = ipc_prim
+        try:
+            yield self
+        finally:
+            self._ipc = None
+
+    @property
+    def dec(self) -> msgpack.Decoder:
+        return self._msgdec.dec
+
+    def recv_pld_nowait(
+        self,
+        # TODO: make this `MsgStream` compat as well, see above^
+        # ipc_prim: Context|MsgStream,
+        ctx: Context,
+
+        ipc_msg: MsgType|None = None,
+        expect_msg: Type[MsgType]|None = None,
+
+        **kwargs,
+
+    ) -> Any|Raw:
+
+        msg: MsgType = (
+            ipc_msg
+            or
+
+            # sync-rx msg from underlying IPC feeder (mem-)chan
+            ctx._rx_chan.receive_nowait()
+        )
+        return self.dec_msg(
+            msg,
+            ctx=ctx,
+            expect_msg=expect_msg,
+        )
+
+    async def recv_pld(
+        self,
+        ctx: Context,
+        ipc_msg: MsgType|None = None,
+        expect_msg: Type[MsgType]|None = None,
+
+        **kwargs
+
+    ) -> Any|Raw:
+        '''
+        Receive a `MsgType`, then decode and return its `.pld` field.
+
+        '''
+        msg: MsgType = (
+            ipc_msg
+            or
+
+            # async-rx msg from underlying IPC feeder (mem-)chan
+            await ctx._rx_chan.receive()
+        )
+        return self.dec_msg(
+            msg,
+            ctx=ctx,
+            expect_msg=expect_msg,
+        )
+
+    def dec_msg(
+        self,
+        msg: MsgType,
+        ctx: Context,
+        expect_msg: Type[MsgType]|None = None,
+
+    ) -> PayloadT|Raw:
+        '''
+        Decode a msg's payload field: `MsgType.pld: PayloadT|Raw` and
+        return the value or raise an appropriate error.
+
+        '''
+        match msg:
+            # payload-data shuttle msg; deliver the `.pld` value
+            # directly to IPC (primitive) client-consumer code.
+            case (
+                Started(pld=pld)  # sync phase
+                |Yield(pld=pld)  # streaming phase
+                |Return(pld=pld)  # termination phase
+            ):
+                try:
+                    pld: PayloadT = self._msgdec.decode(pld)
+                    log.runtime(
+                        'Decode msg payload\n\n'
+                        f'{msg}\n\n'
+                        f'{pld}\n'
+                    )
+                    return pld
+
+                # XXX pld-type failure
+                except ValidationError as src_err:
+                    msgterr: MsgTypeError = _mk_msg_type_err(
+                        msg=msg,
+                        codec=self._msgdec,
+                        src_validation_error=src_err,
+                    )
+                    msg: Error = pack_from_raise(
+                        local_err=msgterr,
+                        cid=msg.cid,
+                        src_uid=ctx.chan.uid,
+                    )
+
+                # XXX some other decoder specific failure?
+                # except TypeError as src_error:
+                #     from .devx import mk_pdb
+                #     mk_pdb().set_trace()
+                #     raise src_error
+
+            # a runtime-internal RPC endpoint response.
+            # always passthrough since (internal) runtime
+            # responses are generally never exposed to consumer
+            # code.
+            case CancelAck(
+                pld=bool(cancelled)
+            ):
+                return cancelled
+
+            case Error():
+                src_err = MessagingError(
+                    'IPC dialog termination by msg'
+                )
+
+            case _:
+                src_err = InternalError(
+                    'Unknown IPC msg ??\n\n'
+                    f'{msg}\n'
+                )
+
+        # fallthrough and raise from `src_err`
+        _raise_from_unexpected_msg(
+            ctx=ctx,
+            msg=msg,
+            src_err=src_err,
+            log=log,
+            expect_msg=expect_msg,
+            hide_tb=False,
+        )
+
+    async def recv_msg_w_pld(
+        self,
+        ipc: Context|MsgStream,
+
+    ) -> tuple[MsgType, PayloadT]:
+        '''
+        Retrieve the next avail IPC msg, decode its payload, and return
+        the pair of refs.
+ + ''' + msg: MsgType = await ipc._rx_chan.receive() + + # TODO: is there some way we can inject the decoded + # payload into an existing output buffer for the original + # msg instance? + pld: PayloadT = self.dec_msg( + msg, + ctx=ipc, + ) + return msg, pld + + +async def drain_to_final_msg( + ctx: Context, + + hide_tb: bool = True, + msg_limit: int = 6, + +) -> tuple[ + Return|None, + list[MsgType] +]: + ''' + Drain IPC msgs delivered to the underlying IPC primitive's + rx-mem-chan (eg. `Context._rx_chan`) from the runtime in + search for a final result or error. + + The motivation here is to ideally capture errors during ctxc + conditions where a canc-request/or local error is sent but the + local task also excepts and enters the + `Portal.open_context().__aexit__()` block wherein we prefer to + capture and raise any remote error or ctxc-ack as part of the + `ctx.result()` cleanup and teardown sequence. + + ''' + __tracebackhide__: bool = hide_tb + raise_overrun: bool = not ctx._allow_overruns + + # wait for a final context result by collecting (but + # basically ignoring) any bi-dir-stream msgs still in transit + # from the far end. + pre_result_drained: list[MsgType] = [] + return_msg: Return|None = None + while not ( + ctx.maybe_error + and not ctx._final_result_is_set() + ): + try: + # TODO: can remove? + # await trio.lowlevel.checkpoint() + + # NOTE: this REPL usage actually works here dawg! Bo + # from .devx._debug import pause + # await pause() + + # TODO: bad idea? + # -[ ] wrap final outcome channel wait in a scope so + # it can be cancelled out of band if needed? + # + # with trio.CancelScope() as res_cs: + # ctx._res_scope = res_cs + # msg: dict = await ctx._rx_chan.receive() + # if res_cs.cancelled_caught: + + # TODO: ensure there's no more hangs, debugging the + # runtime pretty preaase! + # from .devx._debug import pause + # await pause() + + # TODO: can remove this finally? + # we have no more need for the sync draining right + # since we're can kinda guarantee the async + # `.receive()` below will never block yah? + # + # if ( + # ctx._cancel_called and ( + # ctx.cancel_acked + # # or ctx.chan._cancel_called + # ) + # # or not ctx._final_result_is_set() + # # ctx.outcome is not + # # or ctx.chan._closed + # ): + # try: + # msg: dict = await ctx._rx_chan.receive_nowait()() + # except trio.WouldBlock: + # log.warning( + # 'When draining already `.cancel_called` ctx!\n' + # 'No final msg arrived..\n' + # ) + # break + # else: + # msg: dict = await ctx._rx_chan.receive() + + # TODO: don't need it right jefe? + # with trio.move_on_after(1) as cs: + # if cs.cancelled_caught: + # from .devx._debug import pause + # await pause() + + # pray to the `trio` gawds that we're corrent with this + # msg: dict = await ctx._rx_chan.receive() + msg, pld = await ctx._pld_rx.recv_msg_w_pld(ipc=ctx) + + # NOTE: we get here if the far end was + # `ContextCancelled` in 2 cases: + # 1. we requested the cancellation and thus + # SHOULD NOT raise that far end error, + # 2. WE DID NOT REQUEST that cancel and thus + # SHOULD RAISE HERE! + except trio.Cancelled: + + # CASE 2: mask the local cancelled-error(s) + # only when we are sure the remote error is + # the source cause of this local task's + # cancellation. + ctx.maybe_raise() + + # CASE 1: we DID request the cancel we simply + # continue to bubble up as normal. + raise + + match msg: + + # final result arrived! 
+ case Return( + # cid=cid, + # pld=res, + ): + # ctx._result: Any = res + ctx._result: Any = pld + log.runtime( + 'Context delivered final draining msg:\n' + f'{pformat(msg)}' + ) + # XXX: only close the rx mem chan AFTER + # a final result is retreived. + # if ctx._rx_chan: + # await ctx._rx_chan.aclose() + # TODO: ^ we don't need it right? + return_msg = msg + break + + # far end task is still streaming to us so discard + # and report depending on local ctx state. + case Yield(): + pre_result_drained.append(msg) + if ( + (ctx._stream.closed + and (reason := 'stream was already closed') + ) + or (ctx.cancel_acked + and (reason := 'ctx cancelled other side') + ) + or (ctx._cancel_called + and (reason := 'ctx called `.cancel()`') + ) + or (len(pre_result_drained) > msg_limit + and (reason := f'"yield" limit={msg_limit}') + ) + ): + log.cancel( + 'Cancelling `MsgStream` drain since ' + f'{reason}\n\n' + f'<= {ctx.chan.uid}\n' + f' |_{ctx._nsf}()\n\n' + f'=> {ctx._task}\n' + f' |_{ctx._stream}\n\n' + + f'{pformat(msg)}\n' + ) + return ( + return_msg, + pre_result_drained, + ) + + # drain up to the `msg_limit` hoping to get + # a final result or error/ctxc. + else: + log.warning( + 'Ignoring "yield" msg during `ctx.result()` drain..\n' + f'<= {ctx.chan.uid}\n' + f' |_{ctx._nsf}()\n\n' + f'=> {ctx._task}\n' + f' |_{ctx._stream}\n\n' + + f'{pformat(msg)}\n' + ) + continue + + # stream terminated, but no result yet.. + # + # TODO: work out edge cases here where + # a stream is open but the task also calls + # this? + # -[ ] should be a runtime error if a stream is open right? + # Stop() + case Stop(): + pre_result_drained.append(msg) + log.cancel( + 'Remote stream terminated due to "stop" msg:\n\n' + f'{pformat(msg)}\n' + ) + continue + + # remote error msg, likely already handled inside + # `Context._deliver_msg()` + case Error(): + # TODO: can we replace this with `ctx.maybe_raise()`? + # -[ ] would this be handier for this case maybe? + # async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') + # + re: Exception|None = ctx._remote_error + if re: + assert msg is ctx._cancel_msg + # NOTE: this solved a super duper edge case XD + # this was THE super duper edge case of: + # - local task opens a remote task, + # - requests remote cancellation of far end + # ctx/tasks, + # - needs to wait for the cancel ack msg + # (ctxc) or some result in the race case + # where the other side's task returns + # before the cancel request msg is ever + # rxed and processed, + # - here this surrounding drain loop (which + # iterates all ipc msgs until the ack or + # an early result arrives) was NOT exiting + # since we are the edge case: local task + # does not re-raise any ctxc it receives + # IFF **it** was the cancellation + # requester.. + # + # XXX will raise if necessary but ow break + # from loop presuming any supressed error + # (ctxc) should terminate the context! + ctx._maybe_raise_remote_err( + re, + # NOTE: obvi we don't care if we + # overran the far end if we're already + # waiting on a final result (msg). + # raise_overrun_from_self=False, + raise_overrun_from_self=raise_overrun, + ) + + break # OOOOOF, yeah obvi we need this.. + + # XXX we should never really get here + # right! since `._deliver_msg()` should + # always have detected an {'error': ..} + # msg and already called this right!?! 
+            # elif error := unpack_error(
+            #     msg=msg,
+            #     chan=ctx._portal.channel,
+            #     hide_tb=False,
+            # ):
+            #     log.critical('SHOULD NEVER GET HERE!?')
+            #     assert msg is ctx._cancel_msg
+            #     assert error.msgdata == ctx._remote_error.msgdata
+            #     assert error.ipc_msg == ctx._remote_error.ipc_msg
+            #     from .devx._debug import pause
+            #     await pause()
+            #     ctx._maybe_cancel_and_set_remote_error(error)
+            #     ctx._maybe_raise_remote_err(error)
+
+                else:
+                    # bubble the original src key error
+                    raise
+
+            # XXX should pretty much never get here unless someone
+            # overrides the default `MsgType` spec.
+            case _:
+                pre_result_drained.append(msg)
+                # It's definitely an internal error if any other
+                # msg type without a `'cid'` field arrives here!
+                if not msg.cid:
+                    raise InternalError(
+                        'Unexpected cid-missing msg?\n\n'
+                        f'{msg}\n'
+                    )
+
+                raise RuntimeError(f'Unknown msg type: {msg}')
+
+    else:
+        log.cancel(
+            'Skipping `MsgStream` drain since final outcome is set\n\n'
+            f'{ctx.outcome}\n'
+        )
+
+    return (
+        return_msg,
+        pre_result_drained,
+    )
-- 
2.34.1


From 74d6ffabf268a08be0c80289c898d375c14f5974 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 24 Apr 2024 12:31:05 -0400
Subject: [PATCH 068/190] Use `Context._stream` in `_raise_from_unexpected_msg()`

Instead of expecting it to be passed in (as it was prior), when
determining if a `Stop` msg is a valid end-of-channel signal use the
`ctx._stream: MsgStream|None` attr which **must** be set by any stream
opening API; either of:
- `Context.open_stream()`
- `Portal.open_stream_from()`

Adjust the case block logic to match with fallthrough from any EoC to
a closed error if necessary. Change the `_type: str` to match the
failing IPC-prim name in the tail case where we raise
a `MessagingError`.

Other:
- move `.sender: tuple` uid attr up to `RemoteActorError` since `Error`
  optionally defines it as a field and for boxed `StreamOverrun`s (an
  ignore case we check for in the runtime during cancellation) we want
  it readable from the boxing rae.
- drop still unused `InternalActorError`.
---
 tractor/_exceptions.py | 107 +++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 58 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index b2ba6e84..8d9274fe 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -532,7 +532,8 @@ class RemoteActorError(Exception):
         self,
     ) -> BaseException:
         '''
-        Unpack the inner-most source error from it's original IPC msg data.
+        Unpack the inner-most source error from its original IPC
+        msg data.
 
         We attempt to reconstruct (as best as we can) the original
         `Exception` from as it would have been raised in the
@@ -570,6 +571,14 @@ class RemoteActorError(Exception):
     #     # boxed_type=get_type_ref(..
     #     raise NotImplementedError
 
+    @property
+    def sender(self) -> tuple[str, str]|None:
+        if (
+            (msg := self._ipc_msg)
+            and (value := msg.sender)
+        ):
+            return tuple(value)
+
 
 class ContextCancelled(RemoteActorError):
     '''
@@ -734,20 +743,6 @@ class StreamOverrun(
     handled by app code using `MsgStream.send()/.receive()`.
 
     '''
-    @property
-    def sender(self) -> tuple[str, str] | None:
-        value = self._ipc_msg.sender
-        if value:
-            return tuple(value)
-
-
-# class InternalActorError(RemoteActorError):
-#     '''
-#     Boxed (Remote) internal `tractor` error indicating failure of some
-#     primitive, machinery state or lowlevel task that should never
-#     occur.
- -# ''' class TransportClosed(trio.ClosedResourceError): @@ -945,7 +940,6 @@ def _raise_from_unexpected_msg( log: StackLevelAdapter, # caller specific `log` obj expect_msg: str = Yield, - stream: MsgStream | None = None, # allow "deeper" tbs when debugging B^o hide_tb: bool = True, @@ -987,6 +981,8 @@ def _raise_from_unexpected_msg( ) from src_err # TODO: test that shows stream raising an expected error!!! + stream: MsgStream|None + _type: str = 'Context' # raise the error message in a boxed exception type! if isinstance(msg, Error): @@ -1003,55 +999,50 @@ def _raise_from_unexpected_msg( # TODO: does it make more sense to pack # the stream._eoc outside this in the calleer always? # case Stop(): - elif ( - isinstance(msg, Stop) - or ( - stream - and stream._eoc - ) - ): - log.debug( - f'Context[{cid}] stream was stopped by remote side\n' - f'cid: {cid}\n' - ) + elif stream := ctx._stream: + _type: str = 'MsgStream' - # TODO: if the a local task is already blocking on - # a `Context.result()` and thus a `.receive()` on the - # rx-chan, we close the chan and set state ensuring that - # an eoc is raised! + if ( + stream._eoc + or + isinstance(msg, Stop) + ): + log.debug( + f'Context[{cid}] stream was stopped by remote side\n' + f'cid: {cid}\n' + ) - # XXX: this causes ``ReceiveChannel.__anext__()`` to - # raise a ``StopAsyncIteration`` **and** in our catch - # block below it will trigger ``.aclose()``. - eoc = trio.EndOfChannel( - f'Context stream ended due to msg:\n\n' - f'{pformat(msg)}\n' - ) - # XXX: important to set so that a new `.receive()` - # call (likely by another task using a broadcast receiver) - # doesn't accidentally pull the `return` message - # value out of the underlying feed mem chan which is - # destined for the `Context.result()` call during ctx-exit! - stream._eoc: Exception = eoc + # TODO: if the a local task is already blocking on + # a `Context.result()` and thus a `.receive()` on the + # rx-chan, we close the chan and set state ensuring that + # an eoc is raised! - # in case there already is some underlying remote error - # that arrived which is probably the source of this stream - # closure - ctx.maybe_raise() - raise eoc from src_err + # XXX: this causes ``ReceiveChannel.__anext__()`` to + # raise a ``StopAsyncIteration`` **and** in our catch + # block below it will trigger ``.aclose()``. + eoc = trio.EndOfChannel( + f'Context stream ended due to msg:\n\n' + f'{pformat(msg)}\n' + ) + # XXX: important to set so that a new `.receive()` + # call (likely by another task using a broadcast receiver) + # doesn't accidentally pull the `return` message + # value out of the underlying feed mem chan which is + # destined for the `Context.result()` call during ctx-exit! + stream._eoc: Exception = eoc - if ( - stream - and stream._closed - ): - # TODO: our own error subtype? - raise trio.ClosedResourceError( - 'This stream was closed' - ) + # in case there already is some underlying remote error + # that arrived which is probably the source of this stream + # closure + ctx.maybe_raise() + raise eoc from src_err + + if stream._closed: + # TODO: our own error subtype? + raise trio.ClosedResourceError('This stream was closed') # always re-raise the source error if no translation error case # is activated above. 
-        _type: str = 'Stream' if stream else 'Context'
         raise MessagingError(
             f"{_type} was expecting a {expect_msg} message"
             " BUT received a non-error msg:\n"
             f'{pformat(msg)}'
-- 
2.34.1


From f0155b4525e8ea31df73463ee8ef3dbce0e56ab5 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 24 Apr 2024 12:43:08 -0400
Subject: [PATCH 069/190] Set `Context._stream` in `Portal.open_stream_from()`..

---
 tractor/_portal.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tractor/_portal.py b/tractor/_portal.py
index 97268972..f3928657 100644
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@@ -420,7 +420,6 @@ class Portal:
             kwargs=kwargs,
             portal=self,
         )
-        ctx._portal = self
 
         # ensure receive-only stream entrypoint
         assert ctx._remote_func_type == 'asyncgen'
@@ -430,9 +429,10 @@ class Portal:
             async with MsgStream(
                 ctx=ctx,
                 rx_chan=ctx._rx_chan,
-            ) as rchan:
-                self._streams.add(rchan)
-                yield rchan
+            ) as stream:
+                self._streams.add(stream)
+                ctx._stream = stream
+                yield stream
 
         finally:
 
@@ -454,7 +454,7 @@ class Portal:
             # XXX: should this always be done?
             # await recv_chan.aclose()
-            self._streams.remove(rchan)
+            self._streams.remove(stream)
 
 # NOTE: impl is found in `._context`` mod to make
 # reading/groking the details simpler code-org-wise. This
-- 
2.34.1


From eb5db36013c1492fa481a37acbc2d830e6c9dd4e Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 24 Apr 2024 13:07:05 -0400
Subject: [PATCH 070/190] Try out `msgspec` encode-buffer optimization

As per the reco:
https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer

BUT, seems to cause this error in `pikerd`..

`BufferError: Existing exports of data: object cannot be re-sized`

Soo no idea? Maybe there's a tweak needed that we can glean from
tests/examples in the `msgspec` repo?

Disabling for now.
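For reference, the reuse pattern from the perf-tips page boils down to
roughly the following sketch (illustrative names only, not the
`MsgCodec` impl). Worth noting: a `bytearray` raises exactly the above
`BufferError` when it gets resized while an exported `memoryview` of it
(eg. one still held by an in-flight transport send) is alive, which
would explain why handing out `self._buf` directly is dicey:

    import msgspec

    enc = msgspec.msgpack.Encoder()
    buf = bytearray(64)  # pre-alloc'd, resized in place as needed

    def encode(obj) -> bytes:
        # encode into the reused buffer, resizing it as required
        enc.encode_into(obj, buf)
        # copy out *before* the next `encode_into()` call so that no
        # external view of `buf` outlives this frame
        return bytes(buf)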
---
 tractor/msg/__init__.py |  5 +++++
 tractor/msg/_codec.py   | 17 ++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py
index d968f6cf..13739cdb 100644
--- a/tractor/msg/__init__.py
+++ b/tractor/msg/__init__.py
@@ -37,6 +37,11 @@ from ._codec import (
     MsgDec as MsgDec,
     current_codec as current_codec,
 )
+# currently can't bc circular with `._context`
+# from ._ops import (
+#     PldRx as PldRx,
+#     _drain_to_final_msg as _drain_to_final_msg,
+# )
 
 from .types import (
     Msg as Msg,
diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index 104f7d99..e3540c3d 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -280,17 +280,32 @@ class MsgCodec(Struct):
     def enc(self) -> msgpack.Encoder:
         return self._enc
 
+    # TODO: reusing encode buffer for perf?
+    # https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer
+    _buf: bytearray = bytearray()
+
     def encode(
         self,
         py_obj: Any,
 
+        use_buf: bool = False,
+        # ^-XXX-^ uhh why am i getting this?
+        # |_BufferError: Existing exports of data: object cannot be re-sized
 
     ) -> bytes:
         '''
         Encode input python objects to `msgpack` bytes for transfer
         on a transport protocol connection.
 
+        When `use_buf == True` use the output buffer optimization:
+        https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer
+
         '''
-        return self._enc.encode(py_obj)
+        if use_buf:
+            self._enc.encode_into(py_obj, self._buf)
+            return self._buf
+        else:
+            return self._enc.encode(py_obj)
 
     @property
     def dec(self) -> msgpack.Decoder:
-- 
2.34.1


From 506575e4ca0719b66c072f9b85f047a358f2e50d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 25 Apr 2024 12:33:10 -0400
Subject: [PATCH 071/190] Another `._rpc` mod passthrough

- tweaking logging to include more `MsgType` dumps on IPC faults.
- removing some commented cruft.
- comment formatting / cleanups / add-ons.
- more type annots.
- fill out some TODO content.
---
 tractor/_rpc.py | 163 ++++++++++++++++++++++++------------------------
 1 file changed, 80 insertions(+), 83 deletions(-)

diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 5970a101..17d37564 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -177,12 +177,11 @@ async def _invoke_non_context(
     # way: using the linked IPC context machinery.
     failed_resp: bool = False
     try:
-        await chan.send(
-            StartAck(
-                cid=cid,
-                functype='asyncfunc',
-            )
+        ack = StartAck(
+            cid=cid,
+            functype='asyncfunc',
         )
+        await chan.send(ack)
     except (
         trio.ClosedResourceError,
         trio.BrokenResourceError,
@@ -190,12 +189,11 @@ async def _invoke_non_context(
     ) as ipc_err:
         failed_resp = True
         if is_rpc:
-            raise
+            raise ipc_err
         else:
-            # TODO: should this be an `.exception()` call?
-            log.warning(
-                f'Failed to respond to non-rpc request: {func}\n'
-                f'{ipc_err}'
+            log.exception(
+                f'Failed to respond to runtime RPC request for\n\n'
+                f'{ack}\n'
             )
 
     with cancel_scope as cs:
@@ -216,20 +214,19 @@ async def _invoke_non_context(
         and chan.connected()
     ):
         try:
-            await chan.send(
-                return_msg(
-                    cid=cid,
-                    pld=result,
-                )
+            ret_msg = return_msg(
+                cid=cid,
+                pld=result,
             )
+            await chan.send(ret_msg)
         except (
             BrokenPipeError,
             trio.BrokenResourceError,
         ):
             log.warning(
-                'Failed to return result:\n'
-                f'{func}@{actor.uid}\n'
-                f'remote chan: {chan.uid}'
+                'Failed to send RPC result?\n'
+                f'|_{func}@{actor.uid}() -> {ret_msg}\n\n'
+                f'x=> peer: {chan.uid}\n'
             )
 
 @acm
@@ -246,7 +243,7 @@ async def _errors_relayed_via_ipc(
     ] = trio.TASK_STATUS_IGNORED,
 
 ) -> None:
-    __tracebackhide__: bool = hide_tb  # TODO: use hide_tb here?
+    __tracebackhide__: bool = hide_tb
     try:
         yield  # run RPC invoke body
 
@@ -258,23 +255,19 @@ async def _errors_relayed_via_ipc(
         KeyboardInterrupt,
     ) as err:
 
-        # always hide this frame from debug REPL if the crash
-        # originated from an rpc task and we DID NOT fail due to
-        # an IPC transport error!
+        # NOTE: always hide this frame from debug REPL call stack
+        # if the crash originated from an RPC task and we DID NOT
+        # fail due to an IPC transport error!
         if (
             is_rpc
-            and chan.connected()
+            and
+            chan.connected()
         ):
             __tracebackhide__: bool = hide_tb
 
+        # TODO: maybe we'll want different "levels" of debugging
+        # eventually such as ('app', 'supervisory', 'runtime') ?
         if not is_multi_cancelled(err):
-
-            # TODO: maybe we'll want different "levels" of debugging
-            # eventualy such as ('app', 'supervisory', 'runtime') ?
-
-            # if not isinstance(err, trio.ClosedResourceError) and (
-            # if not is_multi_cancelled(err) and (
-
             entered_debug: bool = False
             if (
                 (
@@ -306,19 +299,18 @@ async def _errors_relayed_via_ipc(
                 # strange bug in our transport layer itself? Going
                 # to keep this open ended for now.
                entered_debug = await _debug._maybe_enter_pm(err)
-
                if not entered_debug:
                    log.exception(
                        'RPC task crashed\n'
                        f'|_{ctx}'
                    )
 
-        # always (try to) ship RPC errors back to caller
+        # ALWAYS try to ship RPC errors back to parent/caller task
        if is_rpc:
-            #
+
            # TODO: tests for this scenario:
            # - RPC caller closes connection before getting a response
-            # should **not** crash this actor..
+            #   should **not** crash this actor..
            await try_ship_error_to_remote(
                chan,
                err,
@@ -327,33 +319,41 @@ async def _errors_relayed_via_ipc(
                hide_tb=hide_tb,
            )
 
-        # error is probably from above coro running code *not from
-        # the target rpc invocation since a scope was never
-        # allocated around the coroutine await.
+        # if the ctx cs is NOT allocated, the error is likely from
+        # above `coro` invocation machinery NOT from inside the
+        # `coro` itself, i.e. err is NOT a user application error.
        if ctx._scope is None:
            # we don't ever raise directly here to allow the
            # msg-loop-scheduler to continue running for this
            # channel.
            task_status.started(err)
 
-        # always reraise KBIs so they propagate at the sys-process
-        # level.
+        # always reraise KBIs so they propagate at the sys-process level.
        if isinstance(err, KeyboardInterrupt):
            raise
 
-    # RPC task bookeeping
+    # RPC task bookkeeping.
+    # since RPC tasks are scheduled inside a flat
+    # `Actor._service_n`, we add "handles" to each such that
+    # they can be individually cancelled.
    finally:
        try:
-            ctx, func, is_complete = actor._rpc_tasks.pop(
+            ctx: Context
+            func: Callable
+            is_complete: trio.Event
+            (
+                ctx,
+                func,
+                is_complete,
+            ) = actor._rpc_tasks.pop(
                (chan, ctx.cid)
            )
            is_complete.set()
 
        except KeyError:
+            # If we're cancelled before the task returns then the
+            # cancel scope will not have been inserted yet
            if is_rpc:
-                # If we're cancelled before the task returns then the
-                # cancel scope will not have been inserted yet
                log.warning(
                    'RPC task likely errored or cancelled before start?'
                    f'|_{ctx._task}\n'
@@ -368,7 +368,7 @@ async def _errors_relayed_via_ipc(
 
    finally:
        if not actor._rpc_tasks:
-            log.runtime("All RPC tasks have completed")
+            log.runtime('All RPC tasks have completed')
            actor._ongoing_rpc_tasks.set()
 
@@ -410,19 +410,16 @@ async def _invoke(
 
    # TODO: possibly a specially formatted traceback
    # (not sure what typing is for this..)?
-    # tb = None
+    # tb: TracebackType = None
 
    cancel_scope = CancelScope()
-    # activated cancel scope ref
-    cs: CancelScope|None = None
-
+    cs: CancelScope|None = None  # ref when activated
    ctx = actor.get_context(
        chan=chan,
        cid=cid,
        nsf=NamespacePath.from_ref(func),
 
-        # TODO: if we wanted to get cray and support it?
-        # side='callee',
+
        # NOTE: no portal passed bc this is the "child"-side
 
        # We shouldn't ever need to pass this through right?
        # it's up to the soon-to-be called rpc task to
@@ -455,8 +452,8 @@ async def _invoke(
 
        kwargs['stream'] = ctx
 
+    # handle decorated ``@tractor.context`` async function
    elif getattr(func, '_tractor_context_function', False):
-        # handle decorated ``@tractor.context`` async function
        kwargs['ctx'] = ctx
        context = True
 
@@ -470,7 +467,8 @@ async def _invoke(
        task_status=task_status,
    ):
        if not (
-            inspect.isasyncgenfunction(func) or
+            inspect.isasyncgenfunction(func)
+            or
            inspect.iscoroutinefunction(func)
        ):
            raise TypeError(f'{func} must be an async function!')
@@ -482,8 +480,7 @@ async def _invoke(
        except TypeError:
            raise
 
-        # TODO: implement all these cases in terms of the
-        # `Context` one!
+        # TODO: impl all these cases in terms of the `Context` one!
if not context: await _invoke_non_context( actor, @@ -499,7 +496,7 @@ async def _invoke( return_msg, task_status, ) - # below is only for `@context` funcs + # XXX below fallthrough is ONLY for `@context` eps return # our most general case: a remote SC-transitive, @@ -576,9 +573,6 @@ async def _invoke( # itself calls `ctx._maybe_cancel_and_set_remote_error()` # which cancels the scope presuming the input error # is not a `.cancel_acked` pleaser. - # - currently a never-should-happen-fallthrough case - # inside ._context._drain_to_final_msg()`.. - # # TODO: remove this ^ right? if ctx._scope.cancelled_caught: our_uid: tuple = actor.uid @@ -594,9 +588,7 @@ async def _invoke( if cs.cancel_called: canceller: tuple = ctx.canceller - msg: str = ( - 'actor was cancelled by ' - ) + msg: str = 'actor was cancelled by ' # NOTE / TODO: if we end up having # ``Actor._cancel_task()`` call @@ -619,6 +611,8 @@ async def _invoke( else: msg += 'a remote peer' + # TODO: move this "div centering" into + # a helper for use elsewhere! div_chars: str = '------ - ------' div_offset: int = ( round(len(msg)/2)+1 @@ -698,11 +692,9 @@ async def _invoke( ctx: Context = actor._contexts.pop(( chan.uid, cid, - # ctx.side, )) merr: Exception|None = ctx.maybe_error - ( res_type_str, res_str, @@ -716,7 +708,7 @@ async def _invoke( ) log.runtime( f'IPC context terminated with a final {res_type_str}\n\n' - f'{ctx}\n' + f'{ctx}' ) @@ -802,13 +794,19 @@ async def process_messages( and `Actor.cancel()` process-wide-runtime-shutdown requests (as utilized inside `Portal.cancel_actor()` ). - ''' assert actor._service_n # state sanity # TODO: once `trio` get's an "obvious way" for req/resp we # should use it? - # https://github.com/python-trio/trio/issues/467 + # -[ ] existing GH https://github.com/python-trio/trio/issues/467 + # -[ ] for other transports (like QUIC) we can possibly just + # entirely avoid the feeder mem-chans since each msg will be + # delivered with a ctx-id already? + # + # |_ for ex, from `aioquic` which exposed "stream ids": + # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L1175 + # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L659 log.runtime( 'Entering RPC msg loop:\n' f'peer: {chan.uid}\n' @@ -846,7 +844,7 @@ async def process_messages( | Return(cid=cid) | CancelAck(cid=cid) - # `.cid` means RPC-ctx-task specific + # `.cid` indicates RPC-ctx-task scoped | Error(cid=cid) # recv-side `MsgType` decode violation @@ -1042,16 +1040,16 @@ async def process_messages( trio.Event(), ) - # runtime-scoped remote error (since no `.cid`) + # runtime-scoped remote (internal) error + # (^- bc no `Error.cid` -^) + # + # NOTE: this is the non-rpc error case, that + # is, an error NOT raised inside a call to + # `_invoke()` (i.e. no cid was provided in the + # msg - see above). Raise error inline and + # mark the channel as "globally errored" for + # all downstream consuming primitives. case Error(): - # NOTE: this is the non-rpc error case, - # that is, an error **not** raised inside - # a call to ``_invoke()`` (i.e. no cid was - # provided in the msg - see above). 
Push - # this error to all local channel - # consumers (normally portals) by marking - # the channel as errored - # assert chan.uid chan._exc: Exception = unpack_error( msg, chan=chan, @@ -1107,7 +1105,7 @@ async def process_messages( f'|_{chan.raddr}\n' ) - # transport **was** disconnected + # transport **WAS** disconnected return True except ( @@ -1146,12 +1144,11 @@ async def process_messages( finally: # msg debugging for when he machinery is brokey log.runtime( - 'Exiting IPC msg loop with\n' - f'peer: {chan.uid}\n' + 'Exiting IPC msg loop with final msg\n\n' + f'<= peer: {chan.uid}\n' f'|_{chan}\n\n' - 'final msg:\n' - f'{pformat(msg)}\n' + f'{pformat(msg)}\n\n' ) - # transport **was not** disconnected + # transport **WAS NOT** disconnected return False -- 2.34.1 From efb69f9bf9966d3581375db5c50df9d865e43a82 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 25 Apr 2024 12:36:14 -0400 Subject: [PATCH 072/190] Flip back `StartAck` timeout to `inf`.. --- tractor/_runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 7ab14ab9..c12365be 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -852,7 +852,7 @@ class Actor: msg_buffer_size: int|None = None, allow_overruns: bool = False, load_nsf: bool = False, - ack_timeout: float = 3, + ack_timeout: float = float('inf'), ) -> Context: ''' -- 2.34.1 From e17603402f0f6c59729acac9f3978c45f0d6293c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 25 Apr 2024 12:38:05 -0400 Subject: [PATCH 073/190] Add `Context.peer_side: str` property, mk static-meth private. --- tractor/_context.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index f0fc966e..762d8798 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -472,13 +472,17 @@ class Context: return 'parent' if self._portal else 'child' @staticmethod - def peer_side(side: str) -> str: + def _peer_side(side: str) -> str: match side: case 'child': return 'parent' case 'parent': return 'child' + @property + def peer_side(self) -> str: + return self._peer_side(self.side) + # TODO: remove stat! # -[ ] re-implement the `.experiemental._pubsub` stuff # with `MsgStream` and that should be last usage? @@ -512,9 +516,7 @@ class Context: equiv of a `StopIteration`. ''' - await self.chan.send( - Stop(cid=self.cid) - ) + await self.chan.send(Stop(cid=self.cid)) def _maybe_cancel_and_set_remote_error( self, @@ -593,7 +595,6 @@ class Context: # TODO: never do this right? # if self._remote_error: # return - peer_side: str = self.peer_side(self.side) # XXX: denote and set the remote side's error so that # after we cancel whatever task is the opener of this @@ -601,7 +602,7 @@ class Context: # appropriately. log.runtime( 'Setting remote error for ctx\n\n' - f'<= {peer_side!r}: {self.chan.uid}\n' + f'<= {self.peer_side!r}: {self.chan.uid}\n' f'=> {self.side!r}\n\n' f'{error}' ) @@ -623,9 +624,8 @@ class Context: elif isinstance(error, MsgTypeError): msgerr = True - peer_side: str = self.peer_side(self.side) log.error( - f'IPC dialog error due to msg-type caused by {peer_side!r} side\n\n' + f'IPC dialog error due to msg-type caused by {self.peer_side!r} side\n\n' f'{error}\n' f'{pformat(self)}\n' @@ -1070,12 +1070,12 @@ class Context: except trio.EndOfChannel as eoc: if ( eoc - and stream.closed + and + stream.closed ): # sanity, can remove? 
assert eoc is stream._eoc
-                # from .devx import pause
-                # await pause()
+
                log.warning(
                    'Stream was terminated by EoC\n\n'
                    # NOTE: won't show the error but
@@ -1647,10 +1647,9 @@ class Context:
        side: str = self.side
        if side == 'child':
            assert not self._portal
-        peer_side: str = self.peer_side(side)
 
        flow_body: str = (
-            f'<= peer {peer_side!r}: {from_uid}\n'
+            f'<= peer {self.peer_side!r}: {from_uid}\n'
            f'  |_<{nsf}()>\n\n'
 
            f'=> {side!r}: {self._task}\n'
@@ -1668,7 +1667,7 @@ class Context:
            log_meth = log.runtime
 
        log_meth(
-            f'Delivering IPC ctx error from {peer_side!r} to {side!r} task\n\n'
+            f'Delivering IPC ctx error from {self.peer_side!r} to {side!r} task\n\n'
 
            f'{flow_body}'
@@ -2333,7 +2332,7 @@ async def open_context_from_portal(
                and ctx.cancel_acked
            ):
                log.cancel(
-                    'Context cancelled by caller task\n'
+                    f'Context cancelled by {ctx.side!r}-side task\n'
                    f'|_{ctx._task}\n\n'
 
                    f'{repr(scope_err)}\n'
@@ -2367,6 +2366,7 @@ async def open_context_from_portal(
        None,
    )
 
+
 def mk_context(
    chan: Channel,
    cid: str,
-- 
2.34.1


From 32eb2df5aa55c25e3c761723c516955ff7749f27 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 25 Apr 2024 16:19:39 -0400
Subject: [PATCH 074/190] Use `Context.[peer_]side` in ctxc messages

---
 tractor/_rpc.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 17d37564..595d1352 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -588,7 +588,7 @@ async def _invoke(
 
            if cs.cancel_called:
                canceller: tuple = ctx.canceller
-                msg: str = 'actor was cancelled by '
+                explain: str = f'{ctx.side!r}-side task was cancelled by '
 
                # NOTE / TODO: if we end up having
                # ``Actor._cancel_task()`` call
@@ -598,24 +598,28 @@ async def _invoke(
                if ctx._cancel_called:
                    # TODO: test for this!!!!!
                    canceller: tuple = our_uid
-                    msg += 'itself '
+                    explain += 'itself '
 
                # if the channel which spawned the ctx is the
                # one that cancelled it then we report that, vs.
                # it being some other random actor that for ex.
                # some actor who calls `Portal.cancel_actor()`
                # and by side-effect cancels this ctx.
+                #
+                # TODO: determine if the ctx peer task was the
+                # exact task which cancelled, vs. some other
+                # task in the same actor.
                elif canceller == ctx.chan.uid:
-                    msg += 'its caller'
+                    explain += f'its {ctx.peer_side!r}-side peer'
 
                else:
-                    msg += 'a remote peer'
+                    explain += 'a remote peer'
 
+                # TODO: move this "div centering" into
+                # a helper for use elsewhere!
                div_chars: str = '------ - ------'
                div_offset: int = (
-                    round(len(msg)/2)+1
+                    round(len(explain)/2)+1
                    +
                    round(len(div_chars)/2)+1
                )
@@ -626,11 +630,12 @@ async def _invoke(
                    +
                    f'{div_chars}\n'
                )
-                msg += (
+                explain += (
                    div_str +
                    f'<= canceller: {canceller}\n'
-                    f'=> uid: {our_uid}\n'
-                    f'  |_{ctx._task}()'
+                    f'=> cancellee: {our_uid}\n'
+                    # TODO: better repr for ctx tasks..
+                    f'  |_{ctx.side!r} {ctx._task}'
 
                    # TODO: instead just show the
                    # ctx.__str__() here?
@@ -649,7 +654,7 @@ async def _invoke(
                # task, so relay this cancel signal to the
                # other side.
                ctxc = ContextCancelled(
-                    message=msg,
+                    message=explain,
                    boxed_type=trio.Cancelled,
                    canceller=canceller,
                )
-- 
2.34.1


From d83e0eb665a63c78702ad489aea7e9d1728b276d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 25 Apr 2024 20:00:13 -0400
Subject: [PATCH 075/190] Mk `.msg.pretty_struct.Struct.pformat()` a mod func

More along the lines of `msgspec.struct` and also far more useful
internally for pprinting `MsgTypes`. Of course add method aliases.
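For reference, the mod-func-as-method pattern in a nutshell; a runnable
sketch using a hypothetical plain class (not the actual
`msgspec.Struct` based impl):

    # a module-level formatter taking the instance as its first arg
    def pformat(
        struct,
        field_indent: int = 2,
        indent: int = 0,
    ) -> str:
        ws: str = ' ' * indent
        field_ws: str = ' ' * (field_indent + indent)
        body: str = ''.join(
            f'{field_ws}{k}: {v!r},\n'
            for k, v in vars(struct).items()
        )
        return (
            f'{type(struct).__qualname__}(\n'
            f'{body}'
            f'{ws})'
        )

    class Point:
        def __init__(self, x: int, y: int) -> None:
            self.x, self.y = x, y

        # plain funcs assigned in a class body bind as methods,
        # so `self` is passed as the mod-func's `struct` arg.
        pformat = pformat
        __repr__ = pformat

    # >>> Point(1, 2)
    # Point(
    #   x: 1,
    #   y: 2,
    # )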
--- tractor/msg/pretty_struct.py | 110 +++++++++++++++++------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index a67bbd26..f27fb89c 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -102,6 +102,59 @@ def iter_fields(struct: Struct) -> Iterator[ ) +def pformat( + struct: Struct, + field_indent: int = 2, + indent: int = 0, + +) -> str: + ''' + Recursion-safe `pprint.pformat()` style formatting of + a `msgspec.Struct` for sane reading by a human using a REPL. + + ''' + # global whitespace indent + ws: str = ' '*indent + + # field whitespace indent + field_ws: str = ' '*(field_indent + indent) + + # qtn: str = ws + struct.__class__.__qualname__ + qtn: str = struct.__class__.__qualname__ + + obj_str: str = '' # accumulator + fi: structs.FieldInfo + k: str + v: Any + for fi, k, v in iter_fields(struct): + + # TODO: how can we prefer `Literal['option1', 'option2, + # ..]` over .__name__ == `Literal` but still get only the + # latter for simple types like `str | int | None` etc..? + ft: type = fi.type + typ_name: str = getattr(ft, '__name__', str(ft)) + + # recurse to get sub-struct's `.pformat()` output Bo + if isinstance(v, Struct): + val_str: str = v.pformat( + indent=field_indent + indent, + field_indent=indent + field_indent, + ) + + else: # the `pprint` recursion-safe format: + # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr + val_str: str = saferepr(v) + + # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! + obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') + + return ( + f'{qtn}(\n' + f'{obj_str}' + f'{ws})' + ) + + class Struct( _Struct, @@ -140,65 +193,12 @@ class Struct( return sin_props - # TODO: make thisi a mod-func! - def pformat( - self, - field_indent: int = 2, - indent: int = 0, - - ) -> str: - ''' - Recursion-safe `pprint.pformat()` style formatting of - a `msgspec.Struct` for sane reading by a human using a REPL. - - ''' - # global whitespace indent - ws: str = ' '*indent - - # field whitespace indent - field_ws: str = ' '*(field_indent + indent) - - # qtn: str = ws + self.__class__.__qualname__ - qtn: str = self.__class__.__qualname__ - - obj_str: str = '' # accumulator - fi: structs.FieldInfo - k: str - v: Any - for fi, k, v in iter_fields(self): - - # TODO: how can we prefer `Literal['option1', 'option2, - # ..]` over .__name__ == `Literal` but still get only the - # latter for simple types like `str | int | None` etc..? - ft: type = fi.type - typ_name: str = getattr(ft, '__name__', str(ft)) - - # recurse to get sub-struct's `.pformat()` output Bo - if isinstance(v, Struct): - val_str: str = v.pformat( - indent=field_indent + indent, - field_indent=indent + field_indent, - ) - - else: # the `pprint` recursion-safe format: - # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - val_str: str = saferepr(v) - - # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! - obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') - - return ( - f'{qtn}(\n' - f'{obj_str}' - f'{ws})' - ) - + pformat = pformat + # __str__ = __repr__ = pformat # TODO: use a pprint.PrettyPrinter instance around ONLY rendering # inside a known tty? # def __repr__(self) -> str: # ... 
-
-    # __str__ = __repr__ = pformat
     __repr__ = pformat
 
     def copy(
-- 
2.34.1


From 799416661efda49395eb2b4cda3974aea212bbf7 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 26 Apr 2024 12:45:10 -0400
Subject: [PATCH 076/190] Add more useful `MsgDec.__repr__()`

Basically exact same as that for `MsgCodec` with the `.spec`
displayed via a better (maybe multi-line) `.spec_str: str` generated
from a common new set of helper mod funcs factored out of the
msg-codec methods:
- `mk_msgspec_table()` to gen a `MsgType` name -> msg table.
- `pformat_msgspec()` to `str`-ify said table values nicely.

Also add a new `MsgCodec.msg_spec_str: str` prop which delegates to the
above for the same.
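For reference, the `msgspec` tagged-union decoding that such a spec
table summarizes, as a self-contained sketch (hypothetical payload
types, not `tractor`'s actual msg set):

    import msgspec
    from msgspec import msgpack

    class Hello(msgspec.Struct, tag=True):
        greeting: str

    class Bye(msgspec.Struct, tag=True):
        reason: str

    # a decoder whose `.type` is the union spec; `mk_msgspec_table()`
    # iterates exactly this `.type.__args__` set to build its table.
    dec = msgpack.Decoder(type=Hello|Bye)
    wire: bytes = msgpack.Encoder().encode(Hello(greeting='yo'))
    assert isinstance(dec.decode(wire), Hello)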
---
 tractor/msg/_codec.py | 123 ++++++++++++++++++++++++++++--------------
 1 file changed, 82 insertions(+), 41 deletions(-)

diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index e3540c3d..901c0da1 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -75,7 +75,7 @@ log = get_logger(__name__)
 # TODO: unify with `MsgCodec` by making `._dec` part this?
 class MsgDec(Struct):
     '''
-    An IPC msg decoder.
+    An IPC msg (payload) decoder.
 
     Normally used to decode only a payload: `MsgType.pld:
     PayloadT` field before delivery to IPC consumer code.
@@ -87,6 +87,31 @@ class MsgDec(Struct):
     def dec(self) -> msgpack.Decoder:
         return self._dec
 
+    def __repr__(self) -> str:
+
+        speclines: str = self.spec_str
+
+        # in multi-typed spec case we stick the list
+        # all on newlines after the |__pld_spec__:,
+        # OW it's prolly single type spec-value
+        # so just leave it on same line.
+        if '\n' in speclines:
+            speclines: str = '\n' + textwrap.indent(
+                speclines,
+                prefix=' '*3,
+            )
+
+        body: str = textwrap.indent(
+            f'|_dec_hook: {self.dec.dec_hook}\n'
+            f'|__pld_spec__: {speclines}\n',
+            prefix=' '*2,
+        )
+        return (
+            f'<{type(self).__name__}(\n'
+            f'{body}'
+            ')>'
+        )
+
     # struct type unions
     # https://jcristharif.com/msgspec/structs.html#tagged-unions
     #
@@ -137,17 +162,7 @@ class MsgDec(Struct):
     # TODO: would get moved into `FieldSpec.__str__()` right?
     @property
     def spec_str(self) -> str:
-
-        # TODO: could also use match: instead?
-        spec: Union[Type]|Type = self.spec
-
-        # `typing.Union` case
-        if getattr(spec, '__args__', False):
-            return str(spec)
-
-        # just a single type
-        else:
-            return spec.__name__
+        return pformat_msgspec(codec=self)
 
     pld_spec_str = spec_str
 
@@ -168,9 +183,57 @@ def mk_dec(
 
 ) -> MsgDec:
 
-    return msgpack.Decoder(
-        type=spec,  # like `Msg[Any]`
-        dec_hook=dec_hook,
+    return MsgDec(
+        _dec=msgpack.Decoder(
+            type=spec,  # like `Msg[Any]`
+            dec_hook=dec_hook,
+        )
+    )
+
+
+def mk_msgspec_table(
+    dec: msgpack.Decoder,
+    msg: MsgType|None = None,
+
+) -> dict[str, MsgType]|str:
+    '''
+    Fill out a `dict` of `MsgType`s keyed by name
+    for a given input `msgspec.msgpack.Decoder`
+    as defined by its `.type: Union[Type]` setting.
+
+    If `msg` is provided, only deliver a `dict` with a single
+    entry for that type.
+
+    '''
+    msgspec: Union[Type]|Type = dec.type
+
+    if not (msgtypes := getattr(msgspec, '__args__', False)):
+        msgtypes = [msgspec]
+
+    msgt_table: dict[str, MsgType] = {
+        msgt: str(msgt)
+        for msgt in msgtypes
+    }
+    if msg:
+        msgt: MsgType = type(msg)
+        str_repr: str = msgt_table[msgt]
+        return {msgt: str_repr}
+
+    return msgt_table
+
+
+def pformat_msgspec(
+    codec: MsgCodec|MsgDec,
+    msg: MsgType|None = None,
+    join_char: str = '\n',
+
+) -> str:
+    dec: msgpack.Decoder = getattr(codec, 'dec', codec)
+    return join_char.join(
+        mk_msgspec_table(
+            dec=dec,
+            msg=msg,
+        ).values()
     )
 
 # TODO: overall IPC msg-spec features (i.e. in this mod)!
@@ -200,7 +263,7 @@ class MsgCodec(Struct):
 
     def __repr__(self) -> str:
         speclines: str = textwrap.indent(
-            self.pformat_msg_spec(),
+            pformat_msgspec(codec=self),
             prefix=' '*3,
         )
         body: str = textwrap.indent(
@@ -244,33 +307,11 @@ class MsgCodec(Struct):
         # NOTE: defined and applied inside `mk_codec()`
         return self._dec.type
 
-    def msg_spec_items(
-        self,
-        msg: MsgType|None = None,
-
-    ) -> dict[str, MsgType]|str:
-
-        msgt_table: dict[str, MsgType] = {
-            msgt: str(msgt)
-            for msgt in self.msg_spec.__args__
-        }
-        if msg:
-            msgt: MsgType = type(msg)
-            str_repr: str = msgt_table[msgt]
-            return {msgt: str_repr}
-
-        return msgt_table
-
     # TODO: some way to make `pretty_struct.Struct` use this
     # wrapped field over the `.msg_spec` one?
-    def pformat_msg_spec(
-        self,
-        msg: MsgType|None = None,
-        join_char: str = '\n',
-    ) -> str:
-        return join_char.join(
-            self.msg_spec_items(msg=msg).values()
-        )
+    @property
+    def msg_spec_str(self) -> str:
+        return pformat_msgspec(codec=self)
 
 lib: ModuleType = msgspec
-- 
2.34.1


From f32a9657c01d448b73067777a0188541f70cc184 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 26 Apr 2024 13:03:07 -0400
Subject: [PATCH 077/190] Use new `Msg[Co]Dec` repr meths in `._exceptions`

Particularly when logging around `MsgTypeError`s.

Other:
- make `_raise_from_unexpected_msg()`'s `expect_msg` a non-default
  value arg, must always be passed by caller.
- drop `'canceller'` from `_body_fields` ow it shows up twice for ctxc.
- use `.msg.pretty_struct.pformat()`.
- parameterize `RemoteActorError.reprol()` (repr-one-line method) to
  show `RemoteActorError[<boxed_type_str>]( ..` to make obvi the boxed
  remote error type.
- re-impl `.boxed_type_str` as `str`-casting the `.boxed_type` value
  which is guaranteed to render non-`None`.
---
 tractor/_exceptions.py | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 8d9274fe..f2ff8c21 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -54,6 +54,7 @@ from tractor.msg import (
 from tractor.msg.pretty_struct import (
     iter_fields,
     Struct,
+    pformat as struct_format,
 )
 
 if TYPE_CHECKING:
@@ -108,6 +109,10 @@ _body_fields: list[str] = list(
         'relay_path',
         '_msg_dict',
         'cid',
+
+        # since only ctxc should show it but `Error` does
+        # have it as an optional field.
+        'canceller',
     }
 )
@@ -382,6 +387,9 @@ class RemoteActorError(Exception):
         '''
         Error type raised by original remote faulting actor.
 
+        When the error has only been relayed a single actor-hop
+        this will be the same as the `.boxed_type`.
+
         '''
         if self._src_type is None:
             self._src_type = get_err_type(
@@ -396,7 +404,8 @@ class RemoteActorError(Exception):
         String-name of the (last hop's) boxed error type.
''' - return self._ipc_msg.boxed_type_str + bt: Type[BaseException] = self.boxed_type + return str(bt.__name__) @property def boxed_type(self) -> str: @@ -492,7 +501,11 @@ class RemoteActorError(Exception): ''' # TODO: use this matryoshka emjoi XD # => 🪆 - reprol_str: str = f'{type(self).__name__}(' + reprol_str: str = ( + f'{type(self).__name__}' # type name + f'[{self.boxed_type_str}]' # parameterized by boxed type + '(' # init-style look + ) _repr: str = self._mk_fields_str( self.reprol_fields, end_char=' ', @@ -653,8 +666,8 @@ class MsgTypeError( - `Yield` - TODO: any embedded `.pld` type defined by user code? - Normally the source of an error is re-raised from some `.msg._codec` - decode which itself raises in a backend interchange + Normally the source of an error is re-raised from some + `.msg._codec` decode which itself raises in a backend interchange lib (eg. a `msgspec.ValidationError`). ''' @@ -939,7 +952,7 @@ def _raise_from_unexpected_msg( src_err: AttributeError, log: StackLevelAdapter, # caller specific `log` obj - expect_msg: str = Yield, + expect_msg: Type[MsgType], # allow "deeper" tbs when debugging B^o hide_tb: bool = True, @@ -1037,16 +1050,16 @@ def _raise_from_unexpected_msg( ctx.maybe_raise() raise eoc from src_err + # TODO: our own transport/IPC-broke error subtype? if stream._closed: - # TODO: our own error subtype? raise trio.ClosedResourceError('This stream was closed') # always re-raise the source error if no translation error case # is activated above. raise MessagingError( - f"{_type} was expecting a {expect_msg} message" - " BUT received a non-error msg:\n" - f'{pformat(msg)}' + f'{_type} was expecting a {expect_msg.__name__!r} message' + ' BUT received a non-error msg:\n\n' + f'{struct_format(msg)}' ) from src_err @@ -1079,13 +1092,11 @@ def _mk_msg_type_err( # no src error from `msgspec.msgpack.Decoder.decode()` so # prolly a manual type-check on our part. 
if message is None: - fmt_spec: str = codec.pformat_msg_spec() fmt_stack: str = ( '\n'.join(traceback.format_stack(limit=3)) ) tb_fmt: str = pformat_boxed_tb( tb_str=fmt_stack, - # fields_str=header, field_prefix=' ', indent='', ) @@ -1093,8 +1104,7 @@ def _mk_msg_type_err( f'invalid msg -> {msg}: {type(msg)}\n\n' f'{tb_fmt}\n' f'Valid IPC msgs are:\n\n' - # f' ------ - ------\n' - f'{fmt_spec}\n', + f'{codec.msg_spec_str}\n', ) elif src_type_error: src_message: str = str(src_type_error) -- 2.34.1 From 1d1cd9c51a32dccbcb30f7f564f3ccc0fddf69c2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 26 Apr 2024 13:13:04 -0400 Subject: [PATCH 078/190] More bitty (runtime) logging tweaks --- tractor/_portal.py | 6 +++--- tractor/_streaming.py | 8 ++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index f3928657..e25a6c70 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -254,11 +254,11 @@ class Portal: return False reminfo: str = ( - f'`Portal.cancel_actor()` => {self.channel.uid}\n' - f' |_{chan}\n' + f'Portal.cancel_actor() => {self.channel.uid}\n' + f'|_{chan}\n' ) log.cancel( - f'Sending runtime `.cancel()` request to peer\n\n' + f'Requesting runtime cancel for peer\n\n' f'{reminfo}' ) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 764b7c1e..dd4cd0e1 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -364,14 +364,10 @@ class MsgStream(trio.abc.Channel): if not self._eoc: message: str = ( - f'Context stream closed by {self._ctx.side!r}\n' + f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' f'|_{self}\n' ) - log.cancel( - 'Stream self-closed before receiving EoC\n\n' - + - message - ) + log.cancel(message) self._eoc = trio.EndOfChannel(message) # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? -- 2.34.1 From 753724252d8d161ba3583520703503019a0ddfc2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 26 Apr 2024 13:18:06 -0400 Subject: [PATCH 079/190] Make `.msg.types.Msg.pld: Raw` only, since `PldRx`.. --- tractor/msg/types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index cb124324..63c0a467 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -56,6 +56,7 @@ log = get_logger('tractor.msgspec') PayloadT = TypeVar('PayloadT') +# TODO: PayloadMsg class Msg( Struct, Generic[PayloadT], @@ -81,7 +82,7 @@ class Msg( tree. ''' - cid: str|None # call/context-id + cid: str # call/context-id # ^-TODO-^: more explicit type? # -[ ] use UNSET here? # https://jcristharif.com/msgspec/supported-types.html#unset @@ -106,7 +107,7 @@ class Msg( # TODO: could also be set to `msgspec.Raw` if the sub-decoders # approach is preferred over the generic parameterization # approach as take by `mk_msg_spec()` below. - pld: PayloadT|Raw + pld: Raw class Aid( -- 2.34.1 From a67975f8f56bb7536f6ef3d4dc805ce95cf27df8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 26 Apr 2024 15:29:50 -0400 Subject: [PATCH 080/190] First draft payload-spec limit API Add new task-scope oriented `PldRx.pld_spec` management API similar to `.msg._codec.limit_msg_spec()`, but obvi built to process and filter `MsgType.pld` values. New API related changes include: - new per-task singleton getter `msg._ops.current_pldrx()` which delivers the current (global) payload receiver via a new `_ctxvar_PldRx: ContextVar` configured with a default `_def_any_pldec: MsgDec[Any]` decoder. 
- a `PldRx.limit_plds()` which sets the decoder (`.type` underneath)
  for the specific payload rx instance.
- `.msg._ops.limit_plds()` which obtains the current task-scoped `PldRx`
  and applies the pld spec via a new `PldRx.limit_plds()`.
- rename `PldRx._msgdec` -> `._pldec`.
- add `.pld_dec` as pub attr for -^

Unrelated adjustments:
- use `.msg.pretty_struct.pformat()` where handy.
- always pass `expect_msg: MsgType`.
- add a `case Stop()` to `PldRx.dec_msg()` which will `log.warning()`
  when a stop is received but no stream was open on this receiving
  side, since we rarely want that to raise; it's prolly just a runtime
  race or mistake in user code.
---
 tractor/msg/_ops.py | 162 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 145 insertions(+), 17 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index e78b79a4..5a9ab46a 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -25,12 +25,12 @@ from contextlib import (
     # asynccontextmanager as acm,
     contextmanager as cm,
 )
-from pprint import pformat
+from contextvars import ContextVar
 from typing import (
     Any,
     Type,
     TYPE_CHECKING,
-    # Union,
+    Union,
 )
 # ------ - ------
 from msgspec import (
@@ -63,7 +63,7 @@ from .types import (
     Started,
     Stop,
     Yield,
-    # pretty_struct,
+    pretty_struct,
 )
 
 
@@ -75,6 +75,9 @@ if TYPE_CHECKING:
 log = get_logger(__name__)
 
 
+_def_any_pldec: MsgDec = mk_dec()
+
+
 class PldRx(Struct):
     '''
     A "msg payload receiver".
@@ -101,10 +104,13 @@ class PldRx(Struct):
     '''
     # TODO: better to bind it here?
     # _rx_mc: trio.MemoryReceiveChannel
-    _msgdec: MsgDec = mk_dec(spec=Any)
-
+    _pldec: MsgDec
     _ipc: Context|MsgStream|None = None
 
+    @property
+    def pld_dec(self) -> MsgDec:
+        return self._pldec
+
     @cm
     def apply_to_ipc(
         self,
@@ -122,9 +128,29 @@ class PldRx(Struct):
         finally:
             self._ipc = None
 
+    @cm
+    def limit_plds(
+        self,
+        spec: Union[Type[Struct]],
+
+    ) -> MsgDec:
+        '''
+        Type-limit the loadable msg payloads via an applied
+        `MsgDec` given an input spec, revert to prior decoder on
+        exit.
+
+        '''
+        orig_dec: MsgDec = self._pldec
+        limit_dec: MsgDec = mk_dec(spec=spec)
+        try:
+            self._pldec = limit_dec
+            yield limit_dec
+        finally:
+            self._pldec = orig_dec
+
     @property
     def dec(self) -> msgpack.Decoder:
-        return self._msgdec.dec
+        return self._pldec.dec
 
     def recv_pld_nowait(
         self,
@@ -182,7 +208,7 @@ class PldRx(Struct):
         self,
         msg: MsgType,
         ctx: Context,
-        expect_msg: Type[MsgType]|None = None,
+        expect_msg: Type[MsgType]|None,
 
     ) -> PayloadT|Raw:
         '''
@@ -199,11 +225,11 @@ class PldRx(Struct):
                 |Return(pld=pld)  # termination phase
             ):
                 try:
-                    pld: PayloadT = self._msgdec.decode(pld)
+                    pld: PayloadT = self._pldec.decode(pld)
                     log.runtime(
-                        'Decode msg payload\n\n'
-                        f'{msg}\n\n'
-                        f'{pld}\n'
+                        'Decoded msg payload\n\n'
+                        f'{msg}\n'
+                        f'|_pld={pld!r}'
                     )
                     return pld
 
@@ -237,9 +263,42 @@ class PldRx(Struct):
 
             case Error():
                 src_err = MessagingError(
-                    'IPC dialog termination by msg'
+                    'IPC ctx dialog terminated without `Return`-ing a result'
                 )
 
+            case Stop(cid=cid):
+                message: str = (
+                    f'{ctx.side!r}-side of ctx received stream-`Stop` from '
+                    f'{ctx.peer_side!r} peer ?\n'
+                    f'|_cid: {cid}\n\n'
+
+                    f'{pretty_struct.pformat(msg)}\n'
+                )
+                if ctx._stream is None:
+                    explain: str = (
+                        f'BUT, no `MsgStream` (was) open(ed) on this '
+                        f'{ctx.side!r}-side of the IPC ctx?\n'
+                        f'Maybe check your code for streaming phase race conditions?\n'
+                    )
+                    log.warning(
+                        message
+                        +
+                        explain
+                    )
+                    # let caller decide what to do when only one
+                    # side opened a stream, don't raise.
+ return msg + + else: + explain: str = ( + 'Received a `Stop` when it should NEVER be possible!?!?\n' + ) + # TODO: this is constructed inside + # `_raise_from_unexpected_msg()` but maybe we + # should pass it in? + # src_err = trio.EndOfChannel(explain) + src_err = None + case _: src_err = InternalError( 'Unknown IPC msg ??\n\n' @@ -259,6 +318,7 @@ class PldRx(Struct): async def recv_msg_w_pld( self, ipc: Context|MsgStream, + expect_msg: MsgType, ) -> tuple[MsgType, PayloadT]: ''' @@ -274,10 +334,75 @@ class PldRx(Struct): pld: PayloadT = self.dec_msg( msg, ctx=ipc, + expect_msg=expect_msg, ) return msg, pld +# Always maintain a task-context-global `PldRx` +_def_pld_rx: PldRx = PldRx( + _pldec=_def_any_pldec, +) +_ctxvar_PldRx: ContextVar[PldRx] = ContextVar( + 'pld_rx', + default=_def_pld_rx, +) + + +def current_pldrx() -> PldRx: + ''' + Return the current `trio.Task.context`'s msg-payload + receiver, the post IPC but pre-app code `MsgType.pld` + filter. + + Modification of the current payload spec via `limit_plds()` + allows an application to contextually filter typed IPC msg + content delivered via wire transport. + + ''' + return _ctxvar_PldRx.get() + + +@cm +def limit_plds( + spec: Union[Type[Struct]], + **kwargs, + +) -> MsgDec: + ''' + Apply a `MsgCodec` that will natively decode the SC-msg set's + `Msg.pld: Union[Type[Struct]]` payload fields using + tagged-unions of `msgspec.Struct`s from the `payload_types` + for all IPC contexts in use by the current `trio.Task`. + + ''' + __tracebackhide__: bool = True + try: + # sanity on orig settings + orig_pldrx: PldRx = current_pldrx() + orig_pldec: MsgDec = orig_pldrx.pld_dec + + with orig_pldrx.limit_plds( + spec=spec, + **kwargs, + ) as pldec: + log.info( + 'Applying payload-decoder\n\n' + f'{pldec}\n' + ) + yield pldec + finally: + log.info( + 'Reverted to previous payload-decoder\n\n' + f'{orig_pldec}\n' + ) + assert ( + (pldrx := current_pldrx()) is orig_pldrx + and + pldrx.pld_dec is orig_pldec + ) + + async def drain_to_final_msg( ctx: Context, @@ -368,7 +493,10 @@ async def drain_to_final_msg( # pray to the `trio` gawds that we're corrent with this # msg: dict = await ctx._rx_chan.receive() - msg, pld = await ctx._pld_rx.recv_msg_w_pld(ipc=ctx) + msg, pld = await ctx._pld_rx.recv_msg_w_pld( + ipc=ctx, + expect_msg=Return, + ) # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -399,7 +527,7 @@ async def drain_to_final_msg( ctx._result: Any = pld log.runtime( 'Context delivered final draining msg:\n' - f'{pformat(msg)}' + f'{pretty_struct.pformat(msg)}' ) # XXX: only close the rx mem chan AFTER # a final result is retreived. 
@@ -435,7 +563,7 @@ async def drain_to_final_msg( f'=> {ctx._task}\n' f' |_{ctx._stream}\n\n' - f'{pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) return ( return_msg, @@ -452,7 +580,7 @@ async def drain_to_final_msg( f'=> {ctx._task}\n' f' |_{ctx._stream}\n\n' - f'{pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) continue @@ -467,7 +595,7 @@ async def drain_to_final_msg( pre_result_drained.append(msg) log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' - f'{pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) continue -- 2.34.1 From 74b6871bfd633fbbd77fdc3e003767cfb5090b0c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 12:15:46 -0400 Subject: [PATCH 081/190] Mk `process_messages()` return last msg; summary logging Not sure it's **that** useful (yet) but in theory would allow avoiding certain log level usage around transient RPC requests for discovery methods (like `.register_actor()` and friends); can't hurt to be able to introspect that last message for other future cases I'd imagine as well. Adjust the calling code in `._runtime` to match; other spots are using the `trio.Nursery.start()` schedule style and are fine as is. Improve a bunch more log messages throughout a few mods mostly by going to a "summary" single-emission style where possible/appropriate: - in `._runtime` more "single summary" status style log emissions: |_mk `Actor.load_modules()` render a single mod loaded summary. |_use a summary `con_status: str` for `Actor._stream_handler()` conn setup and an equiv (`con_teardown_status`) for connection teardowns. |_similar thing in `Actor.wait_for_actor()`. - generally more usage of `.msg.pretty_struct` apis throughout `._runtime`. --- tractor/_entry.py | 2 +- tractor/_portal.py | 19 +++-- tractor/_rpc.py | 51 +++++------ tractor/_runtime.py | 203 +++++++++++++++++++++++--------------------- 4 files changed, 147 insertions(+), 128 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index bf719abb..78f83283 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -146,7 +146,7 @@ def _trio_main( finally: log.info( - 'Actor terminated\n' + 'Subactor terminated\n' + actor_info ) diff --git a/tractor/_portal.py b/tractor/_portal.py index e25a6c70..806dcc7b 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -435,7 +435,6 @@ class Portal: yield stream finally: - # cancel the far end task on consumer close # NOTE: this is a special case since we assume that if using # this ``.open_fream_from()`` api, the stream is one a one @@ -496,7 +495,7 @@ class LocalPortal: async def open_portal( channel: Channel, - nursery: trio.Nursery|None = None, + tn: trio.Nursery|None = None, start_msg_loop: bool = True, shield: bool = False, @@ -504,15 +503,19 @@ async def open_portal( ''' Open a ``Portal`` through the provided ``channel``. - Spawns a background task to handle message processing (normally - done by the actor-runtime implicitly). + Spawns a background task to handle RPC processing, normally + done by the actor-runtime implicitly via a call to + `._rpc.process_messages()`. just after connection establishment. 
''' actor = current_actor() assert actor was_connected: bool = False - async with maybe_open_nursery(nursery, shield=shield) as nursery: + async with maybe_open_nursery( + tn, + shield=shield, + ) as tn: if not channel.connected(): await channel.connect() @@ -524,7 +527,7 @@ async def open_portal( msg_loop_cs: trio.CancelScope|None = None if start_msg_loop: from ._runtime import process_messages - msg_loop_cs = await nursery.start( + msg_loop_cs = await tn.start( partial( process_messages, actor, @@ -544,7 +547,7 @@ async def open_portal( await channel.aclose() # cancel background msg loop task - if msg_loop_cs: + if msg_loop_cs is not None: msg_loop_cs.cancel() - nursery.cancel_scope.cancel() + tn.cancel_scope.cancel() diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 595d1352..56d91534 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -61,11 +61,13 @@ from .msg import ( current_codec, MsgCodec, NamespacePath, + pretty_struct, ) from tractor.msg.types import ( CancelAck, Error, Msg, + MsgType, Return, Start, StartAck, @@ -770,7 +772,10 @@ async def process_messages( shield: bool = False, task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED, -) -> bool: +) -> ( + bool, # chan diconnected + MsgType, # last msg +): ''' This is the low-level, per-IPC-channel, RPC task scheduler loop. @@ -812,11 +817,6 @@ async def process_messages( # |_ for ex, from `aioquic` which exposed "stream ids": # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L1175 # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L659 - log.runtime( - 'Entering RPC msg loop:\n' - f'peer: {chan.uid}\n' - f'|_{chan}\n' - ) nursery_cancelled_before_task: bool = False msg: Msg|None = None try: @@ -830,12 +830,15 @@ async def process_messages( async for msg in chan: log.transport( # type: ignore - f'<= IPC msg from peer: {chan.uid}\n\n' + f'IPC msg from peer\n' + f'<= {chan.uid}\n\n' # TODO: avoid fmting depending on loglevel for perf? - # -[ ] specifically `pformat()` sub-call..? + # -[ ] specifically `pretty_struct.pformat()` sub-call..? + # - how to only log-level-aware actually call this? # -[ ] use `.msg.pretty_struct` here now instead! - f'{pformat(msg)}\n' + # f'{pretty_struct.pformat(msg)}\n' + f'{msg}\n' ) match msg: @@ -949,10 +952,11 @@ async def process_messages( uid=actorid, ): log.runtime( - 'Handling RPC `Start` request from\n' - f'peer: {actorid}\n' - '\n' - f'=> {ns}.{funcname}({kwargs})\n' + 'Handling RPC `Start` request\n' + f'<= peer: {actorid}\n' + f' |_{ns}.{funcname}({kwargs})\n\n' + + f'{pretty_struct.pformat(msg)}\n' ) # runtime-internal endpoint: `Actor.` @@ -1093,25 +1097,24 @@ async def process_messages( parent_chan=chan, ) - except ( - TransportClosed, - ): + except TransportClosed: # channels "breaking" (for TCP streams by EOF or 104 # connection-reset) is ok since we don't have a teardown # handshake for them (yet) and instead we simply bail out of # the message loop and expect the teardown sequence to clean # up.. - # TODO: add a teardown handshake? and, + # + # TODO: maybe add a teardown handshake? and, # -[ ] don't show this msg if it's an ephemeral discovery ep call? # -[ ] figure out how this will break with other transports? 
log.runtime( - f'channel closed abruptly with\n' - f'peer: {chan.uid}\n' - f'|_{chan.raddr}\n' + f'IPC channel closed abruptly\n' + f'<=x peer: {chan.uid}\n' + f' |_{chan.raddr}\n' ) # transport **WAS** disconnected - return True + return (True, msg) except ( Exception, @@ -1151,9 +1154,9 @@ async def process_messages( log.runtime( 'Exiting IPC msg loop with final msg\n\n' f'<= peer: {chan.uid}\n' - f'|_{chan}\n\n' - f'{pformat(msg)}\n\n' + f' |_{chan}\n\n' + f'{pretty_struct.pformat(msg)}' ) # transport **WAS NOT** disconnected - return False + return (False, msg) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index c12365be..84940222 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -50,6 +50,7 @@ from pprint import pformat import signal import sys from typing import ( + Any, Callable, TYPE_CHECKING, ) @@ -68,7 +69,7 @@ from tractor.msg import ( pretty_struct, NamespacePath, types as msgtypes, - Msg, + MsgType, ) from ._ipc import Channel from ._context import ( @@ -92,19 +93,6 @@ from ._rpc import ( process_messages, try_ship_error_to_remote, ) -# from tractor.msg.types import ( -# Aid, -# SpawnSpec, -# Start, -# StartAck, -# Started, -# Yield, -# Stop, -# Return, -# Error, -# ) - - if TYPE_CHECKING: @@ -311,29 +299,32 @@ class Actor: self._reg_addrs = addrs async def wait_for_peer( - self, uid: tuple[str, str] + self, + uid: tuple[str, str], + ) -> tuple[trio.Event, Channel]: ''' - Wait for a connection back from a spawned actor with a `uid` - using a `trio.Event` for sync. + Wait for a connection back from a (spawned sub-)actor with + a `uid` using a `trio.Event` for sync. ''' - log.runtime(f"Waiting for peer {uid} to connect") + log.debug(f'Waiting for peer {uid!r} to connect') event = self._peer_connected.setdefault(uid, trio.Event()) await event.wait() - log.runtime(f"{uid} successfully connected back to us") + log.debug(f'{uid!r} successfully connected back to us') return event, self._peers[uid][-1] def load_modules( self, - debug_mode: bool = False, + # debug_mode: bool = False, ) -> None: ''' - Load enabled RPC py-modules locally (after process fork/spawn). + Load explicitly enabled python modules from local fs after + process spawn. Since this actor may be spawned on a different machine from the original nursery we need to try and load the local module - code (presuming it exists). + code manually (presuming it exists). ''' try: @@ -346,16 +337,21 @@ class Actor: _mp_fixup_main._fixup_main_from_path( parent_data['init_main_from_path']) + status: str = 'Attempting to import enabled modules:\n' for modpath, filepath in self.enable_modules.items(): # XXX append the allowed module to the python path which # should allow for relative (at least downward) imports. sys.path.append(os.path.dirname(filepath)) - log.runtime(f"Attempting to import {modpath}@{filepath}") - mod = importlib.import_module(modpath) + status += ( + f'|_{modpath!r} -> {filepath!r}\n' + ) + mod: ModuleType = importlib.import_module(modpath) self._mods[modpath] = mod if modpath == '__main__': self._mods['__mp_main__'] = mod + log.runtime(status) + except ModuleNotFoundError: # it is expected the corresponding `ModuleNotExposed` error # will be raised later @@ -415,21 +411,23 @@ class Actor: else: log.runtime(f'New connection to us @{chan.raddr}') - con_msg: str = '' + con_status: str = '' + + # TODO: remove this branch since can never happen? 
+ # NOTE: `.uid` is only set after first contact if their_uid: - # NOTE: `.uid` is only set after first contact - con_msg = ( - 'IPC Re-connection from already known peer? ' + con_status = ( + 'IPC Re-connection from already known peer?\n' ) else: - con_msg = ( - 'New IPC connection to us ' + con_status = ( + 'New inbound IPC connection <=\n' ) - con_msg += ( - f'<= @{chan.raddr}\n' + con_status += ( f'|_{chan}\n' # f' |_@{chan.raddr}\n\n' + # ^-TODO-^ remove since alfready in chan.__repr__()? ) # send/receive initial handshake response try: @@ -449,13 +447,13 @@ class Actor: # a bound listener on the "arbiter" addr. the reset will be # because the handshake was never meant took place. log.warning( - con_msg + con_status + ' -> But failed to handshake? Ignoring..\n' ) return - con_msg += ( + con_status += ( f' -> Handshake with actor `{uid[0]}[{uid[1][-6:]}]` complete\n' ) # IPC connection tracking for both peers and new children: @@ -468,7 +466,7 @@ class Actor: None, ) if event: - con_msg += ( + con_status += ( ' -> Waking subactor spawn waiters: ' f'{event.statistics().tasks_waiting}\n' f' -> Registered IPC chan for child actor {uid}@{chan.raddr}\n' @@ -479,7 +477,7 @@ class Actor: event.set() else: - con_msg += ( + con_status += ( f' -> Registered IPC chan for peer actor {uid}@{chan.raddr}\n' ) # type: ignore @@ -493,13 +491,18 @@ class Actor: # TODO: can we just use list-ref directly? chans.append(chan) - log.runtime(con_msg) + con_status += ' -> Entering RPC msg loop..\n' + log.runtime(con_status) # Begin channel management - respond to remote requests and # process received reponses. disconnected: bool = False + last_msg: MsgType try: - disconnected: bool = await process_messages( + ( + disconnected, + last_msg, + ) = await process_messages( self, chan, ) @@ -600,16 +603,24 @@ class Actor: # that the IPC layer may have failed # unexpectedly since it may be the cause of # other downstream errors. - entry = local_nursery._children.get(uid) + entry: tuple|None = local_nursery._children.get(uid) if entry: proc: trio.Process _, proc, _ = entry if ( (poll := getattr(proc, 'poll', None)) - and poll() is None + and + poll() is None # proc still alive ): - log.cancel( + # TODO: change log level based on + # detecting whether chan was created for + # ephemeral `.register_actor()` request! + # -[ ] also, that should be avoidable by + # re-using any existing chan from the + # `._discovery.get_registry()` call as + # well.. + log.runtime( f'Peer IPC broke but subproc is alive?\n\n' f'<=x {chan.uid}@{chan.raddr}\n' @@ -618,17 +629,17 @@ class Actor: # ``Channel`` teardown and closure sequence # drop ref to channel so it can be gc-ed and disconnected - log.runtime( - f'Disconnected IPC channel:\n' - f'uid: {chan.uid}\n' - f'|_{pformat(chan)}\n' + con_teardown_status: str = ( + f'IPC channel disconnected:\n' + f'<=x uid: {chan.uid}\n' + f' |_{pformat(chan)}\n\n' ) chans.remove(chan) # TODO: do we need to be this pedantic? if not chans: - log.runtime( - f'No more channels with {chan.uid}' + con_teardown_status += ( + f'-> No more channels with {chan.uid}' ) self._peers.pop(uid, None) @@ -642,15 +653,16 @@ class Actor: f' |_[{i}] {pformat(chan)}\n' ) - log.runtime( - f'Remaining IPC {len(self._peers)} peers:\n' - + peers_str + con_teardown_status += ( + f'-> Remaining IPC {len(self._peers)} peers: {peers_str}\n' ) # No more channels to other actors (at all) registered # as connected. 
if not self._peers: - log.runtime("Signalling no more peer channel connections") + con_teardown_status += ( + 'Signalling no more peer channel connections' + ) self._no_more_peers.set() # NOTE: block this actor from acquiring the @@ -725,13 +737,16 @@ class Actor: # TODO: figure out why this breaks tests.. db_cs.cancel() + log.runtime(con_teardown_status) + # finally block closure + # TODO: rename to `._deliver_payload()` since this handles # more then just `result` msgs now obvi XD async def _deliver_ctx_payload( self, chan: Channel, cid: str, - msg: Msg|MsgTypeError, + msg: MsgType|MsgTypeError, ) -> None|bool: ''' @@ -756,7 +771,7 @@ class Actor: # XXX don't need right since it's always in msg? # f'=> cid: {cid}\n\n' - f'{pretty_struct.Struct.pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) return @@ -898,9 +913,11 @@ class Actor: cid=cid, ) log.runtime( - 'Sending RPC start msg\n\n' + 'Sending RPC `Start`\n\n' f'=> peer: {chan.uid}\n' - f' |_ {ns}.{func}({kwargs})\n' + f' |_ {ns}.{func}({kwargs})\n\n' + + f'{pretty_struct.pformat(msg)}' ) await chan.send(msg) @@ -957,31 +974,29 @@ class Actor: if self._spawn_method == "trio": - # Receive runtime state from our parent - # parent_data: dict[str, Any] - # parent_data = await chan.recv() - - # TODO: maybe we should just wrap this directly - # in a `Actor.spawn_info: SpawnInfo` struct? + # Receive post-spawn runtime state from our parent. spawnspec: msgtypes.SpawnSpec = await chan.recv() self._spawn_spec = spawnspec - # TODO: eventually all these msgs as - # `msgspec.Struct` with a special mode that - # pformats them in multi-line mode, BUT only - # if "trace"/"util" mode is enabled? log.runtime( 'Received runtime spec from parent:\n\n' - f'{pformat(spawnspec)}\n' + + # TODO: eventually all these msgs as + # `msgspec.Struct` with a special mode that + # pformats them in multi-line mode, BUT only + # if "trace"/"util" mode is enabled? + f'{pretty_struct.pformat(spawnspec)}\n' ) - # accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs') accept_addrs: list[tuple[str, int]] = spawnspec.bind_addrs - # rvs = parent_data.pop('_runtime_vars') - rvs = spawnspec._runtime_vars + # TODO: another `Struct` for rtvs.. + rvs: dict[str, Any] = spawnspec._runtime_vars if rvs['_debug_mode']: try: - log.info( + # TODO: maybe return some status msgs upward + # to that we can emit them in `con_status` + # instead? + log.devx( 'Enabling `stackscope` traces on SIGUSR1' ) from .devx import enable_stack_on_sig @@ -991,7 +1006,6 @@ class Actor: '`stackscope` not installed for use in debug mode!' ) - log.runtime(f'Runtime vars are: {rvs}') rvs['_is_root'] = False _state._runtime_vars.update(rvs) @@ -1008,18 +1022,12 @@ class Actor: for val in spawnspec.reg_addrs ] - # for attr, value in parent_data.items(): + # TODO: better then monkey patching.. 
+ # -[ ] maybe read the actual f#$-in `._spawn_spec` XD for _, attr, value in pretty_struct.iter_fields( spawnspec, ): setattr(self, attr, value) - # if ( - # attr == 'reg_addrs' - # and value - # ): - # self.reg_addrs = [tuple(val) for val in value] - # else: - # setattr(self, attr, value) return ( chan, @@ -1028,12 +1036,11 @@ class Actor: except OSError: # failed to connect log.warning( - f'Failed to connect to parent!?\n\n' - 'Closing IPC [TCP] transport server to\n' - f'{parent_addr}\n' + f'Failed to connect to spawning parent actor!?\n' + f'x=> {parent_addr}\n' f'|_{self}\n\n' ) - await self.cancel(chan=None) # self cancel + await self.cancel(req_chan=None) # self cancel raise async def _serve_forever( @@ -1111,8 +1118,7 @@ class Actor: # chan whose lifetime limits the lifetime of its remotely # requested and locally spawned RPC tasks - similar to the # supervision semantics of a nursery wherein the actual - # implementation does start all such tasks in - # a sub-nursery. + # implementation does start all such tasks in a sub-nursery. req_chan: Channel|None, ) -> bool: @@ -1153,7 +1159,7 @@ class Actor: # other) repr fields instead of doing this all manual.. msg: str = ( f'Runtime cancel request from {requester_type}:\n\n' - f'<= .cancel(): {requesting_uid}\n' + f'<= .cancel(): {requesting_uid}\n\n' ) # TODO: what happens here when we self-cancel tho? @@ -1168,8 +1174,8 @@ class Actor: dbcs = _debug.DebugStatus.req_cs if dbcs is not None: msg += ( - '>> Cancelling active debugger request..\n' - f'|_{_debug.Lock}\n' + '-> Cancelling active debugger request..\n' + f'|_{_debug.Lock.pformat()}' ) dbcs.cancel() @@ -1420,7 +1426,12 @@ class Actor: ''' if self._server_n: - log.runtime("Shutting down channel server") + # TODO: obvi a different server type when we eventually + # support some others XD + server_prot: str = 'TCP' + log.runtime( + f'Cancelling {server_prot} server' + ) self._server_n.cancel_scope.cancel() return True @@ -1604,6 +1615,7 @@ async def async_main( assert accept_addrs try: + # TODO: why is this not with the root nursery? actor._server_n = await service_nursery.start( partial( actor._serve_forever, @@ -1888,13 +1900,13 @@ class Arbiter(Actor): sockaddrs: list[tuple[str, int]] = [] sockaddr: tuple[str, int] - for (aname, _), sockaddr in self._registry.items(): - log.runtime( - f'Actor mailbox info:\n' - f'aname: {aname}\n' - f'sockaddr: {sockaddr}\n' + mailbox_info: str = 'Actor registry contact infos:\n' + for uid, sockaddr in self._registry.items(): + mailbox_info += ( + f'|_uid: {uid}\n' + f'|_sockaddr: {sockaddr}\n\n' ) - if name == aname: + if name == uid[0]: sockaddrs.append(sockaddr) if not sockaddrs: @@ -1906,6 +1918,7 @@ class Arbiter(Actor): if not isinstance(uid, trio.Event): sockaddrs.append(self._registry[uid]) + log.runtime(mailbox_info) return sockaddrs async def register_actor( -- 2.34.1 From 45f499cf3affac52e0adf58c46b13ebd8e1f8730 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 12:37:17 -0400 Subject: [PATCH 082/190] Reorg frames pformatters, add `Context.repr_state` A better spot for the pretty-formatting of frame text (and thus tracebacks) is in the new `.devx._code` module: - move from `._exceptions` -> `.devx._code.pformat_boxed_tb()`. - add new `pformat_caller_frame()` factored out the use case in `._exceptions._mk_msg_type_err()` where we dump a stack trace for bad `.send()` side IPC msgs. 
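A rough usage sketch of the relocated helper (the raised error and the
fields content here are made up):

    import traceback
    from tractor.devx._code import pformat_boxed_tb

    try:
        raise RuntimeError('boom')
    except RuntimeError:
        tb_str: str = traceback.format_exc()

    # renders the traceback "boxed" underneath the (prefixed)
    # fields header, as used for `RemoteActorError`'s repr.
    print(pformat_boxed_tb(
        tb_str=tb_str,
        fields_str="src_uid: ('example', 'deadbeef')\n",
        field_prefix=' |_',
    ))
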
Add some new pretty-format methods to `Context`: - explicitly implement `.pformat()` and allow an `extra_fields: dict` which can be used to inject additional fields (maybe eventually by default) such as is now used inside `._maybe_cancel_and_set_remote_error()` when reporting the internal `._scope` state in cancel logging. - add a new `.repr_state -> str` which provides a single string status depending on the internal state of the IPC ctx in terms of the shuttle protocol's "phase"; use it from `.pformat()` for the `|_state:`. - set `.started(complain_no_parity=False)` now since we presume decoding with `.pld: Raw` now with the new `PldRx` design. - use new `msgops.current_pldrx()` in `mk_context()`. --- tractor/_context.py | 163 +++++++++++++++++++++++++++++------------ tractor/_exceptions.py | 83 +++------------------ tractor/devx/_code.py | 102 ++++++++++++++++++++++++++ 3 files changed, 227 insertions(+), 121 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 762d8798..20584979 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -61,7 +61,6 @@ from ._exceptions import ( ) from .log import get_logger from .msg import ( - _codec, Error, MsgType, MsgCodec, @@ -103,7 +102,6 @@ class Unresolved: a final return value or raised error is resolved. ''' - ... # TODO: make this a .msg.types.Struct! @@ -116,19 +114,19 @@ class Context: NB: This class should **never be instatiated directly**, it is allocated by the runtime in 2 ways: - - by entering ``Portal.open_context()`` which is the primary - public API for any "caller" task or, + - by entering `Portal.open_context()` which is the primary + public API for any "parent" task or, - by the RPC machinery's `._rpc._invoke()` as a `ctx` arg - to a remotely scheduled "callee" function. + to a remotely scheduled "child" function. - AND is always constructed using the below ``mk_context()``. + AND is always constructed using the below `mk_context()`. Allows maintaining task or protocol specific state between 2 cancel-scope-linked, communicating and parallel executing `trio.Task`s. Contexts are allocated on each side of any task RPC-linked msg dialog, i.e. for every request to a remote actor from a `Portal`. On the "callee" side a context is - always allocated inside ``._rpc._invoke()``. + always allocated inside `._rpc._invoke()`. TODO: more detailed writeup on cancellation, error and streaming semantics.. @@ -262,7 +260,13 @@ class Context: _strict_started: bool = False _cancel_on_msgerr: bool = True - def __str__(self) -> str: + def pformat( + self, + extra_fields: dict[str, Any]|None = None, + # ^-TODO-^ some built-in extra state fields + # we'll want in some devx specific cases? + + ) -> str: ds: str = '=' # ds: str = ': ' @@ -279,11 +283,7 @@ class Context: outcome_str: str = self.repr_outcome( show_error_fields=True ) - outcome_typ_str: str = self.repr_outcome( - type_only=True - ) - - return ( + fmtstr: str = ( f'\n' ) # NOTE: making this return a value that can be passed to @@ -335,7 +345,8 @@ class Context: # logging perspective over `eval()`-ability since we do NOT # target serializing non-struct instances! 
# def __repr__(self) -> str: - __repr__ = __str__ + __str__ = pformat + __repr__ = pformat @property def cancel_called(self) -> bool: @@ -615,10 +626,10 @@ class Context: whom: str = ( 'us' if error.canceller == self._actor.uid - else 'peer' + else 'a remote peer (not us)' ) log.cancel( - f'IPC context cancelled by {whom}!\n\n' + f'IPC context was cancelled by {whom}!\n\n' f'{error}' ) @@ -626,7 +637,6 @@ class Context: msgerr = True log.error( f'IPC dialog error due to msg-type caused by {self.peer_side!r} side\n\n' - f'{error}\n' f'{pformat(self)}\n' ) @@ -696,24 +706,23 @@ class Context: else: message: str = 'NOT cancelling `Context._scope` !\n\n' - scope_info: str = 'No `self._scope: CancelScope` was set/used ?' + fmt_str: str = 'No `self._scope: CancelScope` was set/used ?' if cs: - scope_info: str = ( - f'self._scope: {cs}\n' - f'|_ .cancel_called: {cs.cancel_called}\n' - f'|_ .cancelled_caught: {cs.cancelled_caught}\n' - f'|_ ._cancel_status: {cs._cancel_status}\n\n' + fmt_str: str = self.pformat( + extra_fields={ + '._is_self_cancelled()': self._is_self_cancelled(), + '._cancel_on_msgerr': self._cancel_on_msgerr, - f'{self}\n' - f'|_ ._is_self_cancelled(): {self._is_self_cancelled()}\n' - f'|_ ._cancel_on_msgerr: {self._cancel_on_msgerr}\n\n' - - f'msgerr: {msgerr}\n' + '._scope': cs, + '._scope.cancel_called': cs.cancel_called, + '._scope.cancelled_caught': cs.cancelled_caught, + '._scope._cancel_status': cs._cancel_status, + } ) log.cancel( message + - f'{scope_info}' + fmt_str ) # TODO: maybe we should also call `._res_scope.cancel()` if it # exists to support cancelling any drain loop hangs? @@ -751,7 +760,7 @@ class Context: ) return ( # f'{self._nsf}() -{{{codec}}}-> {repr(self.outcome)}:' - f'{self._nsf}() -> {outcome_str}:' + f'{self._nsf}() -> {outcome_str}' ) @property @@ -839,7 +848,7 @@ class Context: if not self._portal: raise InternalError( 'No portal found!?\n' - 'Why is this supposed caller context missing it?' + 'Why is this supposed {self.side!r}-side ctx task missing it?!?' ) cid: str = self.cid @@ -1277,11 +1286,11 @@ class Context: ) log.cancel( - 'Ctx drained pre-result msgs:\n' - f'{pformat(drained_msgs)}\n\n' + 'Ctx drained to final result msgs\n' + f'{return_msg}\n\n' - f'Final return msg:\n' - f'{return_msg}\n' + f'pre-result drained msgs:\n' + f'{pformat(drained_msgs)}\n' ) self.maybe_raise( @@ -1446,6 +1455,65 @@ class Context: repr(self._result) ) + @property + def repr_state(self) -> str: + ''' + A `str`-status describing the current state of this + inter-actor IPC context in terms of the current "phase" state + of the SC shuttling dialog protocol. + + ''' + merr: Exception|None = self.maybe_error + outcome: Unresolved|Exception|Any = self.outcome + + match ( + outcome, + merr, + ): + case ( + Unresolved, + ContextCancelled(), + ) if self.cancel_acked: + status = 'self-cancelled' + + case ( + Unresolved, + ContextCancelled(), + ) if ( + self.canceller + and not self._cancel_called + ): + status = 'peer-cancelled' + + case ( + Unresolved, + BaseException(), + ) if self.canceller: + status = 'errored' + + case ( + _, # any non-unresolved value + None, + ) if self._final_result_is_set(): + status = 'returned' + + case ( + Unresolved, # noqa (weird.. ruff) + None, + ): + if stream := self._stream: + if stream.closed: + status = 'streaming-finished' + else: + status = 'streaming' + elif self._started_called: + status = 'started' + + case _: + status = 'unknown!?' 
+ + return status + async def started( self, @@ -1454,7 +1522,11 @@ class Context: value: PayloadT|None = None, strict_parity: bool = False, - complain_no_parity: bool = True, + + # TODO: this will always emit now that we do `.pld: Raw` + # passthrough.. so maybe just only complain when above strict + # flag is set? + complain_no_parity: bool = False, ) -> None: ''' @@ -1514,18 +1586,19 @@ class Context: ) raise RuntimeError( 'Failed to roundtrip `Started` msg?\n' - f'{pformat(rt_started)}\n' + f'{pretty_struct.pformat(rt_started)}\n' ) if rt_started != started_msg: # TODO: break these methods out from the struct subtype? + # TODO: make that one a mod func too.. diff = pretty_struct.Struct.__sub__( rt_started, started_msg, ) complaint: str = ( - 'Started value does not match after codec rountrip?\n\n' + 'Started value does not match after roundtrip?\n\n' f'{diff}' ) @@ -1541,8 +1614,6 @@ class Context: else: log.warning(complaint) - # started_msg = rt_started - await self.chan.send(started_msg) # raise any msg type error NO MATTER WHAT! @@ -2357,7 +2428,7 @@ async def open_context_from_portal( # FINALLY, remove the context from runtime tracking and # exit! log.runtime( - 'De-allocating IPC ctx opened with {ctx.side!r} peer \n' + f'De-allocating IPC ctx opened with {ctx.side!r} peer \n' f'uid: {uid}\n' f'cid: {ctx.cid}\n' ) @@ -2393,10 +2464,8 @@ def mk_context( from .devx._code import find_caller_info caller_info: CallerInfo|None = find_caller_info() - pld_rx = msgops.PldRx( - # _rx_mc=recv_chan, - _msgdec=_codec.mk_dec(spec=pld_spec) - ) + # TODO: when/how do we apply `.limit_plds()` from here? + pld_rx: msgops.PldRx = msgops.current_pldrx() ctx = Context( chan=chan, diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index f2ff8c21..af653f92 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -46,7 +46,7 @@ from tractor.msg import ( Error, MsgType, Stop, - Yield, + # Yield, types as msgtypes, MsgCodec, MsgDec, @@ -140,71 +140,6 @@ def get_err_type(type_name: str) -> BaseException|None: return type_ref -def pformat_boxed_tb( - tb_str: str, - fields_str: str|None = None, - field_prefix: str = ' |_', - - tb_box_indent: int|None = None, - tb_body_indent: int = 1, - -) -> str: - if ( - fields_str - and - field_prefix - ): - fields: str = textwrap.indent( - fields_str, - prefix=field_prefix, - ) - else: - fields = fields_str or '' - - tb_body = tb_str - if tb_body_indent: - tb_body: str = textwrap.indent( - tb_str, - prefix=tb_body_indent * ' ', - ) - - tb_box: str = ( - - # orig - # f' |\n' - # f' ------ - ------\n\n' - # f'{tb_str}\n' - # f' ------ - ------\n' - # f' _|\n' - - f'|\n' - f' ------ - ------\n\n' - # f'{tb_str}\n' - f'{tb_body}' - f' ------ - ------\n' - f'_|\n' - ) - tb_box_indent: str = ( - tb_box_indent - or - 1 - - # (len(field_prefix)) - # ? ^-TODO-^ ? if you wanted another indent level - ) - if tb_box_indent > 0: - tb_box: str = textwrap.indent( - tb_box, - prefix=tb_box_indent * ' ', - ) - - return ( - fields - + - tb_box - ) - - def pack_from_raise( local_err: ( ContextCancelled @@ -504,12 +439,15 @@ class RemoteActorError(Exception): reprol_str: str = ( f'{type(self).__name__}' # type name f'[{self.boxed_type_str}]' # parameterized by boxed type - '(' # init-style look ) + _repr: str = self._mk_fields_str( self.reprol_fields, end_char=' ', ) + if _repr: + reprol_str += '(' # init-style call + return ( reprol_str + @@ -521,6 +459,7 @@ class RemoteActorError(Exception): Nicely formatted boxed error meta data + traceback. 
''' + from tractor.devx._code import pformat_boxed_tb fields: str = self._mk_fields_str( _body_fields + @@ -1092,14 +1031,10 @@ def _mk_msg_type_err( # no src error from `msgspec.msgpack.Decoder.decode()` so # prolly a manual type-check on our part. if message is None: - fmt_stack: str = ( - '\n'.join(traceback.format_stack(limit=3)) - ) - tb_fmt: str = pformat_boxed_tb( - tb_str=fmt_stack, - field_prefix=' ', - indent='', + from tractor.devx._code import ( + pformat_caller_frame, ) + tb_fmt: str = pformat_caller_frame(stack_limit=3) message: str = ( f'invalid msg -> {msg}: {type(msg)}\n\n' f'{tb_fmt}\n' diff --git a/tractor/devx/_code.py b/tractor/devx/_code.py index 01d64cd1..8d55212b 100644 --- a/tractor/devx/_code.py +++ b/tractor/devx/_code.py @@ -23,6 +23,8 @@ from __future__ import annotations import inspect # import msgspec # from pprint import pformat +import textwrap +import traceback from types import ( FrameType, FunctionType, @@ -175,3 +177,103 @@ def find_caller_info( ) return None + + +def pformat_boxed_tb( + tb_str: str, + fields_str: str|None = None, + field_prefix: str = ' |_', + + tb_box_indent: int|None = None, + tb_body_indent: int = 1, + +) -> str: + ''' + Create a "boxed" looking traceback string. + + Useful for emphasizing traceback text content as being an + embedded attribute of some other object (like + a `RemoteActorError` or other boxing remote error shuttle + container). + + Any other parent/container "fields" can be passed in the + `fields_str` input along with other prefix/indent settings. + + ''' + if ( + fields_str + and + field_prefix + ): + fields: str = textwrap.indent( + fields_str, + prefix=field_prefix, + ) + else: + fields = fields_str or '' + + tb_body = tb_str + if tb_body_indent: + tb_body: str = textwrap.indent( + tb_str, + prefix=tb_body_indent * ' ', + ) + + tb_box: str = ( + + # orig + # f' |\n' + # f' ------ - ------\n\n' + # f'{tb_str}\n' + # f' ------ - ------\n' + # f' _|\n' + + f'|\n' + f' ------ - ------\n\n' + # f'{tb_str}\n' + f'{tb_body}' + f' ------ - ------\n' + f'_|\n' + ) + tb_box_indent: str = ( + tb_box_indent + or + 1 + + # (len(field_prefix)) + # ? ^-TODO-^ ? if you wanted another indent level + ) + if tb_box_indent > 0: + tb_box: str = textwrap.indent( + tb_box, + prefix=tb_box_indent * ' ', + ) + + return ( + fields + + + tb_box + ) + + +def pformat_caller_frame( + stack_limit: int = 1, + box_tb: bool = True, +) -> str: + ''' + Capture and return the traceback text content from + `stack_limit` call frames up. 
+ + ''' + tb_str: str = ( + '\n'.join( + traceback.format_stack(limit=stack_limit) + ) + ) + if box_tb: + tb_str: str = pformat_boxed_tb( + tb_str=tb_str, + field_prefix=' ', + indent='', + ) + return tb_str -- 2.34.1 From 5d1a0da5e67a47c1537046e830fb0a6f03204a83 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 12:55:46 -0400 Subject: [PATCH 083/190] Fix attr name error, use public `MsgDec.dec` --- tractor/msg/_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 5a9ab46a..4cf20496 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -229,7 +229,7 @@ class PldRx(Struct): log.runtime( 'Decoded msg payload\n\n' f'{msg}\n' - f'|_pld={pld!r}' + f'|_pld={pld!r}\n' ) return pld @@ -237,7 +237,7 @@ class PldRx(Struct): except ValidationError as src_err: msgterr: MsgTypeError = _mk_msg_type_err( msg=msg, - codec=self._dec, + codec=self.dec, src_validation_error=src_err, ) msg: Error = pack_from_raise( -- 2.34.1 From 9f3a00c65ea99881834cd2660334a82a1fbae722 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 12:59:38 -0400 Subject: [PATCH 084/190] Type annot the proc from `trio.lowlevel.open_process()` --- tractor/_spawn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 824f41f3..3f886c01 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -455,10 +455,9 @@ async def trio_proc( proc: trio.Process|None = None try: try: - # TODO: needs ``trio_typing`` patch? - proc = await trio.lowlevel.open_process(spawn_cmd) + proc: trio.Process = await trio.lowlevel.open_process(spawn_cmd) log.runtime( - 'Started new sub-proc\n' + 'Started new child\n' f'|_{proc}\n' ) -- 2.34.1 From 8347a78276b92626811f14a0626baca3b7d45eb5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 13:01:07 -0400 Subject: [PATCH 085/190] Add todo for rigorous struct-type spec of `SpawnSpec` fields --- tractor/msg/types.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 63c0a467..7e10dab0 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -144,6 +144,8 @@ class SpawnSpec( `Aid` msg. ''' + # TODO: similar to the `Start` kwargs spec needed below, we need + # a hard `Struct` def for all of these fields! _parent_main_data: dict _runtime_vars: dict[str, Any] -- 2.34.1 From 77764aceef3dbf06a46b33ffb7af3d240f08686e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 12:55:16 -0400 Subject: [PATCH 086/190] Change to `RemoteActorError.pformat()` For more sane manual calls as needed in logging purposes. Obvi remap the dunder methods to it. Other: - drop `hide_tb: bool` from `unpack_error()`, shouldn't need it since frame won't ever be part of any tb raised from returned error. - add a `is_invalid_payload: bool` to `_raise_from_unexpected_msg()` to be used from `PldRx` where we don't need to decode the IPC msg, just the payload; make the error message reflect this case. - drop commented `._portal._unwrap_msg()` since we've replaced it with `PldRx`'s delegation to newer `._raise_from_unexpected_msg()`. - hide the `Portal.result()` frame by default, again. 
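A sketch of the intended manual-call usage (the surrounding handler
scaffolding is illustrative only):

    import tractor
    log = tractor.log.get_logger(__name__)

    try:
        await portal.result()
    except tractor.RemoteActorError as rae:
        # identical to `repr(rae)`/`str(rae)` now that the dunders
        # are remapped, but handy when composing bigger log msgs.
        log.error(
            'Remote task errored!\n\n'
            + rae.pformat()
        )
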
--- tractor/_exceptions.py | 156 +++++++++++++++++++++++------------------ tractor/_portal.py | 37 +--------- 2 files changed, 89 insertions(+), 104 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index af653f92..83675069 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -46,7 +46,6 @@ from tractor.msg import ( Error, MsgType, Stop, - # Yield, types as msgtypes, MsgCodec, MsgDec, @@ -212,6 +211,8 @@ class RemoteActorError(Exception): ) -> None: super().__init__(message) + # for manual display without having to muck with `Exception.args` + self._message: str = message # TODO: maybe a better name? # - .errtype # - .retype @@ -454,32 +455,46 @@ class RemoteActorError(Exception): _repr ) - def __repr__(self) -> str: + def pformat(self) -> str: ''' - Nicely formatted boxed error meta data + traceback. + Nicely formatted boxed error meta data + traceback, OR just + the normal message from `.args` (for eg. as you'd want shown + by a locally raised `ContextCancelled`). ''' - from tractor.devx._code import pformat_boxed_tb - fields: str = self._mk_fields_str( - _body_fields - + - self.extra_body_fields, - ) - body: str = pformat_boxed_tb( - tb_str=self.tb_str, - fields_str=fields, - field_prefix=' |_', - # ^- is so that it's placed like so, - # just after ' ) + __repr__ = pformat + __str__ = pformat + def unwrap( self, ) -> BaseException: @@ -809,12 +824,9 @@ def pack_error( def unpack_error( msg: Error, - - chan: Channel|None = None, + chan: Channel, box_type: RemoteActorError = RemoteActorError, - hide_tb: bool = True, - ) -> None|Exception: ''' Unpack an 'error' message from the wire @@ -824,12 +836,10 @@ def unpack_error( which is the responsibilitiy of the caller. ''' - __tracebackhide__: bool = hide_tb - if not isinstance(msg, Error): return None - # retrieve the remote error's encoded details from fields + # retrieve the remote error's msg-encoded details tb_str: str = msg.tb_str message: str = ( f'{chan.uid}\n' @@ -858,7 +868,6 @@ def unpack_error( # original source error. elif boxed_type_str == 'RemoteActorError': assert boxed_type is RemoteActorError - # assert len(error_dict['relay_path']) >= 1 assert len(msg.relay_path) >= 1 exc = box_type( @@ -943,8 +952,6 @@ def _raise_from_unexpected_msg( raise unpack_error( msg, ctx.chan, - hide_tb=hide_tb, - ) from src_err # `MsgStream` termination msg. @@ -1014,6 +1021,7 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, + is_invalid_payload: bool = False, ) -> MsgTypeError: ''' @@ -1028,12 +1036,12 @@ def _mk_msg_type_err( '`codec` must be a `MsgCodec` for send-side errors?' ) + from tractor.devx import ( + pformat_caller_frame, + ) # no src error from `msgspec.msgpack.Decoder.decode()` so # prolly a manual type-check on our part. if message is None: - from tractor.devx._code import ( - pformat_caller_frame, - ) tb_fmt: str = pformat_caller_frame(stack_limit=3) message: str = ( f'invalid msg -> {msg}: {type(msg)}\n\n' @@ -1071,47 +1079,57 @@ def _mk_msg_type_err( # `Channel.recv()` case else: - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. 
- msg_dict: dict = msgpack.decode(msg) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(msgtypes, msg_type_name) - message: str = ( - f'invalid `{msg_type_name}` IPC msg\n\n' - ) + if is_invalid_payload: + msg_type: str = type(msg) + message: str = ( + f'invalid `{msg_type.__qualname__}` payload\n\n' + f'<{type(msg).__qualname__}(\n' + f' |_pld: {codec.pld_spec_str} = {msg.pld!r}' + f')>\n' + ) + + else: + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. + msg_dict: dict = msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + message: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' + ) + # XXX be "fancy" and see if we can determine the exact + # invalid field such that we can comprehensively report + # the specific field's type problem. + msgspec_msg: str = src_validation_error.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) + message += ( + f'{msg.rstrip("`")}\n\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' + ) + if verb_header: message = f'{verb_header} ' + message - # XXX see if we can determine the exact invalid field - # such that we can comprehensively report the - # specific field's type problem - msgspec_msg: str = src_validation_error.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - obj = object() - if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_name_expr: str = ( - f' |_{maybe_field}: {codec.pld_spec_str} = ' - ) - fmt_val_lines: list[str] = pformat(field_val).splitlines() - fmt_val: str = ( - f'{fmt_val_lines[0]}\n' - + - textwrap.indent( - '\n'.join(fmt_val_lines[1:]), - prefix=' '*len(field_name_expr), - ) - ) - message += ( - f'{msg.rstrip("`")}\n\n' - f'<{msg_type.__qualname__}(\n' - # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' - f'{field_name_expr}{fmt_val}\n' - f')>' - ) - msgtyperr = MsgTypeError.from_decode( message=message, msgdict=msg_dict, diff --git a/tractor/_portal.py b/tractor/_portal.py index 806dcc7b..79a9dc5d 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -68,40 +68,6 @@ if TYPE_CHECKING: log = get_logger(__name__) -# TODO: remove and/or rework? -# -[ ] rename to `unwrap_result()` and use -# `._raise_from_unexpected_msg()` (after tweak to accept a `chan: -# Channel` arg) in key block?? -# -[ ] pretty sure this is entirely covered by -# `_exceptions._raise_from_unexpected_msg()` so REMOVE! -# def _unwrap_msg( -# msg: Return|Error, -# ctx: Context, - -# hide_tb: bool = True, - -# ) -> Any: -# ''' -# Unwrap a final result from a `{return: }` IPC msg. - -# ''' -# __tracebackhide__: bool = hide_tb -# try: -# return msg.pld -# except AttributeError as err: - -# # internal error should never get here -# # assert msg.get('cid'), ( -# assert msg.cid, ( -# "Received internal error at portal?" 
-# ) - -# raise unpack_error( -# msg, -# ctx.chan, -# ) from err - - class Portal: ''' A 'portal' to a memory-domain-separated `Actor`. @@ -173,12 +139,13 @@ class Portal: portal=self, ) + # @api_frame async def result(self) -> Any: ''' Return the result(s) from the remote actor's "main" task. ''' - # __tracebackhide__ = True + __tracebackhide__ = True # Check for non-rpc errors slapped on the # channel for which we always raise exc = self.channel._exc -- 2.34.1 From 15cf54fc4563a08117a17b485691db7b945a77f9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 13:04:58 -0400 Subject: [PATCH 087/190] Move pformatters into new `.devx.pformat` Since `._code` is prolly gonna get renamed (to something "frame & stack tools" related) and to give a bit better organization. Also adds a new `add_div()` helper, factored out of ctxc message creation in `._rpc._invoke()`, for adding a little "header line" divider under a given `message: str` with a little math to center it. --- tractor/devx/pformat.py | 135 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 tractor/devx/pformat.py diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py new file mode 100644 index 00000000..0b35feee --- /dev/null +++ b/tractor/devx/pformat.py @@ -0,0 +1,135 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Pretty formatters for use throughout the code base. +Mostly handy for logging and exception message content. + +''' +import textwrap +import traceback + + +def add_div( + message: str, + div_str: str = '------ - ------', + +) -> str: + ''' + Add a "divider string" to the input `message` with + a little math to center it underneath. + + ''' + div_offset: int = ( + round(len(message)/2)+1 + - + round(len(div_str)/2)+1 + ) + div_str: str = ( + '\n' + ' '*div_offset + f'{div_str}\n' + ) + return div_str + + +def pformat_boxed_tb( + tb_str: str, + fields_str: str|None = None, + field_prefix: str = ' |_', + + tb_box_indent: int|None = None, + tb_body_indent: int = 1, + +) -> str: + ''' + Create a "boxed" looking traceback string. + + Useful for emphasizing traceback text content as being an + embedded attribute of some other object (like + a `RemoteActorError` or other boxing remote error shuttle + container). + + Any other parent/container "fields" can be passed in the + `fields_str` input along with other prefix/indent settings. + + ''' + if ( + fields_str + and + field_prefix + ): + fields: str = textwrap.indent( + fields_str, + prefix=field_prefix, + ) + else: + fields = fields_str or '' + + tb_body = tb_str + if tb_body_indent: + tb_body: str = textwrap.indent( + tb_str, + prefix=tb_body_indent * ' ', + ) + + tb_box: str = ( + f'|\n' + f' ------ - ------\n' + f'{tb_body}' + f' ------ - ------\n' + f'_|\n' + ) + tb_box_indent: str = ( + tb_box_indent + or + 1 + + # (len(field_prefix)) + # ? 
^-TODO-^ ? if you wanted another indent level + ) + if tb_box_indent > 0: + tb_box: str = textwrap.indent( + tb_box, + prefix=tb_box_indent * ' ', + ) + + return ( + fields + + + tb_box + ) + + +def pformat_caller_frame( + stack_limit: int = 1, + box_tb: bool = True, +) -> str: + ''' + Capture and return the traceback text content from + `stack_limit` call frames up. + + ''' + tb_str: str = ( + '\n'.join( + traceback.format_stack(limit=stack_limit) + ) + ) + if box_tb: + tb_str: str = pformat_boxed_tb( + tb_str=tb_str, + field_prefix=' ', + indent='', + ) + return tb_str -- 2.34.1 From f992b9f2e83cc7b7ed5e276874b72f8594b31401 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 13:12:44 -0400 Subject: [PATCH 088/190] "Icons" in `._entry`'s subactor `.info()` messages Add a little `>` or `X` supervision icon indicating the spawning or termination of each sub-actor respectively. --- tractor/_entry.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index 78f83283..750dc59f 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -20,6 +20,7 @@ Sub-process entry points. """ from __future__ import annotations from functools import partial +# import textwrap from typing import ( Any, TYPE_CHECKING, @@ -91,7 +92,7 @@ def _mp_main( pass # handle it the same way trio does? finally: - log.info(f"Actor {actor.uid} terminated") + log.info(f"Subactor {actor.uid} terminated") def _trio_main( @@ -125,9 +126,11 @@ def _trio_main( f' loglevel: {actor.loglevel}\n' ) log.info( - 'Started new trio process:\n' + 'Started new trio subactor:\n' + - actor_info + '>\n' # like a "started/play"-icon from super perspective + + + actor_info, ) try: @@ -148,5 +151,7 @@ def _trio_main( log.info( 'Subactor terminated\n' + + 'x\n' # like a "crossed-out/killed" from super perspective + + actor_info ) -- 2.34.1 From 70d974fc99404b07aa5ff3457222e9937518773c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 13:27:00 -0400 Subject: [PATCH 089/190] Mk `drain_to_final_msg()` never raise from `Error` Since we usually want them raised from some (internal) call to `Context.maybe_raise()` and NOT directly from the drainage call, make it possible via a new `raise_error: bool` to both `PldRx.recv_msg_w_pld()` and `.dec_msg()`. In support, - rename `return_msg` -> `result_msg` since we expect to return `Error`s. - do a `result_msg` assign and `break` in the `case Error()`. - add `**dec_msg_kwargs` passthrough for other `.dec_msg()` calling methods. Other, - drop/aggregate todo-notes around the main loop's `ctx._pld_rx.recv_msg_w_pld()` call. - add (configurable) frame hiding to most payload receive meths. 
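In the drain loop this boils down to a call like the following (lifted
from the hunk below):

    msg, pld = await ctx._pld_rx.recv_msg_w_pld(
        ipc=ctx,
        expect_msg=Return,
        # deliver an `Error` msg as a value instead of raising;
        # raising is deferred to the caller's `ctx.maybe_raise()`.
        raise_error=False,
    )
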
--- tractor/msg/_ops.py | 146 ++++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 74 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 4cf20496..1ba623db 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -161,9 +161,10 @@ class PldRx(Struct): ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, - **kwargs, + **dec_msg_kwargs, ) -> Any|Raw: + __tracebackhide__: bool = True msg: MsgType = ( ipc_msg @@ -176,6 +177,7 @@ class PldRx(Struct): msg, ctx=ctx, expect_msg=expect_msg, + **dec_msg_kwargs, ) async def recv_pld( @@ -183,14 +185,16 @@ class PldRx(Struct): ctx: Context, ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, + hide_tb: bool = True, - **kwargs + **dec_msg_kwargs, ) -> Any|Raw: ''' Receive a `MsgType`, then decode and return its `.pld` field. ''' + __tracebackhide__: bool = hide_tb msg: MsgType = ( ipc_msg or @@ -199,9 +203,10 @@ class PldRx(Struct): await ctx._rx_chan.receive() ) return self.dec_msg( - msg, + msg=msg, ctx=ctx, expect_msg=expect_msg, + **dec_msg_kwargs, ) def dec_msg( @@ -210,12 +215,16 @@ class PldRx(Struct): ctx: Context, expect_msg: Type[MsgType]|None, + raise_error: bool = True, + hide_tb: bool = True, + ) -> PayloadT|Raw: ''' Decode a msg's payload field: `MsgType.pld: PayloadT|Raw` and return the value or raise an appropriate error. ''' + __tracebackhide__: bool = hide_tb match msg: # payload-data shuttle msg; deliver the `.pld` value # directly to IPC (primitive) client-consumer code. @@ -228,7 +237,8 @@ class PldRx(Struct): pld: PayloadT = self._pldec.decode(pld) log.runtime( 'Decoded msg payload\n\n' - f'{msg}\n' + f'{msg}\n\n' + f'where payload is\n' f'|_pld={pld!r}\n' ) return pld @@ -237,8 +247,9 @@ class PldRx(Struct): except ValidationError as src_err: msgterr: MsgTypeError = _mk_msg_type_err( msg=msg, - codec=self.dec, + codec=self.pld_dec, src_validation_error=src_err, + is_invalid_payload=True, ) msg: Error = pack_from_raise( local_err=msgterr, @@ -263,8 +274,29 @@ class PldRx(Struct): case Error(): src_err = MessagingError( - 'IPC ctx dialog terminated without `Return`-ing a result' + 'IPC ctx dialog terminated without `Return`-ing a result\n' + f'Instead it raised {msg.boxed_type_str!r}!' ) + # XXX NOTE XXX another super subtle runtime-y thing.. + # + # - when user code (transitively) calls into this + # func (usually via a `Context/MsgStream` API) we + # generally want errors to propagate immediately + # and directly so that the user can define how it + # wants to handle them. + # + # HOWEVER, + # + # - for certain runtime calling cases, we don't want to + # directly raise since the calling code might have + # special logic around whether to raise the error + # or supress it silently (eg. a `ContextCancelled` + # received from the far end which was requested by + # this side, aka a self-cancel). + # + # SO, we offer a flag to control this. + if not raise_error: + return src_err case Stop(cid=cid): message: str = ( @@ -305,6 +337,9 @@ class PldRx(Struct): f'{msg}\n' ) + # TODO: maybe use the new `.add_note()` from 3.11? 
+ # |_https://docs.python.org/3.11/library/exceptions.html#BaseException.add_note + # # fallthrough and raise from `src_err` _raise_from_unexpected_msg( ctx=ctx, @@ -312,7 +347,7 @@ class PldRx(Struct): src_err=src_err, log=log, expect_msg=expect_msg, - hide_tb=False, + hide_tb=hide_tb, ) async def recv_msg_w_pld( @@ -320,6 +355,8 @@ class PldRx(Struct): ipc: Context|MsgStream, expect_msg: MsgType, + **kwargs, + ) -> tuple[MsgType, PayloadT]: ''' Retrieve the next avail IPC msg, decode it's payload, and return @@ -335,6 +372,7 @@ class PldRx(Struct): msg, ctx=ipc, expect_msg=expect_msg, + **kwargs, ) return msg, pld @@ -433,70 +471,33 @@ async def drain_to_final_msg( # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. pre_result_drained: list[MsgType] = [] - return_msg: Return|None = None + result_msg: Return|Error|None = None while not ( ctx.maybe_error and not ctx._final_result_is_set() ): try: - # TODO: can remove? - # await trio.lowlevel.checkpoint() - - # NOTE: this REPL usage actually works here dawg! Bo - # from .devx._debug import pause - # await pause() - - # TODO: bad idea? - # -[ ] wrap final outcome channel wait in a scope so - # it can be cancelled out of band if needed? - # - # with trio.CancelScope() as res_cs: - # ctx._res_scope = res_cs - # msg: dict = await ctx._rx_chan.receive() - # if res_cs.cancelled_caught: - - # TODO: ensure there's no more hangs, debugging the - # runtime pretty preaase! - # from .devx._debug import pause - # await pause() - - # TODO: can remove this finally? - # we have no more need for the sync draining right - # since we're can kinda guarantee the async - # `.receive()` below will never block yah? - # - # if ( - # ctx._cancel_called and ( - # ctx.cancel_acked - # # or ctx.chan._cancel_called - # ) - # # or not ctx._final_result_is_set() - # # ctx.outcome is not - # # or ctx.chan._closed - # ): - # try: - # msg: dict = await ctx._rx_chan.receive_nowait()() - # except trio.WouldBlock: - # log.warning( - # 'When draining already `.cancel_called` ctx!\n' - # 'No final msg arrived..\n' - # ) - # break - # else: - # msg: dict = await ctx._rx_chan.receive() - - # TODO: don't need it right jefe? - # with trio.move_on_after(1) as cs: - # if cs.cancelled_caught: - # from .devx._debug import pause - # await pause() - - # pray to the `trio` gawds that we're corrent with this - # msg: dict = await ctx._rx_chan.receive() + # receive all msgs, scanning for either a final result + # or error; the underlying call should never raise any + # remote error directly! msg, pld = await ctx._pld_rx.recv_msg_w_pld( ipc=ctx, expect_msg=Return, + raise_error=False, ) + # ^-TODO-^ some bad ideas? + # -[ ] wrap final outcome .receive() in a scope so + # it can be cancelled out of band if needed? + # |_with trio.CancelScope() as res_cs: + # ctx._res_scope = res_cs + # msg: dict = await ctx._rx_chan.receive() + # if res_cs.cancelled_caught: + # + # -[ ] make sure pause points work here for REPLing + # the runtime itself; i.e. ensure there's no hangs! + # |_from tractor.devx._debug import pause + # await pause() + # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -504,7 +505,7 @@ async def drain_to_final_msg( # SHOULD NOT raise that far end error, # 2. WE DID NOT REQUEST that cancel and thus # SHOULD RAISE HERE! 
- except trio.Cancelled: + except trio.Cancelled as taskc: # CASE 2: mask the local cancelled-error(s) # only when we are sure the remote error is @@ -514,7 +515,7 @@ async def drain_to_final_msg( # CASE 1: we DID request the cancel we simply # continue to bubble up as normal. - raise + raise taskc match msg: @@ -534,7 +535,7 @@ async def drain_to_final_msg( # if ctx._rx_chan: # await ctx._rx_chan.aclose() # TODO: ^ we don't need it right? - return_msg = msg + result_msg = msg break # far end task is still streaming to us so discard @@ -565,10 +566,7 @@ async def drain_to_final_msg( f'{pretty_struct.pformat(msg)}\n' ) - return ( - return_msg, - pre_result_drained, - ) + break # drain up to the `msg_limit` hoping to get # a final result or error/ctxc. @@ -604,9 +602,9 @@ async def drain_to_final_msg( case Error(): # TODO: can we replace this with `ctx.maybe_raise()`? # -[ ] would this be handier for this case maybe? - # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') + # |_async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') # re: Exception|None = ctx._remote_error if re: @@ -640,7 +638,7 @@ async def drain_to_final_msg( # raise_overrun_from_self=False, raise_overrun_from_self=raise_overrun, ) - + result_msg = msg break # OOOOOF, yeah obvi we need this.. # XXX we should never really get here @@ -686,6 +684,6 @@ async def drain_to_final_msg( ) return ( - return_msg, + result_msg, pre_result_drained, ) -- 2.34.1 From 51de6bd1bcb823646c263813be84fca44ae9681c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 7 May 2024 09:20:43 -0400 Subject: [PATCH 090/190] Add a "current IPC `Context`" `ContextVar` Expose it from `._state.current_ipc_ctx()` and set it inside `._rpc._invoke()` for child and inside `Portal.open_context()` for parent. Still need to write a few more tests (particularly demonstrating usage throughout multiple nested nurseries on each side) but this suffices as a proto for testing with some debugger request-from-subactor stuff. Other, - use new `.devx.pformat.add_div()` for ctxc messages. - add a block to always traceback dump on corrupted cs stacks. - better handle non-RAEs exception output-formatting in context termination summary log message. - use a summary for `start_status` for msg logging in RPC loop. 
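A condensed sketch of the new lookup from a child-side task, mirroring
the test tweaks below (the endpoint body is made up):

    import tractor
    from tractor._state import current_ipc_ctx

    @tractor.context
    async def child_ep(ctx: tractor.Context) -> None:
        # `._rpc._invoke()` sets the ctx-var before scheduling the
        # endpoint, so no explicit plumbing is needed to find the
        # current IPC ctx from arbitrarily nested task code.
        assert current_ipc_ctx() is ctx
        await ctx.started()
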
--- tests/test_context_stream_semantics.py | 4 + tractor/_rpc.py | 133 +++++++++++++++---------- tractor/_state.py | 23 +++++ 3 files changed, 109 insertions(+), 51 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index cedddf73..8edea510 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -25,6 +25,7 @@ from tractor._exceptions import ( StreamOverrun, ContextCancelled, ) +from tractor._state import current_ipc_ctx from tractor._testing import ( tractor_test, @@ -144,6 +145,8 @@ async def simple_setup_teardown( global _state _state = True + assert current_ipc_ctx() is ctx + # signal to parent that we're up await ctx.started(data + 1) @@ -204,6 +207,7 @@ def test_simple_context( block_forever=callee_blocks_forever, ) as (ctx, sent), ): + assert current_ipc_ctx() is ctx assert sent == 11 if callee_blocks_forever: diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 56d91534..b8dc42b6 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -54,7 +54,10 @@ from ._exceptions import ( pack_error, unpack_error, ) -from .devx import _debug +from .devx import ( + _debug, + add_div, +) from . import _state from .log import get_logger from .msg import ( @@ -246,6 +249,9 @@ async def _errors_relayed_via_ipc( ) -> None: __tracebackhide__: bool = hide_tb + # TODO: a debug nursery when in debug mode! + # async with maybe_open_debugger_nursery() as debug_tn: + # => see matching comment in side `._debug._pause()` try: yield # run RPC invoke body @@ -269,6 +275,8 @@ async def _errors_relayed_via_ipc( # TODO: maybe we'll want different "levels" of debugging # eventualy such as ('app', 'supervisory', 'runtime') ? + # + # -[ ] this if check is duplicate with `._maybe_enter_pm()`.. if not is_multi_cancelled(err): entered_debug: bool = False if ( @@ -292,7 +300,6 @@ async def _errors_relayed_via_ipc( ) ) ): - # await _debug.pause() # XXX QUESTION XXX: is there any case where we'll # want to debug IPC disconnects as a default? # => I can't think of a reason that inspecting this @@ -300,7 +307,14 @@ async def _errors_relayed_via_ipc( # recovery logic - the only case is some kind of # strange bug in our transport layer itself? Going # to keep this open ended for now. - entered_debug = await _debug._maybe_enter_pm(err) + log.debug( + 'RPC task crashed, attempting to enter debugger\n' + f'|_{ctx}' + ) + entered_debug = await _debug._maybe_enter_pm( + err, + api_frame=inspect.currentframe(), + ) if not entered_debug: log.exception( 'RPC task crashed\n' @@ -430,6 +444,8 @@ async def _invoke( ) context: bool = False + assert not _state._ctxvar_Context.get() + # TODO: deprecate this style.. if getattr(func, '_tractor_stream_function', False): # handle decorated ``@tractor.stream`` async functions @@ -553,6 +569,7 @@ async def _invoke( async with trio.open_nursery() as tn: ctx._scope_nursery = tn ctx._scope = tn.cancel_scope + _state._ctxvar_Context.set(ctx) task_status.started(ctx) # TODO: should would be nice to have our @@ -588,7 +605,6 @@ async def _invoke( cs: CancelScope = ctx._scope if cs.cancel_called: - canceller: tuple = ctx.canceller explain: str = f'{ctx.side!r}-side task was cancelled by ' @@ -617,23 +633,9 @@ async def _invoke( else: explain += 'a remote peer' - # TODO: move this "div centering" into - # a helper for use elsewhere! 
- div_chars: str = '------ - ------' - div_offset: int = ( - round(len(explain)/2)+1 - + - round(len(div_chars)/2)+1 - ) - div_str: str = ( - '\n' - + - ' '*div_offset - + - f'{div_chars}\n' - ) explain += ( - div_str + + add_div(message=explain) + + f'<= canceller: {canceller}\n' f'=> cancellee: {our_uid}\n' # TODO: better repr for ctx tasks.. @@ -660,10 +662,10 @@ async def _invoke( boxed_type=trio.Cancelled, canceller=canceller, ) - # assign local error so that the `.outcome` - # resolves to an error for both reporting and - # state checks. - ctx._local_error = ctxc + # does this matter other then for + # consistentcy/testing? |_ no user code should be + # in this scope at this point.. + # ctx._local_error = ctxc raise ctxc # XXX: do we ever trigger this block any more? @@ -673,6 +675,13 @@ async def _invoke( BaseException, ) as scope_error: + if ( + isinstance(scope_error, RuntimeError) + and scope_error.args + and 'Cancel scope stack corrupted' in scope_error.args[0] + ): + log.exception('Cancel scope stack corrupted!?\n') + # _debug.mk_pdb().set_trace() # always set this (child) side's exception as the # local error on the context @@ -706,17 +715,32 @@ async def _invoke( res_type_str, res_str, ) = ( - ('error', f'{type(merr)}',) - if merr + ('error', f'{type(merr)}',) if merr else ( 'result', f'`{repr(ctx.outcome)}`', ) ) - log.runtime( + message: str = ( f'IPC context terminated with a final {res_type_str}\n\n' f'{ctx}' ) + if merr: + from tractor import RemoteActorError + if not isinstance(merr, RemoteActorError): + fmt_merr: str = ( + f'\n{merr!r}\n' + # f'{merr.args[0]!r}\n' + ) + else: + fmt_merr = f'\n{merr!r}' + log.error( + message + + + fmt_merr + ) + else: + log.runtime(message) async def try_ship_error_to_remote( @@ -951,12 +975,19 @@ async def process_messages( kwargs=kwargs, # type-spec this? see `msg.types` uid=actorid, ): - log.runtime( + start_status: str = ( 'Handling RPC `Start` request\n' - f'<= peer: {actorid}\n' - f' |_{ns}.{funcname}({kwargs})\n\n' + f'<= peer: {actorid}\n\n' + f' |_{chan}\n' + f' |_cid: {cid}\n\n' + # f' |_{ns}.{funcname}({kwargs})\n' + f'>> {actor.uid}\n' + f' |_{actor}\n' + f' -> nsp: `{ns}.{funcname}({kwargs})`\n' - f'{pretty_struct.pformat(msg)}\n' + # f' |_{ns}.{funcname}({kwargs})\n\n' + + # f'{pretty_struct.pformat(msg)}\n' ) # runtime-internal endpoint: `Actor.` @@ -985,6 +1016,10 @@ async def process_messages( await chan.send(err_msg) continue + start_status += ( + f' -> func: {func}\n' + ) + # schedule a task for the requested RPC function # in the actor's main "service nursery". # @@ -992,18 +1027,8 @@ async def process_messages( # supervision isolation? would avoid having to # manage RPC tasks individually in `._rpc_tasks` # table? - log.runtime( - f'Spawning task for RPC request\n' - f'<= caller: {chan.uid}\n' - f' |_{chan}\n\n' - # ^-TODO-^ maddr style repr? - # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' - # f'cid="{cid[-16:]} .."\n\n' - - f'=> {actor}\n' - f' |_cid: {cid}\n' - f' |>> {func}()\n' - ) + start_status += ' -> scheduling new task..\n' + log.runtime(start_status) try: ctx: Context = await actor._service_n.start( partial( @@ -1031,8 +1056,9 @@ async def process_messages( # scoped exception from ``_invoke()`` itself. if isinstance(err := ctx, Exception): log.warning( - 'Task for RPC failed?' 
-                    f'|_ {func}()\n\n'
+                    start_status
+                    +
+                    ' -> task for RPC failed?\n\n'
                     f'{err}'
                 )
                 continue

@@ -1151,12 +1177,17 @@ async def process_messages(

     finally:

         # msg debugging for when the machinery is brokey
-        log.runtime(
-            'Exiting IPC msg loop with final msg\n\n'
-            f'<= peer: {chan.uid}\n'
-            f'  |_{chan}\n\n'
-            f'{pretty_struct.pformat(msg)}'
-        )
+        if msg is None:
+            message: str = 'Exiting IPC msg loop without receiving a msg?'
+        else:
+            message: str = (
+                'Exiting IPC msg loop with final msg\n\n'
+                f'<= peer: {chan.uid}\n'
+                f'  |_{chan}\n\n'
+                f'{pretty_struct.pformat(msg)}'
+            )
+
+        log.runtime(message)

     # transport **WAS NOT** disconnected
     return (False, msg)
diff --git a/tractor/_state.py b/tractor/_state.py
index 30346a6a..a3729833 100644
--- a/tractor/_state.py
+++ b/tractor/_state.py
@@ -19,13 +19,19 @@ Per process state

 """
 from __future__ import annotations
+from contextvars import (
+    ContextVar,
+)
 from typing import (
     Any,
     TYPE_CHECKING,
 )

+from trio.lowlevel import current_task
+
 if TYPE_CHECKING:
     from ._runtime import Actor
+    from ._context import Context


 _current_actor: Actor|None = None  # type: ignore # noqa
@@ -110,3 +116,20 @@ def debug_mode() -> bool:

 def is_root_process() -> bool:
     return _runtime_vars['_is_root']
+
+
+_ctxvar_Context: ContextVar[Context] = ContextVar(
+    'ipc_context',
+    default=None,
+)
+
+
+def current_ipc_ctx() -> Context:
+    ctx: Context = _ctxvar_Context.get()
+    if not ctx:
+        from ._exceptions import InternalError
+        raise InternalError(
+            'No IPC context has been allocated for this task yet?\n'
+            f'|_{current_task()}\n'
+        )
+    return ctx
-- 
2.34.1


From f17fd35ccb1eb6cb76ca724c744841c2adfff4df Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 8 May 2024 08:50:16 -0400
Subject: [PATCH 091/190] Allow `Stop` passthrough from `PldRx.recv_msg_w_pld()`

Since we need to allow it (at the least) inside
`drain_to_final_msg()` for handling stream-phase termination races
where we don't want to have to handle a raised error from something
like `Context.result()`. Expose the passthrough option via
a `passthrough_non_pld_msgs: bool` kwarg.

Add a comprehensive comment to `current_pldrx()`.
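
As a rough sketch of the intended use (hypothetical drain-race, but
with the names added by this patch; assumes `Stop`/`Return` are
importable from `tractor.msg.types`):

    from tractor.msg.types import Return, Stop

    # eg. inside a result-drain like `drain_to_final_msg()`:
    msg, pld = await ctx._pld_rx.recv_msg_w_pld(
        ipc=ctx,
        expect_msg=Return,
        # the new kwarg; defaults to `True`
        passthrough_non_pld_msgs=True,
    )
    match msg:
        case Stop():
            # passed through, never pld-decoded
            assert pld is None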
---
 tractor/msg/_ops.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index 1ba623db..3b0b8339 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -355,6 +355,9 @@ class PldRx(Struct):
         ipc: Context|MsgStream,
         expect_msg: MsgType,

+        # NOTE: generally speaking only for handling `Stop`-msgs that
+        # arrive during a call to `drain_to_final_msg()` above!
+        passthrough_non_pld_msgs: bool = True,
         **kwargs,

     ) -> tuple[MsgType, PayloadT]:
@@ -365,6 +368,11 @@ class PldRx(Struct):

         '''
         msg: MsgType = await ipc._rx_chan.receive()
+        if passthrough_non_pld_msgs:
+            match msg:
+                case Stop():
+                    return msg, None

         # TODO: is there some way we can inject the decoded
         # payload into an existing output buffer for the original
         # msg instance?
@@ -389,15 +397,30 @@ _ctxvar_PldRx: ContextVar[PldRx] = ContextVar(

 def current_pldrx() -> PldRx:
     '''
-    Return the current `trio.Task.context`'s msg-payload
-    receiver, the post IPC but pre-app code `MsgType.pld`
-    filter.
+    Return the current `trio.Task.context`'s msg-payload-receiver.
+
+    A payload receiver is the IPC-msg processing sub-sys which
+    filters inter-actor-task communicated payload data, i.e. the
+    `PayloadMsg.pld: PayloadT` field value, AFTER its container
+    shuttle msg (eg. `Started`/`Yield`/`Return`) has been delivered
+    up from `tractor`'s transport layer but BEFORE the data is
+    yielded to application code, normally via an IPC primitive API
+    like, for ex., `pld_data: PayloadT = MsgStream.receive()`.

     Modification of the current payload spec via `limit_plds()`
-    allows an application to contextually filter typed IPC msg
-    content delivered via wire transport.
+    allows a `tractor` application to contextually filter IPC
+    payload content with a type specification as supported by
+    the interchange backend.
+
+    - for `msgspec` see .
+
+    NOTE that the `PldRx` itself is a per-`Context` global sub-system
+    that normally does not change other than the applied pld-spec
+    for the current `trio.Task`.

     '''
+    # ctx: Context = current_ipc_ctx()
+    # return ctx._pld_rx
     return _ctxvar_PldRx.get()
-- 
2.34.1


From ee03b8a214855899ce78726e94566b4718c6bfd8 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 8 May 2024 09:08:01 -0400
Subject: [PATCH 092/190] Big debugger rework, more tolerance for internal err-hangs

Since i was running into them (internal errors) during lock request
machinery dev and was getting all sorts of difficult-to-understand
hangs whenever i intro-ed a bug to either side of the ipc ctx; this
all while trying to get the msg-spec working for `Lock` requesting
subactors..

Deats:
- hideframes for `@acm`s and `trio.Event.wait()`, `Lock.release()`.
- better detail out the `Lock.acquire/release()` impls
- drop `Lock.remote_task_in_debug`, use new `.ctx_in_debug`.
- add a `Lock.release(force: bool)`.
- move most of what was `_acquire_debug_lock_from_root_task()` and
  some of the `lock_tty_for_child().__a[enter/exit]()` logic into
  `Lock.[acquire/release]()` including a bunch more logging.
- move `lock_tty_for_child()` up in the module to below `Lock`, with
  some rework:
  - drop `subactor_uid: tuple` arg since we can just use the `ctx`..
  - add exception handler blocks for reporting internal (impl) errors
    and always force release the lock in such cases.
- extend `DebugStatus` (prolly will rename to `DebugRequest` btw):
  - add `.req_ctx: Context` for subactor side.
  - add `.req_finished: trio.Event` to signal request-task exit.
  - extend `.shield_sigint()` doc-str.
  - add `.release()` to encaps all the state mgmt previously strewn
    about inside `._pause()`..
- use new `DebugStatus.release()` to replace all the duplication:
  - inside `PdbREPL.set_[continue/quit]()`.
  - inside `._pause()` for the subactor branch on internal
    repl-invocation error cases,
  - in the `_enter_repl_sync()` closure on error,
- replace `apply_debug_codec()` -> `apply_debug_pldec()` in tandem
  with the new `PldRx` sub-sys which handles the new `__pld_spec__`.
- add a new `pformat_cs()` helper, orig to help debug a cs-stack
  corruption; going to move it to `.devx.pformat` obvi.
- rename `wait_for_parent_stdin_hijack()` -> `request_root_stdio_lock()`
  with improvements:
  - better doc-str and add todos,
  - use `DebugStatus` more stringently to encaps all subactor req
    state.
  - error handling blocks for cancellation and straight up impl errors
    directly around the `.open_context()` block with the latter doing
    a `ctx.cancel()` to avoid hanging in the shielded `.req_cs` scope.
  - similar exc blocks for the func's overall body with explicit
    `log.exception()` reporting.
  - only set the new `DebugStatus.req_finished: trio.Event` in
    `finally`.
- rename `mk_mpdb()` -> `mk_pdb()` and don't call `.shield_sigint()`
  implicitly since the caller usage does matter for this.
- factor out `any_connected_locker_child()` from the SIGINT handler.
- rework SIGINT handler to better handle any stale-lock/hang cases:
  - use new `Lock.ctx_in_debug: Context` to detect subactor-in-debug,
    and use it to cancel any lock request instead of the lower level
  - use `problem: str` summary approach to log emissions.
- rework `_pause()` given all of the above, stuff not yet mentioned:
  - don't take `shield: bool` input and proxy to `debug_func()` (for
    now).
  - drop `extra_frames_up_when_async: int` usage, expect
    `**debug_func_kwargs` to passthrough an `api_frame: FrameType`
    (more on this later).
  - lotsa asserts around the request ctx vs. task-in-debug ctx using
    new `current_ipc_ctx()`.
  - asserts around `DebugStatus` state.
- rework and simplify the `debug_func` hooks,
  `_set_trace()`/`_post_mortem()`:
  - make them accept a non-optional `repl: PdbREPL` and `api_frame:
    FrameType` which should be used to set the current frame when the
    REPL engages.
  - always hide the hook frames.
  - always accept a `tb: TracebackType` to `_post_mortem()`.
   |_ copy and re-impl what was the delegation to
      `pdbp.xpm()`/`pdbp.post_mortem()` and instead call the
      underlying `Pdb.interaction()` ourselves with a `caller_frame`
      and tb instance.
- adjust the public `.pause()` impl:
  - accept optional `hide_tb` and `api_frame` inputs.
  - mask opening a cancel-scope for now (can cause `trio` stack
    corruption, see notes) and thus don't use the `shield` input
    other than to eventually passthrough to `_post_mortem()`?
   |_ thus drop `task_status` support for now as well.
   |_ pretty sure the correct soln is a debug-nursery around
      `._invoke()`.
- since no longer using `extra_frames_up_when_async` inside
  `debug_func()`s ensure all public apis pass an `api_frame`.
- re-impl our `tractor.post_mortem()` to directly call into
  `._pause()` instead of binding in via `partial` and mk it take
  similar input as `.pause()`.
- drop `Lock.release()` from `_maybe_enter_pm()`, expose and pass
  expected frame and tb.
- use necessary changes from all the above within
  `maybe_wait_for_debugger()` and `acquire_debug_lock()`.

Lel, sorry thought that would be shorter..
There's still a lot more re-org to do particularly with `DebugStatus`
encapsulation but it's coming in follow up.
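For a sanity check of the re-org, a minimal crash-handling sketch
(hypothetical script, public APIs only) that should exercise the new
lock-request plumbing end-to-end:

    import trio
    import tractor

    async def breaks_here() -> None:
        # in a subactor this should fire `request_root_stdio_lock()`,
        # acquire the root's TTY `Lock` and enter
        # `PdbREPL.interaction()`; 'c'/'q' then unblocks via the new
        # `DebugStatus.release()` path.
        await tractor.pause()

    async def main() -> None:
        async with tractor.open_nursery(
            debug_mode=True,
        ) as an:
            await an.run_in_actor(breaks_here)

    if __name__ == '__main__':
        trio.run(main)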
---
 tractor/devx/__init__.py |   13 +-
 tractor/devx/_debug.py   | 1729 ++++++++++++++++++++++----------------
 2 files changed, 993 insertions(+), 749 deletions(-)

diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py
index c1a93878..bb72d1f9 100644
--- a/tractor/devx/__init__.py
+++ b/tractor/devx/__init__.py
@@ -30,17 +30,19 @@ from ._debug import (
     open_crash_handler as open_crash_handler,
     maybe_open_crash_handler as maybe_open_crash_handler,
     post_mortem as post_mortem,
+    mk_pdb as mk_pdb,
 )
 from ._stackscope import (
     enable_stack_on_sig as enable_stack_on_sig,
 )
-# from .pformat import (
-#     add_div as add_div,
-#     pformat_caller_frame as pformat_caller_frame,
-#     pformat_boxed_tb as pformat_boxed_tb,
-# )
+from .pformat import (
+    add_div as add_div,
+    pformat_caller_frame as pformat_caller_frame,
+    pformat_boxed_tb as pformat_boxed_tb,
+)


+# TODO, move this to a new `.devx._pdbp` mod?
 def _enable_readline_feats() -> str:
     '''
     Handle `readline` when compiled with `libedit` to avoid breaking
@@ -72,5 +74,4 @@ def _enable_readline_feats() -> str:
     return 'readline'


-# TODO, move this to a new `.devx._pdbp` mod?
_enable_readline_feats() diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index e4ab7d83..0567e42a 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -26,11 +26,13 @@ from contextlib import ( contextmanager as cm, nullcontext, _GeneratorContextManager, + _AsyncGeneratorContextManager, ) from functools import ( partial, cached_property, ) +import inspect import os import signal import sys @@ -48,13 +50,14 @@ from typing import ( from types import ( FrameType, ModuleType, + TracebackType, ) from msgspec import Struct import pdbp import sniffio -import tractor import trio +from trio import CancelScope from trio.lowlevel import ( current_task, Task, @@ -62,26 +65,25 @@ from trio.lowlevel import ( from trio import ( TaskStatus, ) - +import tractor from tractor.log import get_logger -from tractor.msg import ( - _codec, -) from tractor._state import ( current_actor, is_root_process, debug_mode, + current_ipc_ctx, ) -from tractor._exceptions import ( - is_multi_cancelled, - ContextCancelled, -) -from tractor._ipc import Channel +# from .pformat import pformat_caller_frame if TYPE_CHECKING: + from tractor._ipc import Channel + from tractor._context import Context from tractor._runtime import ( Actor, ) + from tractor.msg import ( + _codec, + ) log = get_logger(__name__) @@ -115,6 +117,8 @@ log = get_logger(__name__) pdbp.hideframe(trio._core._run.NurseryManager.__aexit__) pdbp.hideframe(trio._core._run.CancelScope.__exit__) pdbp.hideframe(_GeneratorContextManager.__exit__) +pdbp.hideframe(_AsyncGeneratorContextManager.__aexit__) +pdbp.hideframe(trio.Event.wait) __all__ = [ 'breakpoint', @@ -141,14 +145,14 @@ class LockRelease( cid: str -__msg_spec__: TypeAlias = LockStatus|LockRelease +__pld_spec__: TypeAlias = LockStatus|LockRelease class Lock: ''' - Actor global debug lock state. + Actor-tree-global debug lock state, exists only in a root process. - Mostly to avoid a lot of ``global`` declarations for now XD. + Mostly to avoid a lot of global declarations for now XD. ''' # XXX local ref to the `Pbp` instance, ONLY set in the @@ -157,30 +161,17 @@ class Lock: # that does not have this lock acquired in the root proc. repl: PdbREPL|None = None - # placeholder for function to set a ``trio.Event`` on debugger exit - # pdb_release_hook: Callable | None = None - - remote_task_in_debug: str|None = None - @staticmethod - def get_locking_task_cs() -> trio.CancelScope|None: - if is_root_process(): - return Lock._locking_task_cs - - raise RuntimeError( - '`Lock.locking_task_cs` is invalid in subactors!' - ) - - @staticmethod - def set_locking_task_cs( - cs: trio.CancelScope, - ) -> None: + def get_locking_task_cs() -> CancelScope|None: if not is_root_process(): raise RuntimeError( '`Lock.locking_task_cs` is invalid in subactors!' ) - Lock._locking_task_cs = cs + if ctx := Lock.ctx_in_debug: + return ctx._scope + + return None # ROOT ONLY # ------ - ------- @@ -195,12 +186,14 @@ class Lock: # * in case it needs to be manually cancelled in root due to # a stale lock condition (eg. 
IPC failure with the locking # child - global_actor_in_debug: tuple[str, str]|None = None - no_remote_has_tty: trio.Event|None = None - _locking_task_cs: trio.CancelScope|None = None + ctx_in_debug: Context|None = None + no_remote_has_tty: trio.Event|None = None _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() - _blocked: set[tuple[str, str]] = set() # `Actor.uid` block list + _blocked: set[ + tuple[str, str] # `Actor.uid` for per actor + |str # Context.cid for per task + ] = set() @classmethod def repr(cls) -> str: @@ -213,12 +206,11 @@ class Lock: if is_root_process(): lock_stats: trio.LockStatistics = cls._debug_lock.statistics() fields += ( - f'global_actor_in_debug: {cls.global_actor_in_debug}\n' f'no_remote_has_tty: {cls.no_remote_has_tty}\n' - f'remote_task_in_debug: {cls.remote_task_in_debug}\n' - f'_locking_task_cs: {cls.get_locking_task_cs()}\n' f'_blocked: {cls._blocked}\n\n' + f'ctx_in_debug: {cls.ctx_in_debug}\n\n' + f'_debug_lock: {cls._debug_lock}\n' f'lock_stats: {lock_stats}\n' ) @@ -234,16 +226,29 @@ class Lock: ) @classmethod - def release(cls): + @pdbp.hideframe + def release( + cls, + force: bool = False, + ): + lock: trio.StrictFIFOLock = cls._debug_lock try: - if not DebugStatus.is_main_trio_thread(): - trio.from_thread.run_sync( - cls._debug_lock.release - ) + if lock.locked(): + if not DebugStatus.is_main_trio_thread(): + trio.from_thread.run_sync( + cls._debug_lock.release + ) + else: + cls._debug_lock.release() + + message: str = 'TTY lock released for child\n' else: - cls._debug_lock.release() + message: str = 'TTY lock not held by any child\n' except RuntimeError as rte: + message: str = 'TTY lock FAILED to release for child??\n' + log.exception(message) + # uhhh makes no sense but been seeing the non-owner # release error even though this is definitely the task # that locked? @@ -256,7 +261,7 @@ class Lock: # raise RuntimeError( # 'Stale `Lock` detected, no remote task active!?\n' # f'|_{owner}\n' - # # f'{Lock}' + # # f'{cls}' # ) from rte if owner: @@ -266,23 +271,265 @@ class Lock: # something somethin corrupts a cancel-scope # somewhere.. + finally: + # IFF there are no more requesting tasks queued up fire, the + # "tty-unlocked" event thereby alerting any monitors of the lock that + # we are now back in the "tty unlocked" state. This is basically + # and edge triggered signal around an empty queue of sub-actor + # tasks that may have tried to acquire the lock. + stats = cls._debug_lock.statistics() + if ( + not stats.owner + or force + # and cls.no_remote_has_tty is not None + ): + message += '-> No more child ctx tasks hold the TTY lock!\n' + + # set and release + if cls.no_remote_has_tty is not None: + cls.no_remote_has_tty.set() + cls.no_remote_has_tty = None + + # cls.remote_task_in_debug = None + + else: + message += ( + f'-> Not signalling `Lock.no_remote_has_tty` since it has value:{cls.no_remote_has_tty}\n' + ) + + else: + # wakeup any waiters since the lock was released + # (presumably) temporarily. + if no_remote_has_tty := cls.no_remote_has_tty: + no_remote_has_tty.set() + no_remote_has_tty = trio.Event() + + message += ( + f'-> A child ctx task still owns the `Lock` ??\n' + f' |_owner task: {stats.owner}\n' + ) + + cls.ctx_in_debug = None + + @classmethod + @acm + async def acquire( + cls, + ctx: Context, + # subactor_uid: tuple[str, str], + # remote_task_uid: str, + + ) -> AsyncIterator[trio.StrictFIFOLock]: + ''' + Acquire a root-actor local FIFO lock which tracks mutex access of + the process tree's global debugger breakpoint. 
+ + This lock avoids tty clobbering (by preventing multiple processes + reading from stdstreams) and ensures multi-actor, sequential access + to the ``pdb`` repl. + + ''' + if not is_root_process(): + raise RuntimeError('Only callable by a root actor task!') + + # subactor_uid: tuple[str, str] = ctx.chan.uid + we_acquired: bool = False + log.runtime( + f'Attempting to acquire TTY lock for sub-actor\n' + f'{ctx}' + ) try: - # sometimes the ``trio`` might already be terminated in - # which case this call will raise. - if DebugStatus.repl_release is not None: - DebugStatus.repl_release.set() + pre_msg: str = ( + f'Entering lock checkpoint for sub-actor\n' + f'{ctx}' + ) + stats = cls._debug_lock.statistics() + if owner := stats.owner: + # and cls.no_remote_has_tty is not None + pre_msg += ( + f'\n' + f'`Lock` already held by local task?\n' + f'{owner}\n\n' + # f'On behalf of task: {cls.remote_task_in_debug!r}\n' + f'On behalf of IPC ctx\n' + f'{ctx}' + ) + log.runtime(pre_msg) + + # NOTE: if the surrounding cancel scope from the + # `lock_tty_for_child()` caller is cancelled, this line should + # unblock and NOT leave us in some kind of + # a "child-locked-TTY-but-child-is-uncontactable-over-IPC" + # condition. + await cls._debug_lock.acquire() + cls.ctx_in_debug = ctx + we_acquired = True + if cls.no_remote_has_tty is None: + # mark the tty lock as being in use so that the runtime + # can try to avoid clobbering any connection from a child + # that's currently relying on it. + cls.no_remote_has_tty = trio.Event() + # cls.remote_task_in_debug = remote_task_uid + + log.runtime( + f'TTY lock acquired for sub-actor\n' + f'{ctx}' + ) + + # NOTE: critical section: this yield is unshielded! + + # IF we received a cancel during the shielded lock entry of some + # next-in-queue requesting task, then the resumption here will + # result in that ``trio.Cancelled`` being raised to our caller + # (likely from ``lock_tty_for_child()`` below)! In + # this case the ``finally:`` below should trigger and the + # surrounding caller side context should cancel normally + # relaying back to the caller. + + yield cls._debug_lock finally: - cls.repl = None - cls.global_actor_in_debug = None + message :str = 'Exiting `Lock.acquire()` on behalf of sub-actor\n' + if ( + we_acquired + # and + # cls._debug_lock.locked() + ): + message += '-> TTY lock released by child\n' + cls.release() - # restore original sigint handler - DebugStatus.unshield_sigint() - # actor-local state, irrelevant for non-root. - DebugStatus.repl_task = None + else: + message += '-> TTY lock never acquired by child??\n' + + log.runtime( + f'{message}\n' + f'{ctx}' + ) -# TODO: actually use this instead throughout for subs! +@tractor.context +async def lock_tty_for_child( + + ctx: Context, + subactor_task_uid: tuple[str, int], + +) -> LockStatus|LockRelease: + ''' + Lock the TTY in the root process of an actor tree in a new + inter-actor-context-task such that the ``pdbp`` debugger console + can be mutex-allocated to the calling sub-actor for REPL control + without interference by other processes / threads. + + NOTE: this task must be invoked in the root process of the actor + tree. It is meant to be invoked as an rpc-task and should be + highly reliable at releasing the mutex complete! 
+ + ''' + subactor_uid: tuple[str, str] = ctx.chan.uid + # NOTE: we use the IPC ctx's cancel scope directly in order to + # ensure that on any transport failure, or cancellation request + # from the child we expect + # `Context._maybe_cancel_and_set_remote_error()` to cancel this + # scope despite the shielding we apply below. + debug_lock_cs: CancelScope = ctx._scope + + try: + if ctx.cid in Lock._blocked: + raise RuntimeError( + f'Double lock request!?\n' + f'The same remote task already has an active request for TTY lock ??\n\n' + f'subactor uid: {subactor_uid}\n\n' + + 'This might be mean that the requesting task ' + 'in `request_root_stdio_lock()` may have crashed?\n' + 'Consider that an internal bug exists given the TTY ' + '`Lock`ing IPC dialog..\n' + ) + + root_task_name: str = current_task().name + if tuple(subactor_uid) in Lock._blocked: + log.warning( + f'Subactor is blocked from acquiring debug lock..\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n' + ) + ctx._enter_debugger_on_cancel: bool = False + await ctx.cancel(f'Debug lock blocked for {subactor_uid}') + # TODO: remove right? + # return LockStatus( + # subactor_uid=subactor_uid, + # cid=ctx.cid, + # locked=False, + # ) + + # TODO: when we get to true remote debugging + # this will deliver stdin data? + + log.debug( + 'Subactor attempting to acquire TTY lock\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n' + ) + DebugStatus.shield_sigint() + Lock._blocked.add(ctx.cid) + with ( + # enable the locking msgspec + apply_debug_pldec(), + ): + async with Lock.acquire(ctx=ctx): + debug_lock_cs.shield = True + + # indicate to child that we've locked stdio + await ctx.started( + LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=True, + ) + ) + + log.debug( f'Actor {subactor_uid} acquired TTY lock') + + # wait for unlock pdb by child + async with ctx.open_stream() as stream: + release_msg: LockRelease = await stream.receive() + + # TODO: security around only releasing if + # these match? + log.pdb( + f'TTY lock released requested\n\n' + f'{release_msg}\n' + ) + assert release_msg.cid == ctx.cid + assert release_msg.subactor_uid == tuple(subactor_uid) + + log.debug(f'Actor {subactor_uid} released TTY lock') + + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) + + except BaseException: + log.exception( + 'Errored during root TTY-lock dialog?\n' + 'Forcing release since an internal error caused this!\n' + ) + Lock.release(force=True) + raise + + finally: + Lock._blocked.remove(ctx.cid) + if (no_locker := Lock.no_remote_has_tty): + no_locker.set() + + DebugStatus.unshield_sigint() + + +# TODO: rename to ReplState or somethin? +# DebugRequest, make it a singleton instance? class DebugStatus: ''' Singleton-state for debugging machinery in a subactor. 
@@ -297,26 +544,26 @@ class DebugStatus: ''' repl: PdbREPL|None = None repl_task: Task|None = None - req_cs: trio.CancelScope|None = None + req_ctx: Context|None = None + req_cs: CancelScope|None = None repl_release: trio.Event|None = None - + req_finished: trio.Event|None = None lock_status: LockStatus|None = None - _orig_sigint_handler: Callable | None = None + _orig_sigint_handler: Callable|None = None _trio_handler: ( Callable[[int, FrameType|None], Any] |int | None ) = None - @classmethod def repr(cls) -> str: fields: str = ( f'repl: {cls.repl}\n' f'repl_task: {cls.repl_task}\n' f'repl_release: {cls.repl_release}\n' - f'req_cs: {cls.req_cs}\n' + f'req_ctx: {cls.req_ctx}\n' ) body: str = textwrap.indent( fields, @@ -328,19 +575,37 @@ class DebugStatus: ')>' ) + # TODO: how do you get this to work on a non-inited class? + # __repr__ = classmethod(repr) + # __str__ = classmethod(repr) + @classmethod def shield_sigint(cls): ''' Shield out SIGINT handling (which by default triggers - `trio.Task` cancellation) in subactors when the `pdb` REPL + `trio.Task` cancellation) in subactors when a `pdb` REPL is active. - Avoids cancellation of the current actor (task) when the - user mistakenly sends ctl-c or a signal is received from - an external request; explicit runtime cancel requests are - allowed until the use exits the REPL session using - 'continue' or 'quit', at which point the orig SIGINT - handler is restored. + Avoids cancellation of the current actor (task) when the user + mistakenly sends ctl-c or via a recevied signal (from an + external request). Explicit runtime cancel requests are + allowed until the current REPL-session (the blocking call + `Pdb.interaction()`) exits, normally via the 'continue' or + 'quit' command - at which point the orig SIGINT handler is + restored via `.unshield_sigint()` below. + + Impl notes: + ----------- + - we prefer that `trio`'s default handler is always used when + SIGINT is unshielded (hence disabling the `pdb.Pdb` + defaults in `mk_pdb()`) such that reliable KBI cancellation + is always enforced. + + - we always detect whether we're running from a non-main + thread, in which case schedule the SIGINT shielding override + to in the main thread as per, + + https://docs.python.org/3/library/signal.html#signals-and-threads ''' # @@ -364,6 +629,12 @@ class DebugStatus: @classmethod @pdbp.hideframe # XXX NOTE XXX see below in `.pause_from_sync()` def unshield_sigint(cls): + ''' + Un-shield SIGINT for REPL-active (su)bactor. + + See details in `.shield_sigint()`. + + ''' # always restore ``trio``'s sigint handler. see notes below in # the pdb factory about the nightmare that is that code swapping # out the handler when the repl activates... @@ -374,6 +645,11 @@ class DebugStatus: cls._trio_handler, ) else: + trio_h: Callable = cls._trio_handler + # XXX should never really happen XXX + if not trio_h: + mk_pdb().set_trace() + signal.signal( signal.SIGINT, cls._trio_handler, @@ -411,6 +687,36 @@ class DebugStatus: # is not threading.main_thread() # ) + @classmethod + @pdbp.hideframe + def release( + cls, + cancel_req_task: bool = True, + ): + try: + # sometimes the task might already be terminated in + # which case this call will raise an RTE? 
+ if cls.repl_release is not None: + cls.repl_release.set() + + finally: + # if req_ctx := cls.req_ctx: + # req_ctx._scope.cancel() + + if ( + cancel_req_task + and + (req_cs := cls.req_cs) + ): + req_cs.cancel() + + # restore original sigint handler + cls.unshield_sigint() + + # actor-local state, irrelevant for non-root. + cls.repl_task = None + cls.repl = None + class TractorConfig(pdbp.DefaultConfig): ''' @@ -466,13 +772,24 @@ class PdbREPL(pdbp.Pdb): try: super().set_continue() finally: - Lock.release() + DebugStatus.release() + + # NOTE: for subactors the stdio lock is released via the + # allocated RPC locker task, so for root we have to do it + # manually. + if is_root_process(): + Lock.release() def set_quit(self): try: super().set_quit() finally: - Lock.release() + DebugStatus.release( + cancel_req_task=False, + ) + + if is_root_process(): + Lock.release() # TODO: special handling where we just want the next LOC and # not to resume to the next pause/crash point? @@ -515,413 +832,297 @@ class PdbREPL(pdbp.Pdb): return None -@acm -async def _acquire_debug_lock_from_root_task( - subactor_uid: tuple[str, str], - remote_task_uid: str, - -) -> AsyncIterator[trio.StrictFIFOLock]: - ''' - Acquire a root-actor local FIFO lock which tracks mutex access of - the process tree's global debugger breakpoint. - - This lock avoids tty clobbering (by preventing multiple processes - reading from stdstreams) and ensures multi-actor, sequential access - to the ``pdb`` repl. - - ''' - # task_name: str = current_task().name - we_acquired: bool = False - - log.runtime( - f'Attempting to acquire TTY lock for,\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - try: - pre_msg: str = ( - f'Entering lock checkpoint for sub-actor\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - stats = Lock._debug_lock.statistics() - if owner := stats.owner: - # and Lock.no_remote_has_tty is not None - pre_msg += ( - f'\n' - f'`Lock` already held by local task\n' - f'{owner}\n\n' - f'On behalf of remote task: {Lock.remote_task_in_debug!r}\n' - ) - log.runtime(pre_msg) - - # NOTE: if the surrounding cancel scope from the - # `lock_tty_for_child()` caller is cancelled, this line should - # unblock and NOT leave us in some kind of - # a "child-locked-TTY-but-child-is-uncontactable-over-IPC" - # condition. - await Lock._debug_lock.acquire() - we_acquired = True - - if Lock.no_remote_has_tty is None: - # mark the tty lock as being in use so that the runtime - # can try to avoid clobbering any connection from a child - # that's currently relying on it. - Lock.no_remote_has_tty = trio.Event() - Lock.remote_task_in_debug = remote_task_uid - - Lock.global_actor_in_debug = subactor_uid - log.runtime( - f'TTY lock acquired for,\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - - # NOTE: critical section: this yield is unshielded! - - # IF we received a cancel during the shielded lock entry of some - # next-in-queue requesting task, then the resumption here will - # result in that ``trio.Cancelled`` being raised to our caller - # (likely from ``lock_tty_for_child()`` below)! In - # this case the ``finally:`` below should trigger and the - # surrounding caller side context should cancel normally - # relaying back to the caller. 
- - yield Lock._debug_lock - - finally: - if ( - we_acquired - and - Lock._debug_lock.locked() - ): - Lock._debug_lock.release() - - # IFF there are no more requesting tasks queued up fire, the - # "tty-unlocked" event thereby alerting any monitors of the lock that - # we are now back in the "tty unlocked" state. This is basically - # and edge triggered signal around an empty queue of sub-actor - # tasks that may have tried to acquire the lock. - stats = Lock._debug_lock.statistics() - if ( - not stats.owner - # and Lock.no_remote_has_tty is not None - ): - # log.runtime( - log.info( - f'No more child ctx tasks hold the TTY lock!\n' - f'last subactor: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - if Lock.no_remote_has_tty is not None: - # set and release - Lock.no_remote_has_tty.set() - Lock.no_remote_has_tty = None - Lock.remote_task_in_debug = None - else: - log.warning( - 'Not signalling `Lock.no_remote_has_tty` since it has value:\n' - f'{Lock.no_remote_has_tty}\n' - ) - else: - log.info( - f'A child ctx tasks still holds the TTY lock ??\n' - f'last subactor: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - f'current local owner task: {stats.owner}\n' - ) - - Lock.global_actor_in_debug = None - log.runtime( - 'TTY lock released by child\n' - f'last subactor: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - - -@tractor.context -async def lock_tty_for_child( - - ctx: tractor.Context, - - # TODO: when we finally get a `Start.params: ParamSpec` - # working it'd sure be nice to have `msgspec` auto-decode this - # to an actual tuple XD - subactor_uid: tuple[str, str], - subactor_task_uid: tuple[str, int], - -) -> LockStatus|LockRelease: - ''' - Lock the TTY in the root process of an actor tree in a new - inter-actor-context-task such that the ``pdbp`` debugger console - can be mutex-allocated to the calling sub-actor for REPL control - without interference by other processes / threads. - - NOTE: this task must be invoked in the root process of the actor - tree. It is meant to be invoked as an rpc-task and should be - highly reliable at releasing the mutex complete! - - ''' - req_task_uid: tuple = tuple(subactor_task_uid) - if req_task_uid in Lock._blocked: - raise RuntimeError( - f'Double lock request!?\n' - f'The same remote task already has an active request for TTY lock ??\n\n' - f'task uid: {req_task_uid}\n' - f'subactor uid: {subactor_uid}\n\n' - - 'This might be mean that the requesting task ' - 'in `wait_for_parent_stdin_hijack()` may have crashed?\n' - 'Consider that an internal bug exists given the TTY ' - '`Lock`ing IPC dialog..\n' - ) - - root_task_name: str = current_task().name - if tuple(subactor_uid) in Lock._blocked: - log.warning( - f'Subactor is blocked from acquiring debug lock..\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {subactor_task_uid}\n' - ) - ctx._enter_debugger_on_cancel: bool = False - await ctx.cancel(f'Debug lock blocked for {subactor_uid}') - return LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=False, - ) - - # TODO: when we get to true remote debugging - # this will deliver stdin data? 
- - log.debug( - 'Subactor attempting to acquire TTY lock\n' - f'root task: {root_task_name}\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {subactor_task_uid}\n' - ) - DebugStatus.shield_sigint() - try: - Lock._blocked.add(req_task_uid) - with ( - # NOTE: though a cs is created for every subactor lock - # REQUEST in this ctx-child task, only the root-task - # holding the `Lock` (on behalf of the ctx parent task - # in a subactor) will set - # `Lock._locking_task_cs` such that if the - # lock holdingn task ever needs to be cancelled (since - # it's shielded by default) that global ref can be - # used to do so! - trio.CancelScope(shield=True) as debug_lock_cs, - - # TODO: make this ONLY limit the pld_spec such that we - # can on-error-decode-`.pld: Raw` fields in - # `Context._deliver_msg()`? - _codec.limit_msg_spec( - payload_spec=__msg_spec__, - ) as codec, - ): - # sanity? - # TODO: don't need the ref right? - assert codec is _codec.current_codec() - - async with _acquire_debug_lock_from_root_task( - subactor_uid, - subactor_task_uid, - ): - # XXX SUPER IMPORTANT BELOW IS ON THIS LINE XXX - # without that the root cs might be, - # - set and then removed in the finally block by - # a task that never acquired the lock, leaving - # - the task that DID acquire the lock STUCK since - # it's original cs was GC-ed bc the first task - # already set the global ref to `None` - Lock.set_locking_task_cs(debug_lock_cs) - - # indicate to child that we've locked stdio - await ctx.started( - LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=True, - ) - ) - - log.debug( f'Actor {subactor_uid} acquired TTY lock') - - # wait for unlock pdb by child - async with ctx.open_stream() as stream: - release_msg: LockRelease = await stream.receive() - - # TODO: security around only releasing if - # these match? - log.pdb( - f'TTY lock released requested\n\n' - f'{release_msg}\n' - ) - assert release_msg.cid == ctx.cid - assert release_msg.subactor_uid == tuple(subactor_uid) - - log.debug(f'Actor {subactor_uid} released TTY lock') - - return LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=False, - ) - - finally: - debug_lock_cs.cancel() - Lock._blocked.remove(req_task_uid) - Lock.set_locking_task_cs(None) - DebugStatus.unshield_sigint() - - @cm -def apply_debug_codec() -> _codec.MsgCodec: +def apply_debug_pldec() -> _codec.MsgCodec: ''' Apply the subactor TTY `Lock`-ing protocol's msgspec temporarily (only in the current task). ''' - with ( - _codec.limit_msg_spec( - payload_spec=__msg_spec__, - ) as debug_codec, - ): - assert debug_codec is _codec.current_codec() - log.pdb( - 'Applied `.devx._debug` msg-spec via codec\n' - f'{debug_codec}\n' - ) - yield debug_codec - log.pdb( - 'REMOVED `.devx._debug` msg-spec via codec\n' - f'{debug_codec}\n' + from tractor.msg import ( + _ops as msgops, + ) + orig_plrx: msgops.PldRx = msgops.current_pldrx() + orig_pldec: msgops.MsgDec = orig_plrx.pld_dec + + try: + with msgops.limit_plds( + spec=__pld_spec__, + ) as debug_dec: + assert debug_dec is msgops.current_pldrx().pld_dec + log.runtime( + 'Applied `.devx._debug` pld-spec\n\n' + f'{debug_dec}\n' + ) + yield debug_dec + + finally: + assert ( + (plrx := msgops.current_pldrx()) is orig_plrx + and + plrx.pld_dec is orig_pldec + ) + log.runtime( + 'Reverted to previous pld-spec\n\n' + f'{orig_pldec}\n' + ) + +# TODO: add this formatter to `.devx.pformat()`! 
+def pformat_cs(
+    cs: CancelScope,
+    var_name: str = 'cs',
+) -> str:
+    return (
+        f'{var_name}: {cs}\n'
+        f'{var_name}.cancel_called = {cs.cancel_called}\n'
+        f'{var_name}.cancelled_caught = {cs.cancelled_caught}\n'
+        f'{var_name}._cancel_status = {cs._cancel_status}\n'
+        f'{var_name}.shield = {cs.shield}\n'
+    )


-async def wait_for_parent_stdin_hijack(
+async def request_root_stdio_lock(
     actor_uid: tuple[str, str],
     task_uid: tuple[str, int],
-    task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED
+    task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED
 ):
     '''
-    Connect to the root actor via a ``Context`` and invoke a task which
-    locks a root-local TTY lock: ``lock_tty_for_child()``; this func
-    should be called in a new task from a child actor **and never the
-    root*.
+    Connect to the root actor of this process tree and RPC-invoke
+    a task which acquires a std-streams global `Lock`: an actor-tree
+    global mutex which prevents other subactors from entering
+    a `PdbREPL` at the same time as any other.

-    This function is used by any sub-actor to acquire mutex access to
-    the ``pdb`` REPL and thus the root's TTY for interactive debugging
-    (see below inside ``pause()``). It can be used to ensure that
-    an intermediate nursery-owning actor does not clobber its children
-    if they are in debug (see below inside
-    ``maybe_wait_for_debugger()``).
+    The actual `Lock` singleton exists ONLY in the root actor's
+    memory and does nothing more than set process-tree global state.
+    The actual `PdbREPL` interaction is completely isolated to each
+    sub-actor, with the `Lock` merely providing the multi-process
+    syncing mechanism to prevent any subactor (or the root itself)
+    from entering the REPL at the same time.

     '''
-    from .._discovery import get_root
+    # TODO: likely we can implement this mutex more generally as
+    #       a `._sync.Lock`?
+    # -[ ] simply add the wrapping needed for the debugger specifics?
+    #   - the `__pld_spec__` impl and maybe better APIs for the client
+    #     vs. server side state tracking? (`Lock` + `DebugStatus`)
+    # -[ ] for eg. `mp` has a multi-proc lock via the manager
+    #   - https://docs.python.org/3.8/library/multiprocessing.html#synchronization-primitives
+    # -[ ] technically we need a `RLock` since re-acquire should be a noop
+    #   - https://docs.python.org/3.8/library/multiprocessing.html#multiprocessing.RLock
+    DebugStatus.req_finished = trio.Event()
+    try:
+        from tractor._discovery import get_root
+        with (
+            # NOTE: we need this to ensure that this task exits
+            # BEFORE the REPL instance raises an error like
+            # `bdb.BdbQuit` directly, OW you get a trio cs stack
+            # corruption!
+            # Further, since this task is spawned inside the
+            # `Context._scope_nursery: trio.Nursery`, once an RPC
+            # task errors that cs is cancel_called and so if we want
+            # to debug the RPC task that failed we need to shield
+            # against that expected `.cancel()` call and instead
+            # expect all of the `PdbREPL.set_[continue/quit]()`
+            # methods to unblock this task by setting the
+            # `.repl_release: trio.Event`.
+            trio.CancelScope(shield=True) as req_cs,

-    with (
-        trio.CancelScope(shield=True) as cs,
-        apply_debug_codec(),
-    ):
-        DebugStatus.req_cs = cs
-        try:
-            # TODO: merge into sync async with ?
-            async with get_root() as portal:
-                # this syncs to child's ``Context.started()`` call.
- async with portal.open_context( - lock_tty_for_child, - subactor_uid=actor_uid, - subactor_task_uid=task_uid, + # NOTE: set it here in the locker request task bc it's + # possible for multiple such requests for the lock in any + # single sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered by the requesting + # application task who called + # `.pause()`/`.post_mortem()`. + # + # SO, applying the pld-spec here means it is only applied to + # this IPC-ctx request task, NOT any other task(s) + # including the one that actually enters the REPL. This + # is oc desired bc ow the debugged task will msg-type-error. + # + apply_debug_pldec() as debug_dec, + ): + log.critical( + 'Request cancel-scope is:\n\n' + f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' - ) as (ctx, resp): - log.pdb( - 'Subactor locked TTY with msg\n\n' - f'{resp}\n' - ) - assert resp.subactor_uid == actor_uid - assert resp.cid + ) + DebugStatus.req_cs = req_cs + try: + # TODO: merge into single async with ? + async with get_root() as portal: - async with ctx.open_stream() as stream: - try: # to unblock local caller + async with portal.open_context( + lock_tty_for_child, + subactor_task_uid=task_uid, + ) as (ctx, status): + + DebugStatus.req_ctx = ctx + + from tractor.msg import ( + _ops as msgops, + ) + assert ( + msgops.current_pldrx().pld_dec is debug_dec + ) + log.debug( + 'Subactor locked TTY with msg\n\n' + f'{status}\n' + ) + + # mk_pdb().set_trace() + assert status.subactor_uid == actor_uid + assert status.cid + + # set last rxed lock dialog status. + DebugStatus.lock_status = status + + async with ctx.open_stream() as stream: assert DebugStatus.repl_release - task_status.started(cs) + task_status.started(ctx) - # wait for local task to exit and - # release the REPL + # wait for local task to exit its + # `PdbREPL.interaction()`, call + # `DebugStatus.release()` and then + # unblock here. 
await DebugStatus.repl_release.wait() - - finally: await stream.send( LockRelease( subactor_uid=actor_uid, - cid=resp.cid, + cid=status.cid, ) ) - # sync with callee termination - status: LockStatus = await ctx.result() - assert not status.locked + # sync with child-side root locker task + # completion + status: LockStatus = await ctx.result() + assert not status.locked + DebugStatus.lock_status = status - log.pdb( - 'TTY lock was released for subactor with msg\n\n' - f'{status}\n\n' - 'Exitting {ctx.side!r} side locking of locking ctx' + log.pdb( + 'TTY lock was released for subactor with msg\n\n' + f'{status}\n\n' + f'Exitting {ctx.side!r}-side of locking ctx' + ) + + except ( + tractor.ContextCancelled, + trio.Cancelled, + ): + log.exception( + 'Debug lock request CANCELLED?\n\n' + f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' + f'{pformat_cs(ctx._scope, var_name="ctx._scope")}\n\n' + f'{ctx}' ) + raise - except ContextCancelled: - log.warning('Root actor cancelled debug lock') - raise + except ( + BaseException, + ): + log.exception( + 'Failed during root TTY-lock dialog?\n' + f'{ctx}\n' - finally: - DebugStatus.repl_task = None - log.debug('Exiting debugger TTY lock request func from child') + f'Cancelling IPC ctx!\n' + ) + await ctx.cancel() + raise - log.cancel('Reverting SIGINT handler!') - DebugStatus.unshield_sigint() + except ( + tractor.ContextCancelled, + trio.Cancelled, + ): + log.cancel( + 'Debug lock request CANCELLED?\n' + f'{ctx}\n' + ) + raise + + except BaseException: + log.exception('Errored during root TTY-lock dialog?') + raise + + finally: + log.debug('Exiting debugger TTY lock request func from child') + # signal request task exit + DebugStatus.req_finished.set() - -def mk_mpdb() -> PdbREPL: +def mk_pdb() -> PdbREPL: ''' - Deliver a new `PdbREPL`: a multi-process safe `pdbp` - REPL using the magic of SC! + Deliver a new `PdbREPL`: a multi-process safe `pdbp.Pdb`-variant + using the magic of `tractor`'s SC-safe IPC. + + B) Our `pdb.Pdb` subtype accomplishes multi-process safe debugging by: - - mutexing access to the root process' TTY & stdstreams - via an IPC managed `Lock` singleton per process tree. + - mutexing access to the root process' std-streams (& thus parent + process TTY) via an IPC managed `Lock` singleton per + actor-process tree. - - temporarily overriding any subactor's SIGINT handler to shield during - live REPL sessions in sub-actors such that cancellation is - never (mistakenly) triggered by a ctrl-c and instead only - by either explicit requests in the runtime or + - temporarily overriding any subactor's SIGINT handler to shield + during live REPL sessions in sub-actors such that cancellation + is never (mistakenly) triggered by a ctrl-c and instead only by + explicit runtime API requests or after the + `pdb.Pdb.interaction()` call has returned. + + FURTHER, the `pdbp.Pdb` instance is configured to be `trio` + "compatible" from a SIGINT handling perspective; we mask out + the default `pdb` handler and instead apply `trio`s default + which mostly addresses all issues described in: + + - https://github.com/python-trio/trio/issues/1155 + + The instance returned from this factory should always be + preferred over the default `pdb[p].set_trace()` whenever using + a `pdb` REPL inside a `trio` based runtime. ''' pdb = PdbREPL() - # Always shield out SIGINTs for subactors when REPL is active. - # - # XXX detect whether we're running from a non-main thread - # in which case schedule the SIGINT shielding override - # to in the main thread. 
- # https://docs.python.org/3/library/signal.html#signals-and-threads - DebugStatus.shield_sigint() - # XXX: These are the important flags mentioned in # https://github.com/python-trio/trio/issues/1155 # which resolve the traceback spews to console. pdb.allow_kbdint = True pdb.nosigint = True - return pdb +def any_connected_locker_child() -> bool: + ''' + Predicate to determine if a reported child subactor in debug + is actually connected. + + Useful to detect stale `Lock` requests after IPC failure. + + ''' + actor: Actor = current_actor() + + if not is_root_process(): + raise RuntimeError('This is a root-actor only API!') + + if ( + (ctx := Lock.ctx_in_debug) + and + (uid_in_debug := ctx.chan.uid) + ): + chans: list[tractor.Channel] = actor._peers.get( + tuple(uid_in_debug) + ) + if chans: + return any( + chan.connected() + for chan in chans + ) + + return False + + def shield_sigint_handler( signum: int, frame: 'frame', # type: ignore # noqa @@ -938,10 +1139,7 @@ def shield_sigint_handler( ''' __tracebackhide__: bool = True - uid_in_debug: tuple[str, str]|None = Lock.global_actor_in_debug - actor: Actor = current_actor() - case_handled: bool = False def do_cancel(): # If we haven't tried to cancel the runtime then do that instead @@ -956,28 +1154,8 @@ def shield_sigint_handler( else: raise KeyboardInterrupt - # try to see if the supposed (sub)actor in debug still - # has an active connection to *this* actor, and if not - # it's likely they aren't using the TTY lock / debugger - # and we should propagate SIGINT normally. - any_connected: bool = False - if uid_in_debug is not None: - chans: list[tractor.Channel] = actor._peers.get( - tuple(uid_in_debug) - ) - if chans: - any_connected = any(chan.connected() for chan in chans) - if not any_connected: - log.warning( - 'A global actor reported to be in debug ' - 'but no connection exists for this child!?\n' - f'subactor_uid: {uid_in_debug}\n\n' - 'Allowing SIGINT propagation..' - ) - return do_cancel() - # only set in the actor actually running the REPL - repl: PdbREPL|None = Lock.repl + repl: PdbREPL|None = DebugStatus.repl # TODO: maybe we should flatten out all these cases using # a match/case? @@ -985,98 +1163,102 @@ def shield_sigint_handler( # root actor branch that reports whether or not a child # has locked debugger. if is_root_process(): - lock_cs: trio.CancelScope = Lock.get_locking_task_cs() + # try to see if the supposed (sub)actor in debug still + # has an active connection to *this* actor, and if not + # it's likely they aren't using the TTY lock / debugger + # and we should propagate SIGINT normally. + any_connected: bool = any_connected_locker_child() + # if not any_connected: + # return do_cancel() - log.warning( + problem = ( f'root {actor.uid} handling SIGINT\n' f'any_connected: {any_connected}\n\n' f'{Lock.repr()}\n' ) - maybe_stale_lock_cs: bool = ( - lock_cs is not None - # and not lock_cs.cancel_called - and uid_in_debug is None - ) - if maybe_stale_lock_cs: - log.warning( - 'Stale `Lock._locking_task_cs: CancelScope` DETECTED?\n' - f'|_{lock_cs}\n\n' - ) - lock_cs.cancel() - - if uid_in_debug: # "someone" is (ostensibly) using debug `Lock` + if ( + (ctx := Lock.ctx_in_debug) + and + (uid_in_debug := ctx.chan.uid) # "someone" is (ostensibly) using debug `Lock` + ): name_in_debug: str = uid_in_debug[0] - if ( - not repl # but it's NOT us, the root actor. - ): - # sanity: since no repl ref is set, we def shouldn't - # be the lock owner! 
- assert name_in_debug != 'root' + assert not repl + # if not repl: # but it's NOT us, the root actor. + # sanity: since no repl ref is set, we def shouldn't + # be the lock owner! + assert name_in_debug != 'root' + # IDEAL CASE: child has REPL as expected + if any_connected: # there are subactors we can contact # XXX: only if there is an existing connection to the # (sub-)actor in debug do we ignore SIGINT in this # parent! Otherwise we may hang waiting for an actor # which has already terminated to unlock. - if any_connected: # there are subactors we can contact - # NOTE: don't emit this with `.pdb()` level in - # root without a higher level. - log.debug( - f'Ignoring SIGINT while debug REPL in use by child\n' - f'subactor: {uid_in_debug}\n' - ) - # returns here minus tail logic - case_handled = True - - else: - message: str = ( - f'Ignoring SIGINT while debug REPL SUPPOSEDLY in use by child\n' - f'subactor: {uid_in_debug}\n\n' - f'BUT, no child actors are contactable!?!?\n\n' - - # f'Reverting to def `trio` SIGINT handler..\n' - ) - - if maybe_stale_lock_cs: - lock_cs.cancel() - message += ( - 'Maybe `Lock._locking_task_cs: CancelScope` is stale?\n' - f'|_{lock_cs}\n\n' - ) - - log.warning(message) - # Lock.unshield_sigint() - DebugStatus.unshield_sigint() - case_handled = True + # + # NOTE: don't emit this with `.pdb()` level in + # root without a higher level. + log.runtime( + f'Ignoring SIGINT while debug REPL in use by child ' + f'{uid_in_debug}\n' + ) + problem = None else: - assert name_in_debug == 'root' # we are the registered locker - assert repl # we have a pdb REPL engaged - log.pdb( - f'Ignoring SIGINT while debug REPL in use\n' - f'root actor: {uid_in_debug}\n' + problem += ( + '\n' + f'A `pdb` REPL is SUPPOSEDLY in use by child {uid_in_debug}\n' + f'BUT, no child actors are IPC contactable!?!?\n' ) - # returns here minus tail logic - case_handled = True - # root actor still has this SIGINT handler active without - # an actor using the `Lock` (a bug state) ?? - # => so immediately cancel any stale lock cs and revert - # the handler! + # IDEAL CASE: root has REPL as expected else: - # XXX revert back to ``trio`` handler since this handler shouldn't - # be enabled withtout an actor using a debug REPL! - log.warning( - 'Ignoring SIGINT in root actor but no actor using a `pdb` REPL?\n' - 'Reverting SIGINT handler to `trio` default!\n' - ) + # root actor still has this SIGINT handler active without + # an actor using the `Lock` (a bug state) ?? + # => so immediately cancel any stale lock cs and revert + # the handler! + if not repl: + # TODO: WHEN should we revert back to ``trio`` + # handler if this one is stale? + # -[ ] maybe after a counts work of ctl-c mashes? + # -[ ] use a state var like `stale_handler: bool`? + problem += ( + '\n' + 'No subactor is using a `pdb` REPL according `Lock.ctx_in_debug`?\n' + 'BUT, the root should be using it, WHY this handler ??\n' + ) + else: + log.pdb( + 'Ignoring SIGINT while pdb REPL in use by root actor..\n' + ) + problem = None + # XXX if one is set it means we ARE NOT operating an ideal + # case where a child subactor or us (the root) has the + # lock without any other detected problems. + if problem: + + # detect, report and maybe clear a stale lock request + # cancel scope. 
+ lock_cs: trio.CancelScope = Lock.get_locking_task_cs() + maybe_stale_lock_cs: bool = ( + lock_cs is not None + and not lock_cs.cancel_called + ) if maybe_stale_lock_cs: + problem += ( + '\n' + 'Stale `Lock.ctx_in_debug._scope: CancelScope` detected?\n' + f'{Lock.ctx_in_debug}\n\n' + + '-> Calling ctx._scope.cancel()!\n' + ) lock_cs.cancel() - DebugStatus.unshield_sigint() - case_handled = True + # TODO: wen do we actually want/need this, see above. + # DebugStatus.unshield_sigint() + log.warning(problem) # child actor that has locked the debugger elif not is_root_process(): @@ -1092,14 +1274,13 @@ def shield_sigint_handler( not rent_chan.connected() ): log.warning( - 'A global sub-actor reported to be in debug ' + 'This sub-actor thinks it is debugging ' 'but it has no connection to its parent ??\n' - f'{uid_in_debug}\n' + f'{actor.uid}\n' 'Allowing SIGINT propagation..' ) DebugStatus.unshield_sigint() # do_cancel() - case_handled = True task: str|None = DebugStatus.repl_task if ( @@ -1107,13 +1288,11 @@ def shield_sigint_handler( and repl ): - # if repl: log.pdb( f'Ignoring SIGINT while local task using debug REPL\n' f'|_{task}\n' f' |_{repl}\n' ) - case_handled = True else: msg: str = ( 'SIGINT shield handler still active BUT, \n\n' @@ -1136,7 +1315,6 @@ def shield_sigint_handler( 'Reverting handler to `trio` default!\n' ) DebugStatus.unshield_sigint() - case_handled = True # XXX ensure that the reverted-to-handler actually is # able to rx what should have been **this** KBI ;) @@ -1156,7 +1334,7 @@ def shield_sigint_handler( # we want to alert the user that more input is expect since # nothing has been done dur to ignoring sigint. if ( - repl # only when this actor has a REPL engaged + repl # only when current actor has a REPL engaged ): # XXX: yah, mega hack, but how else do we catch this madness XD if repl.shname == 'xonsh': @@ -1174,72 +1352,19 @@ def shield_sigint_handler( # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040 # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py - if not case_handled: - log.critical( - f'{actor.uid} UNHANDLED SIGINT !?!?\n' - # TODO: pprint for `Lock`? - ) + # XXX only for tracing this handler + # log.warning('exiting SIGINT') _pause_msg: str = 'Attaching to pdb REPL in actor' -def _set_trace( - actor: tractor.Actor|None = None, - pdb: PdbREPL|None = None, - shield: bool = False, - - extra_frames_up_when_async: int = 1, - hide_tb: bool = True, -): - __tracebackhide__: bool = hide_tb - - actor: tractor.Actor = ( - actor - or - current_actor() - ) - - # always start 1 level up from THIS in user code. - frame: FrameType|None - if frame := sys._getframe(): - frame: FrameType = frame.f_back # type: ignore - - if ( - frame - and ( - pdb - and actor is not None - ) - ): - # TODO: maybe print the actor supervion tree up to the - # root here? Bo - - log.pdb( - f'{_pause_msg}\n' - '|\n' - # TODO: make an `Actor.__repr()__` - f'|_ {current_task()} @ {actor.uid}\n' - ) - # no f!#$&* idea, but when we're in async land - # we need 2x frames up? - for i in range(extra_frames_up_when_async): - frame: FrameType = frame.f_back - log.debug( - f'Going up frame_{i}:\n|_{frame}\n' - ) - - # engage ze REPL - # B~() - pdb.set_trace(frame=frame) - - async def _pause( - debug_func: Callable = _set_trace, + debug_func: Callable|None, # NOTE: must be passed in the `.pause_from_sync()` case! 
- pdb: PdbREPL|None = None, + repl: PdbREPL|None = None, # TODO: allow caller to pause despite task cancellation, # exactly the same as wrapping with: @@ -1249,11 +1374,15 @@ async def _pause( # is always show in the debugger on entry.. and there seems to # be no way to override it?.. # - shield: bool = False, + # shield: bool = False, hide_tb: bool = True, - extra_frames_up_when_async: int = 4, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED + # bc, `debug_func()`, `_enter_repl_sync()` and `_pause()` + # extra_frames_up_when_async: int = 3, + + task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, + + **debug_func_kwargs, ) -> None: ''' @@ -1277,8 +1406,9 @@ async def _pause( 'for infected `asyncio` mode!' ) from rte - # task_name: str = task.name - + # TODO: this should be created as part of `DebugRequest()` init + # which should instead be a one-shot-use singleton much like + # the `PdbREPL`. if ( not DebugStatus.repl_release or @@ -1289,43 +1419,65 @@ async def _pause( if debug_func is not None: debug_func = partial(debug_func) - if pdb is None: - pdb: PdbREPL = mk_mpdb() + repl: PdbREPL = repl or mk_pdb() + # TODO: maybe make this a `PdbREPL` method or mod func? + # -[ ] factor out better, main reason for it is common logic for + # both root and sub repl entry def _enter_repl_sync( debug_func: Callable, ) -> None: __tracebackhide__: bool = hide_tb - try: - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - # - if debug_func is None: - task_status.started(Lock) - else: - # block here one (at the appropriate frame *up*) where - # ``breakpoint()`` was awaited and begin handling stdio. - log.debug('Entering sync world of the `pdb` REPL..') - try: - # log.critical( - # f'stack len: {len(pdb.stack)}\n' - # ) - debug_func( - actor, - pdb, - extra_frames_up_when_async=extra_frames_up_when_async, - shield=shield, - ) - except BaseException: - log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' - ) - raise - except bdb.BdbQuit: - Lock.release() - raise + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + # + if debug_func is None: + task_status.started(DebugStatus) + else: + # block here one (at the appropriate frame *up*) where + # ``breakpoint()`` was awaited and begin handling stdio. + log.debug('Entering sync world of the `pdb` REPL..') + + # XXX used by the SIGINT handler to check if + # THIS actor is in REPL interaction + try: + # TODO: move this into a `open_debug_request()` @acm? + # -[ ] prolly makes the most send to do the request + # task spawn as part of an `@acm` api which + # delivers the `DebugRequest` instance and ensures + # encapsing all the pld-spec and debug-nursery? + # + # set local actor task to avoid recurrent + # entries/requests from the same local task + # (to the root process). + DebugStatus.repl_task = task + DebugStatus.repl = repl + DebugStatus.shield_sigint() + + # enter `PdbREPL` specific method + debug_func( + repl=repl, + hide_tb=hide_tb, + **debug_func_kwargs, + ) + except trio.Cancelled: + log.exception( + 'Cancelled during invoke of internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + # NOTE: DON'T release lock yet + raise + + except BaseException: + log.exception( + 'Failed to invoke internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + # NOTE: OW this is ONLY called from the + # `.set_continue/next` hooks! 
+ DebugStatus.release() + raise try: if is_root_process(): @@ -1333,7 +1485,14 @@ async def _pause( # we also wait in the root-parent for any child that # may have the tty locked prior # TODO: wait, what about multiple root tasks acquiring it though? - if Lock.global_actor_in_debug == actor.uid: + ctx: Context|None = Lock.ctx_in_debug + if ( + ctx is None + and + DebugStatus.repl + and + DebugStatus.repl_task is task + ): # re-entrant root process already has it: noop. log.warning( f'{task.name}@{actor.uid} already has TTY lock\n' @@ -1347,8 +1506,8 @@ async def _pause( # callbacks. Can't think of a nicer way then this atm. if Lock._debug_lock.locked(): log.warning( - 'attempting to shield-acquire active TTY lock' - f' owned by {Lock.global_actor_in_debug}' + 'attempting to shield-acquire active TTY lock owned by\n' + f'{ctx}' ) # must shield here to avoid hitting a ``Cancelled`` and @@ -1359,10 +1518,6 @@ async def _pause( # may be cancelled await Lock._debug_lock.acquire() - Lock.global_actor_in_debug = actor.uid - DebugStatus.repl_task = task - DebugStatus.repl = Lock.repl = pdb - # enter REPL from root, no TTY locking IPC ctx necessary _enter_repl_sync(debug_func) return # next branch is mutex and for subactors @@ -1405,10 +1560,6 @@ async def _pause( await DebugStatus.repl_release.wait() await trio.sleep(0.1) - # mark local actor as "in debug mode" to avoid recurrent - # entries/requests to the root process - DebugStatus.repl_task = task - # this **must** be awaited by the caller and is done using the # root nursery so that the debugger can continue to run without # being restricted by the scope of a new task nursery. @@ -1420,88 +1571,106 @@ async def _pause( # actor._service_n.cancel_scope.shield = shield # ``` # but not entirely sure if that's a sane way to implement it? - - # NOTE: MUST it here bc multiple tasks are spawned by any - # one sub-actor AND there will be a race between when the - # root locking task delivers the `Started(pld=LockStatus)` - # and when the REPL is actually entered here. SO ensure - # the codec is set before either are run! - # - with ( - # _codec.limit_msg_spec( - # payload_spec=__msg_spec__, - # ) as debug_codec, - trio.CancelScope(shield=shield), - ): - # async with trio.open_nursery() as tn: - # tn.cancel_scope.shield = True - try: - # cs: trio.CancelScope = await tn.start( - cs: trio.CancelScope = await actor._service_n.start( - wait_for_parent_stdin_hijack, - actor.uid, - (task.name, id(task)), - ) - # our locker task should be the one in ctx - # with the root actor - assert DebugStatus.req_cs is cs - - # XXX used by the SIGINT handler to check if - # THIS actor is in REPL interaction - Lock.repl = pdb - - except RuntimeError: - Lock.release() - - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. - return - - raise + try: + # NOTE spawn the stdio locker request task inside the + # current `Context._scope_nursery` to entsure that + # the request never can outlive the task's (parent) + # lifetime. + curr_ctx: Context = current_ipc_ctx() + # TODO: see `_errors_relayed_via_ipc()` where we + # should dynamically open a `debug_tn` for use here, + # BUT it needs to be outside the normal error + # catching and `_maybe_enter_debugger()` call! 
+ # ctx: Context = await curr_ctx._debug_tn.start( + ctx: Context = await actor._service_n.start( + request_root_stdio_lock, + actor.uid, + (task.name, id(task)), # task uuid (effectively) + ) + # our locker task should be the one in ctx + # with the root actor + assert ( + ctx + is + DebugStatus.req_ctx + is not + curr_ctx + ) # enter REPL + _enter_repl_sync(debug_func) - try: - _enter_repl_sync(debug_func) - finally: - DebugStatus.unshield_sigint() + except RuntimeError: + if actor._cancel_called: + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. + return + + raise + + # TODO: prolly factor this plus the similar block from + # `_enter_repl_sync()` into a common @cm? + except BaseException as repl_err: + if isinstance(repl_err, bdb.BdbQuit): + log.devx( + 'REPL for pdb was quit!\n' + ) + else: + log.exception( + 'Failed to engage debugger via `_pause()` ??\n' + ) + + DebugStatus.release() + # sanity checks for ^ on request/status teardown + assert DebugStatus.repl is None + assert DebugStatus.repl_task is None + req_ctx: Context = DebugStatus.req_ctx + if req_ctx: + assert req_ctx._scope.cancel_called - except BaseException: - log.exception( - 'Failed to engage debugger via `_pause()` ??\n' - ) raise -# XXX: apparently we can't do this without showing this frame -# in the backtrace on first entry to the REPL? Seems like an odd -# behaviour that should have been fixed by now. This is also why -# we scrapped all the @cm approaches that were tried previously. -# finally: -# __tracebackhide__ = True -# # frame = sys._getframe() -# # last_f = frame.f_back -# # last_f.f_globals['__tracebackhide__'] = True -# # signal.signal = pdbp.hideframe(signal.signal) +def _set_trace( + repl: PdbREPL, # passed by `_pause()` + hide_tb: bool, + + # partial-ed in by `.pause()` + api_frame: FrameType, +): + __tracebackhide__: bool = hide_tb + actor: tractor.Actor = current_actor() + + # else: + # TODO: maybe print the actor supervion tree up to the + # root here? Bo + log.pdb( + f'{_pause_msg}\n' + '|\n' + # TODO: make an `Actor.__repr()__` + f'|_ {current_task()} @ {actor.uid}\n' + ) + # presuming the caller passed in the "api frame" + # (the last frame before user code - like `.pause()`) + # then we only step up one frame to where the user + # called our API. + caller_frame: FrameType = api_frame.f_back # type: ignore + + # engage ze REPL + # B~() + repl.set_trace(frame=caller_frame) async def pause( + *, + hide_tb: bool = True, + api_frame: FrameType|None = None, - debug_func: Callable|None = _set_trace, - - # TODO: allow caller to pause despite task cancellation, - # exactly the same as wrapping with: - # with CancelScope(shield=True): - # await pause() - # => the REMAINING ISSUE is that the scope's .__exit__() frame - # is always show in the debugger on entry.. and there seems to - # be no way to override it?.. - # + # TODO: figure out how to still make this work: + # -[ ] pass it direct to `_pause()`? + # -[ ] use it to set the `debug_nursery.cancel_scope.shield` shield: bool = False, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, - **_pause_kwargs, ) -> None: @@ -1522,19 +1691,37 @@ async def pause( ''' __tracebackhide__: bool = True - with trio.CancelScope( - shield=shield, - ) as cs: + # always start 1 level up from THIS in user code since normally + # `tractor.pause()` is called explicitly by use-app code thus + # making it the highest up @api_frame. 
+    api_frame: FrameType = api_frame or inspect.currentframe()

+    # XXX TODO: this was causing cs-stack corruption in trio due to
+    # usage within the `Context._scope_nursery` (which won't work
+    # based on scoping of it versus call to `_maybe_enter_debugger()`
+    # from `._rpc._invoke()`)
+    # with trio.CancelScope(
+    #     shield=shield,
+    # ) as cs:
         # NOTE: so the caller can always manually cancel even
         # if shielded!
-        task_status.started(cs)
-        return await _pause(
-            debug_func=debug_func,
-            shield=shield,
-            task_status=task_status,
-            **_pause_kwargs
-        )
+        # task_status.started(cs)
+        # log.critical(
+        #     '`.pause() cancel-scope is:\n\n'
+        #     f'{pformat_cs(cs, var_name="pause_cs")}\n\n'
+        # )
+    await _pause(
+        debug_func=partial(
+            _set_trace,
+            api_frame=api_frame,
+        ),
+
+        # task_status=task_status,
+        **_pause_kwargs
+    )
+    # XXX avoid cs stack corruption when `PdbREPL.interaction()`
+    # raises `BdbQuit`.
+    # await DebugStatus.req_finished.wait()


 _gb_mod: None|ModuleType|False = None
@@ -1626,7 +1813,7 @@ def pause_from_sync(

     # raises on not-found by default
     greenback: ModuleType = maybe_import_greenback()
-    mdb: PdbREPL = mk_mpdb()
+    mdb: PdbREPL = mk_pdb()

     # run async task which will lock out the root proc's TTY.
     if not Lock.is_main_trio_thread():
@@ -1664,10 +1851,10 @@ def pause_from_sync(
     # entering the global ``breakpoint()`` built-in from sync
     # code?
     _set_trace(
+        api_frame=inspect.currentframe(),
         actor=actor,
         pdb=mdb,
         hide_tb=hide_tb,
-        extra_frames_up_when_async=1,

         # TODO? will we ever need it?
         # -> the gb._await() won't be affected by cancellation?
@@ -1691,8 +1878,8 @@ async def breakpoint(**kwargs):
     )
     __tracebackhide__: bool = True
     await pause(
-        # extra_frames_up_when_async=6,
-        **kwargs
+        api_frame=inspect.currentframe(),
+        **kwargs,
     )


 _crash_msg: str = (
@@ -1702,12 +1889,15 @@


 def _post_mortem(
-    actor: tractor.Actor,
-    pdb: PdbREPL,
-    shield: bool = False,
+    # provided and passed by `_pause()`
+    repl: PdbREPL,

-    # only for compat with `._set_trace()`..
-    extra_frames_up_when_async=1,
+    # XXX all `partial`-ed in by `post_mortem()` below!
+    tb: TracebackType,
+    api_frame: FrameType,
+
+    shield: bool = False,
+    hide_tb: bool = False,

 ) -> None:
     '''
@@ -1715,6 +1905,9 @@ def _post_mortem(
     debugger instance.

     '''
+    __tracebackhide__: bool = hide_tb
+    actor: tractor.Actor = current_actor()
+
     # TODO: print the actor supervion tree up to the root
     # here! Bo
     log.pdb(
@@ -1728,24 +1921,64 @@ def _post_mortem(
         # f'|_ {current_task()} @ {actor.name}\n'
     )

-    # TODO: only replacing this to add the
+    # NOTE only replacing this from `pdbp.xpm()` to add the
     # `end=''` to the print XD
-    # pdbp.xpm(Pdb=lambda: pdb)
-    info = sys.exc_info()
     print(traceback.format_exc(), end='')
-    pdbp.post_mortem(
-        t=info[2],
-        Pdb=lambda: pdb,
+
+    caller_frame: FrameType = api_frame.f_back
+
+    # NOTE: see the impl details of followings to understand usage:
+    # - `pdbp.post_mortem()`
+    # - `pdbp.xps()`
+    # - `bdb.interaction()`
+    repl.reset()
+    repl.interaction(
+        frame=caller_frame,
+        # frame=None,
+        traceback=tb,
    )


-post_mortem = partial(
-    pause,
-    debug_func=_post_mortem,
-)
+async def post_mortem(
+    *,
+    tb: TracebackType|None = None,
+    api_frame: FrameType|None = None,
+    hide_tb: bool = False,
+
+    # TODO: support shield here just like in `pause()`?
+ # shield: bool = False, + + **_pause_kwargs, + +) -> None: + __tracebackhide__: bool = hide_tb + + tb: TracebackType = tb or sys.exc_info()[2] + + # TODO: do upward stack scan for highest @api_frame and + # use its parent frame as the expected user-app code + # interact point. + api_frame: FrameType = api_frame or inspect.currentframe() + + await _pause( + debug_func=partial( + _post_mortem, + api_frame=api_frame, + tb=tb, + ), + hide_tb=hide_tb, + **_pause_kwargs + ) -async def _maybe_enter_pm(err): +async def _maybe_enter_pm( + err: BaseException, + *, + tb: TracebackType|None = None, + api_frame: FrameType|None = None, + hide_tb: bool = False, +): + from tractor._exceptions import is_multi_cancelled if ( debug_mode() @@ -1764,12 +1997,13 @@ async def _maybe_enter_pm(err): # might be a simpler check we can do? and not is_multi_cancelled(err) ): - log.debug("Actor crashed, entering debug mode") - try: - await post_mortem() - finally: - Lock.release() - return True + api_frame: FrameType = api_frame or inspect.currentframe() + tb: TracebackType = tb or sys.exc_info()[2] + await post_mortem( + api_frame=api_frame, + tb=tb, + ) + return True else: return False @@ -1796,12 +2030,12 @@ async def acquire_debug_lock( return async with trio.open_nursery() as n: - cs = await n.start( - wait_for_parent_stdin_hijack, + ctx: Context = await n.start( + request_root_stdio_lock, subactor_uid, ) - yield cs - cs.cancel() + yield ctx + ctx.cancel() async def maybe_wait_for_debugger( @@ -1830,8 +2064,8 @@ async def maybe_wait_for_debugger( # will make the pdb repl unusable. # Instead try to wait for pdb to be released before # tearing down. - in_debug: tuple[str, str]|None = Lock.global_actor_in_debug - + ctx_in_debug: Context|None = Lock.ctx_in_debug + in_debug: tuple[str, str]|None = ctx_in_debug.chan.uid if ctx_in_debug else None if in_debug == current_actor().uid: log.debug( msg @@ -1864,17 +2098,26 @@ async def maybe_wait_for_debugger( and not Lock.no_remote_has_tty.is_set() and in_debug is not None ): - log.pdb( + + # caller_frame_info: str = pformat_caller_frame() + log.debug( msg + - '\nRoot is waiting on tty lock to release..\n' + '\nRoot is waiting on tty lock to release from\n\n' + # f'{caller_frame_info}\n' ) + + if not any_connected_locker_child(): + Lock.get_locking_task_cs().cancel() + with trio.CancelScope(shield=True): await Lock.no_remote_has_tty.wait() + log.pdb( - f'Child subactor released debug lock\n' + f'Subactor released debug lock\n' f'|_{in_debug}\n' ) + break # is no subactor locking debugger currently? 
if ( @@ -1900,7 +2143,7 @@ async def maybe_wait_for_debugger( f'poll step: {istep}\n' f'poll delya: {poll_delay}' ) - with trio.CancelScope(shield=True): + with CancelScope(shield=True): await trio.sleep(poll_delay) continue -- 2.34.1 From 888af6025b3f44081078475702ef5da1a7c558c9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 13:30:15 -0400 Subject: [PATCH 093/190] Move `_debug.pformat_cs()` into `devx.pformat` --- tractor/devx/_debug.py | 19 ++++--------------- tractor/devx/pformat.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 0567e42a..da322407 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -73,7 +73,10 @@ from tractor._state import ( debug_mode, current_ipc_ctx, ) -# from .pformat import pformat_caller_frame +from .pformat import ( + # pformat_caller_frame, + pformat_cs, +) if TYPE_CHECKING: from tractor._ipc import Channel @@ -868,20 +871,6 @@ def apply_debug_pldec() -> _codec.MsgCodec: f'{orig_pldec}\n' ) -# TODO: add this formatter to `.devx.pformat()`! -def pformat_cs( - cs: CancelScope, - var_name: str = 'cs', -) -> str: - return ( - f'{var_name}: {cs}\n' - f'{var_name}.cancel_called = {cs.cancel_called}\n' - f'{var_name}.cancelled_caught = {cs.cancelled_caught}\n' - f'{var_name}._cancel_status = {cs._cancel_status}\n' - f'{var_name}.shield = {cs.shield}\n' - ) - - async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py index 0b35feee..5fe9bc62 100644 --- a/tractor/devx/pformat.py +++ b/tractor/devx/pformat.py @@ -22,6 +22,8 @@ Mostly handy for logging and exception message content. import textwrap import traceback +from trio import CancelScope + def add_div( message: str, @@ -133,3 +135,34 @@ def pformat_caller_frame( indent='', ) return tb_str + + +def pformat_cs( + cs: CancelScope, + var_name: str = 'cs', + field_prefix: str = ' |_', +) -> str: + ''' + Pretty format info about a `trio.CancelScope` including most + of its public state and `._cancel_status`. + + The output can be modified to show a "var name" for the + instance as a field prefix, just a simple str before each + line more or less. + + ''' + + fields: str = textwrap.indent( + ( + f'cancel_called = {cs.cancel_called}\n' + f'cancelled_caught = {cs.cancelled_caught}\n' + f'_cancel_status = {cs._cancel_status}\n' + f'shield = {cs.shield}\n' + ), + prefix=field_prefix, + ) + return ( + f'{var_name}: {cs}\n' + + + fields + ) -- 2.34.1 From c03f6f917e208fbc3ee107c37d4e337ef981b917 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 13:35:29 -0400 Subject: [PATCH 094/190] Even moar bitty `Context` refinements - set `._state._ctxvar_Context` just after `StartAck` inside `open_context_from_portal()` so that `current_ipc_ctx()` always works on the 'parent' side. - always set `.canceller` to any `MsgTypeError.src_uid` and otherwise to any maybe-detected `.src_uid` (i.e. for RAEs). - always set `.canceller` to us when we rx a ctxc which reports us as its canceller; this is a sanity check on definite "self cancellation". - adjust `._is_self_cancelled()` logic to only be `True` when `._remote_error` is both a ctxc with a `.canceller` set to us AND when `Context.canceller` is also set to us (since the change above) as a little bit of extra rigor. 
- fill-in/fix some `.repr_state` edge cases: - merge self-vs.-peer ctxc cases to one block and distinguish via nested `._is_self_cancelled()` check. - set 'errored' for all exception matched cases despite `.canceller`. - add pre-`Return` phase statuses: |_'pre-started' and 'syncing-to-child' depending on side and when `._stream` has not (yet) been set. |_'streaming' and 'streaming-finished' depending on side when `._stream` is set and whether it was stopped/closed. - tweak drainage log-message to use "outcome" instead of "result". - use new `.devx.pformat.pformat_cs()` inside `_maybe_cancel_and_set_remote_error()` but, IFF the log level is at least 'cancel'. --- tractor/_context.py | 200 ++++++++++++++++++++++++++++---------------- 1 file changed, 126 insertions(+), 74 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 20584979..a2adfa42 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -37,8 +37,9 @@ import inspect from pprint import pformat from typing import ( Any, - Callable, AsyncGenerator, + Callable, + Mapping, Type, TYPE_CHECKING, Union, @@ -59,7 +60,10 @@ from ._exceptions import ( pack_from_raise, unpack_error, ) -from .log import get_logger +from .log import ( + get_logger, + at_least_level, +) from .msg import ( Error, MsgType, @@ -83,6 +87,7 @@ from ._streaming import MsgStream from ._state import ( current_actor, debug_mode, + _ctxvar_Context, ) if TYPE_CHECKING: @@ -204,7 +209,7 @@ class Context: # cancelled that the other side is as well, so maybe we should # instead just have a `.canceller` pulled from the # `ContextCancelled`? - _canceller: tuple[str, str] | None = None + _canceller: tuple[str, str]|None = None # NOTE: we try to ensure assignment of a "cancel msg" since # there's always going to be an "underlying reason" that any @@ -384,8 +389,12 @@ class Context: re: BaseException|None = ( remote_error - or self._remote_error + or + self._remote_error ) + # XXX we only report "this context" as self-cancelled + # once we've received a ctxc from our direct-peer task + # (aka we're `.cancel_acked`). if not re: return False @@ -396,10 +405,10 @@ class Context: our_canceller = self.canceller return bool( - isinstance(re, ContextCancelled) + isinstance((ctxc := re), ContextCancelled) and from_uid == self.chan.uid - and re.canceller == our_uid - and our_canceller == from_uid + and ctxc.canceller == our_uid + and our_canceller == our_uid ) @property @@ -619,15 +628,27 @@ class Context: ) self._remote_error: BaseException = error + msgerr: bool = False + # self-cancel (ack) or, # peer propagated remote cancellation. - msgerr: bool = False if isinstance(error, ContextCancelled): + # NOTE in the case error is a ctxc the canceller will + # either be another peer or us. in the case where it's us + # we mark ourself as the canceller of ourselves (a ctx + # "self cancel" from this side's perspective), if instead + # the far end was cancelled by some other (inter-) peer, + # we want to mark our canceller as the actor that was + # cancelled, NOT their reported canceller. IOW in the + # latter case we're cancelled by someone else getting + # cancelled. 
+ if (canc := error.canceller) == self._actor.uid: + whom: str = 'us' + self._canceller = canc + else: + whom = 'a remote peer (not us)' + self._canceller = error.src_uid - whom: str = ( - 'us' if error.canceller == self._actor.uid - else 'a remote peer (not us)' - ) log.cancel( f'IPC context was cancelled by {whom}!\n\n' f'{error}' @@ -635,6 +656,7 @@ class Context: elif isinstance(error, MsgTypeError): msgerr = True + self._canceller = error.src_uid log.error( f'IPC dialog error due to msg-type caused by {self.peer_side!r} side\n\n' f'{error}\n' @@ -642,28 +664,25 @@ class Context: ) else: + # always record the cancelling actor's uid since its + # cancellation state is linked and we want to know + # which process was the cause / requester of the + # cancellation. + maybe_error_src_uid: tuple = getattr( + error, + 'src_uid', + None, + ) + # we mark the source actor as our canceller + self._canceller = maybe_error_src_uid log.error( f'Remote context error:\n\n' # f'{pformat(self)}\n' f'{error}\n' ) - # always record the cancelling actor's uid since its - # cancellation state is linked and we want to know - # which process was the cause / requester of the - # cancellation. - maybe_error_src: tuple = getattr( - error, - 'src_uid', - None, - ) - self._canceller = ( - maybe_error_src - or - # XXX: in the case we get a non-boxed error? - # -> wait but this should never happen right? - self.chan.uid - ) + if self._canceller is None: + log.error('Ctx has no canceller set!?') # Cancel the local `._scope`, catch that # `._scope.cancelled_caught` and re-raise any remote error @@ -707,30 +726,34 @@ class Context: message: str = 'NOT cancelling `Context._scope` !\n\n' fmt_str: str = 'No `self._scope: CancelScope` was set/used ?' - if cs: + if ( + cs + and + at_least_level(log=log, level='cancel') + ): fmt_str: str = self.pformat( extra_fields={ '._is_self_cancelled()': self._is_self_cancelled(), '._cancel_on_msgerr': self._cancel_on_msgerr, - - '._scope': cs, - '._scope.cancel_called': cs.cancel_called, - '._scope.cancelled_caught': cs.cancelled_caught, - '._scope._cancel_status': cs._cancel_status, } ) + from .devx.pformat import pformat_cs + cs_fmt: str = pformat_cs( + cs, + var_name='Context._scope', + ) + fmt_str += ( + '\n' + + + cs_fmt + ) log.cancel( message + fmt_str ) - # TODO: maybe we should also call `._res_scope.cancel()` if it - # exists to support cancelling any drain loop hangs? - # NOTE: this usage actually works here B) - # from .devx._debug import breakpoint - # await breakpoint() - # TODO: add to `Channel`? + # TODO: also add to `Channel`? @property def dst_maddr(self) -> str: chan: Channel = self.chan @@ -1103,7 +1126,8 @@ class Context: f'ctx id: {self.cid}' ) - # TODO: replace all the instances of this!! XD + # TODO: replace all the `._maybe_raise_remote_err()` usage + # with instances of this!! def maybe_raise( self, hide_tb: bool = True, @@ -1114,6 +1138,7 @@ class Context: if re := self._remote_error: return self._maybe_raise_remote_err( re, + hide_tb=hide_tb, **kwargs, ) @@ -1215,7 +1240,6 @@ class Context: # runtime frames from the tb explicitly? # https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement # https://stackoverflow.com/a/24752607 - __tracebackhide__: bool = True raise remote_error # from None # TODO: change to `.wait_for_result()`? @@ -1266,8 +1290,15 @@ class Context: # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" # msgs still in transit from the far end. 
+ # + # XXX NOTE XXX: this call shouldn't really ever raise + # (other then internal error), instead delivering an + # `Error`-msg and that being `.maybe_raise()`-ed below + # since every message should be delivered via the normal + # `._deliver_msg()` route which will appropriately set + # any `.maybe_error`. ( - return_msg, + outcome_msg, drained_msgs, ) = await msgops.drain_to_final_msg( ctx=self, @@ -1285,13 +1316,18 @@ class Context: f'{msg}\n' ) - log.cancel( - 'Ctx drained to final result msgs\n' - f'{return_msg}\n\n' - - f'pre-result drained msgs:\n' - f'{pformat(drained_msgs)}\n' + drained_status: str = ( + 'Ctx drained to final outcome msg\n\n' + f'{outcome_msg}\n' ) + if drained_msgs: + drained_status += ( + '\n' + f'The pre-drained msgs are\n' + f'{pformat(drained_msgs)}\n' + ) + + log.cancel(drained_status) self.maybe_raise( # NOTE: obvi we don't care if we @@ -1322,7 +1358,7 @@ class Context: @property def maybe_error(self) -> BaseException|None: - le: Exception|None = self._local_error + le: BaseException|None = self._local_error re: RemoteActorError|ContextCancelled|None = self._remote_error match (le, re): @@ -1350,7 +1386,7 @@ class Context: # ContextCancelled(canceller=), # ): - error: Exception|None = le or re + error: BaseException|None = le or re if error: return error @@ -1465,52 +1501,63 @@ class Context: ''' merr: Exception|None = self.maybe_error outcome: Unresolved|Exception|Any = self.outcome - + status: str|None = None match ( outcome, merr, ): + # "graceful" ctx cancellation case ( Unresolved, ContextCancelled(), - ) if self.cancel_acked: - status = 'self-cancelled' - - case ( - Unresolved, - ContextCancelled(), - ) if ( - self.canceller - and not self._cancel_called ): - status = 'peer-cancelled' + if self._is_self_cancelled(): + status = 'self-cancelled' + elif ( + self.canceller + and not self._cancel_called + ): + status = 'peer-cancelled' + # (remote) error condition case ( Unresolved, - BaseException(), - ) if self.canceller: + BaseException(), # any error-type + ): status = 'errored' + # result already returned case ( _, # any non-unresolved value None, ) if self._final_result_is_set(): status = 'returned' + # normal operation but still in a pre-`Return`-result + # dialog phase case ( - Unresolved, # noqa (weird.. ruff) - None, + Unresolved, # noqa (ruff, you so weird..) + None, # no (remote) error set ): if stream := self._stream: if stream.closed: status = 'streaming-finished' else: status = 'streaming' + elif self._started_called: status = 'started' - case _: - status = 'unknown!?' + else: + if self.side == 'child': + status = 'pre-started' + else: + status = 'syncing-to-child' + + if status is None: + status = '??unknown??' + # from tractor.devx import mk_pdb + # mk_pdb().set_trace() return status @@ -1741,7 +1788,6 @@ class Context: f'Delivering IPC ctx error from {self.peer_side!r} to {side!r} task\n\n' f'{flow_body}' - f'{pformat(re)}\n' ) self._cancel_msg: dict = msg @@ -2006,6 +2052,7 @@ async def open_context_from_portal( ) assert ctx._remote_func_type == 'context' assert ctx._caller_info + _ctxvar_Context.set(ctx) # XXX NOTE since `._scope` is NOT set BEFORE we retreive the # `Started`-msg any cancellation triggered @@ -2159,7 +2206,7 @@ async def open_context_from_portal( # CASE 2: context was cancelled by local task calling # `.cancel()`, we don't raise and the exit block should - # exit silently. + # finish silently. 
if (
            ctx._cancel_called
            and
@@ -2284,6 +2331,11 @@ async def open_context_from_portal(
        try:
            result_or_err: Exception|Any = await ctx.result()
        except BaseException as berr:
+            # cancelled before (or maybe during?) final result capture
+            # if isinstance(trio.Cancelled, berr):
+            #     from .devx import mk_pdb
+            #     mk_pdb.set_trace()
+
            # on normal teardown, if we get some error
            # raised in `Context.result()` we still want to
            # save that error on the ctx's state to
@@ -2479,12 +2531,12 @@ def mk_context(
        _caller_info=caller_info,
        **kwargs,
    )
-    # TODO: we can drop the old placeholder yah?
-    # ctx._result: int | Any = id(ctx)
    ctx._result = Unresolved
    return ctx


+# TODO: use the new type-parameters to annotate this in 3.13?
+# -[ ] https://peps.python.org/pep-0718/#unknown-types
 def context(func: Callable) -> Callable:
    '''
    Mark an (async) function as an SC-supervised, inter-`Actor`,
@@ -2498,8 +2550,8 @@ def context(func: Callable) -> Callable:
    # https://github.com/python/mypy/issues/2087#issuecomment-769266912
    func._tractor_context_function = True  # type: ignore

-    sig = inspect.signature(func)
-    params = sig.parameters
+    sig: inspect.Signature = inspect.signature(func)
+    params: Mapping = sig.parameters
    if 'ctx' not in params:
        raise TypeError(
            "The first argument to the context function "
-- 
2.34.1

From 7010a39bd306ac125cef71bde1d0475f2ffed92c Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 8 May 2024 14:24:25 -0400
Subject: [PATCH 095/190] Rework and first draft of `.devx._frame_stack.py`

Proto-ing a little suite of call-stack-frame annotation-for-scanning
sub-systems for the purposes of both,
- the `.devx._debug`er and its traceback and frame introspection
  needs when entering the REPL,
- detailed trace-style logging such that we can explicitly report
  on "which and where" `tractor`'s APIs are used in the "app" code.

Deats:
- change mod name obvi from `._code` and adjust client mod imports.
- using `wrapt` (for perf) implement a `@api_frame` annot decorator
  which both stashes per-call-stack-frame instances of `CallerInfo`
  in a table and marks the function such that API endpoints can be
  easily found via runtime stack scanning despite any internal impl
  changes.
- add a global `_frame2callerinfo_cache: dict[FrameType, CallerInfo]`
  table for providing the per func-frame info caching.
- Re-implement `CallerInfo` to require less (types of) inputs:
  |_ `_api_func: Callable`, a ref to the (singleton) func def.
  |_ `_api_frame: FrameType` taken from the `@api_frame` marked
    `tractor`-API func's runtime call-stack, from which we can
    determine the app code's `.caller_frame`.
  |_`_caller_frames_up: int|None` allowing the specific `@api_frame`
    to determine "how many frames up" the application / calling code
    is.
  And, a better set of derived attrs:
  |_`caller_frame: FrameType` which finds and caches the API-eps
    calling frame.
- add a new attempt at "getting a method ref from its runtime frame"
  with `get_ns_and_func_from_frame()` using a heuristic that the
  `CodeType.co_qualname: str` should have a "." in it for methods.
  - main issue is still that the func-ref lookup will require
    searching for the method's instance type by name, and that name
    isn't guaranteed to be defined in any particular ns..
    |_rn we try to read it from the `FrameType.f_locals` but that
      is going to obvi fail any time the method is called in a module
      where its type is not also defined/imported.
  - returns both the ns and the func ref FYI.
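
For a concrete feel, here's a self-contained toy sketch of the
frame-caching pattern described above (a plain-decorator stand-in,
NOT the `wrapt`-based impl from this patch; the `_frame2info` table
and helper names are illustrative only):

    import inspect
    from types import CodeType, FrameType

    _frame2info: dict[CodeType, dict] = {}

    def api_frame(func):
        def wrapper(*args, **kwargs):
            # one hop up from this (plain, non-`wrapt`) wrapper's
            # frame is the **user code** which invoked the API ep;
            # key the cache on its (persistent) code obj since the
            # frame itself dies once the call returns.
            caller: FrameType = inspect.currentframe().f_back
            _frame2info.setdefault(
                caller.f_code,
                {
                    'api_func': func,
                    'caller_name': caller.f_code.co_name,
                },
            )
            return func(*args, **kwargs)
        return wrapper

    @api_frame
    def pause() -> str:
        return 'repl-time'

    def app_code() -> str:
        return pause()

    app_code()
    for code, info in _frame2info.items():
        # prints: pause app_code
        print(info['api_func'].__name__, info['caller_name'])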
--- tractor/_context.py | 4 +- tractor/devx/{_code.py => _frame_stack.py} | 229 +++++++++++---------- 2 files changed, 121 insertions(+), 112 deletions(-) rename tractor/devx/{_code.py => _frame_stack.py} (53%) diff --git a/tractor/_context.py b/tractor/_context.py index a2adfa42..b58010b6 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -94,7 +94,7 @@ if TYPE_CHECKING: from ._portal import Portal from ._runtime import Actor from ._ipc import MsgTransport - from .devx._code import ( + from .devx._frame_stack import ( CallerInfo, ) @@ -2513,7 +2513,7 @@ def mk_context( send_chan, recv_chan = trio.open_memory_channel(msg_buffer_size) # TODO: only scan caller-info if log level so high! - from .devx._code import find_caller_info + from .devx._frame_stack import find_caller_info caller_info: CallerInfo|None = find_caller_info() # TODO: when/how do we apply `.limit_plds()` from here? diff --git a/tractor/devx/_code.py b/tractor/devx/_frame_stack.py similarity index 53% rename from tractor/devx/_code.py rename to tractor/devx/_frame_stack.py index 8d55212b..89a9e849 100644 --- a/tractor/devx/_code.py +++ b/tractor/devx/_frame_stack.py @@ -20,11 +20,8 @@ as it pertains to improving the grok-ability of our runtime! ''' from __future__ import annotations +from functools import partial import inspect -# import msgspec -# from pprint import pformat -import textwrap -import traceback from types import ( FrameType, FunctionType, @@ -32,9 +29,8 @@ from types import ( # CodeType, ) from typing import ( - # Any, + Any, Callable, - # TYPE_CHECKING, Type, ) @@ -42,6 +38,7 @@ from tractor.msg import ( pretty_struct, NamespacePath, ) +import wrapt # TODO: yeah, i don't love this and we should prolly just @@ -83,6 +80,31 @@ def get_class_from_frame(fr: FrameType) -> ( return None +def get_ns_and_func_from_frame( + frame: FrameType, +) -> Callable: + ''' + Return the corresponding function object reference from + a `FrameType`, and return it and it's parent namespace `dict`. + + ''' + ns: dict[str, Any] + + # for a method, go up a frame and lookup the name in locals() + if '.' in (qualname := frame.f_code.co_qualname): + cls_name, _, func_name = qualname.partition('.') + ns = frame.f_back.f_locals[cls_name].__dict__ + + else: + func_name: str = frame.f_code.co_name + ns = frame.f_globals + + return ( + ns, + ns[func_name], + ) + + def func_ref_from_frame( frame: FrameType, ) -> Callable: @@ -98,34 +120,63 @@ def func_ref_from_frame( ) -# TODO: move all this into new `.devx._code`! -# -[ ] prolly create a `@runtime_api` dec? -# -[ ] ^- make it capture and/or accept buncha optional -# meta-data like a fancier version of `@pdbp.hideframe`. -# class CallerInfo(pretty_struct.Struct): - rt_fi: inspect.FrameInfo - call_frame: FrameType + # https://docs.python.org/dev/reference/datamodel.html#frame-objects + # https://docs.python.org/dev/library/inspect.html#the-interpreter-stack + _api_frame: FrameType @property - def api_func_ref(self) -> Callable|None: - return func_ref_from_frame(self.rt_fi.frame) + def api_frame(self) -> FrameType: + try: + self._api_frame.clear() + except RuntimeError: + # log.warning( + print( + f'Frame {self._api_frame} for {self.api_func} is still active!' 
+ ) + + return self._api_frame + + _api_func: Callable + + @property + def api_func(self) -> Callable: + return self._api_func + + _caller_frames_up: int|None = 1 + _caller_frame: FrameType|None = None # cached after first stack scan @property def api_nsp(self) -> NamespacePath|None: - func: FunctionType = self.api_func_ref + func: FunctionType = self.api_func if func: return NamespacePath.from_ref(func) return '' @property - def caller_func_ref(self) -> Callable|None: - return func_ref_from_frame(self.call_frame) + def caller_frame(self) -> FrameType: + + # if not already cached, scan up stack explicitly by + # configured count. + if not self._caller_frame: + if self._caller_frames_up: + for _ in range(self._caller_frames_up): + caller_frame: FrameType|None = self.api_frame.f_back + + if not caller_frame: + raise ValueError( + 'No frame exists {self._caller_frames_up} up from\n' + f'{self.api_frame} @ {self.api_nsp}\n' + ) + + self._caller_frame = caller_frame + + return self._caller_frame @property def caller_nsp(self) -> NamespacePath|None: - func: FunctionType = self.caller_func_ref + func: FunctionType = self.api_func if func: return NamespacePath.from_ref(func) @@ -172,108 +223,66 @@ def find_caller_info( call_frame = call_frame.f_back return CallerInfo( - rt_fi=fi, - call_frame=call_frame, + _api_frame=rt_frame, + _api_func=func_ref_from_frame(rt_frame), + _caller_frames_up=go_up_iframes, ) return None -def pformat_boxed_tb( - tb_str: str, - fields_str: str|None = None, - field_prefix: str = ' |_', +_frame2callerinfo_cache: dict[FrameType, CallerInfo] = {} - tb_box_indent: int|None = None, - tb_body_indent: int = 1, -) -> str: - ''' - Create a "boxed" looking traceback string. +# TODO: -[x] move all this into new `.devx._code`! +# -[ ] consider rename to _callstack? +# -[ ] prolly create a `@runtime_api` dec? +# |_ @api_frame seems better? +# -[ ] ^- make it capture and/or accept buncha optional +# meta-data like a fancier version of `@pdbp.hideframe`. +# +def api_frame( + wrapped: Callable|None = None, + *, + caller_frames_up: int = 1, - Useful for emphasizing traceback text content as being an - embedded attribute of some other object (like - a `RemoteActorError` or other boxing remote error shuttle - container). +) -> Callable: - Any other parent/container "fields" can be passed in the - `fields_str` input along with other prefix/indent settings. + # handle the decorator called WITHOUT () case, + # i.e. 
just @api_frame, NOT @api_frame(extra=) + if wrapped is None: + return partial( + api_frame, + caller_frames_up=caller_frames_up, + ) - ''' - if ( - fields_str - and - field_prefix + @wrapt.decorator + async def wrapper( + wrapped: Callable, + instance: object, + args: tuple, + kwargs: dict, ): - fields: str = textwrap.indent( - fields_str, - prefix=field_prefix, - ) - else: - fields = fields_str or '' + # maybe cache the API frame for this call + global _frame2callerinfo_cache + this_frame: FrameType = inspect.currentframe() + api_frame: FrameType = this_frame.f_back - tb_body = tb_str - if tb_body_indent: - tb_body: str = textwrap.indent( - tb_str, - prefix=tb_body_indent * ' ', - ) + if not _frame2callerinfo_cache.get(api_frame): + _frame2callerinfo_cache[api_frame] = CallerInfo( + _api_frame=api_frame, + _api_func=wrapped, + _caller_frames_up=caller_frames_up, + ) - tb_box: str = ( + return wrapped(*args, **kwargs) - # orig - # f' |\n' - # f' ------ - ------\n\n' - # f'{tb_str}\n' - # f' ------ - ------\n' - # f' _|\n' - - f'|\n' - f' ------ - ------\n\n' - # f'{tb_str}\n' - f'{tb_body}' - f' ------ - ------\n' - f'_|\n' - ) - tb_box_indent: str = ( - tb_box_indent - or - 1 - - # (len(field_prefix)) - # ? ^-TODO-^ ? if you wanted another indent level - ) - if tb_box_indent > 0: - tb_box: str = textwrap.indent( - tb_box, - prefix=tb_box_indent * ' ', - ) - - return ( - fields - + - tb_box - ) - - -def pformat_caller_frame( - stack_limit: int = 1, - box_tb: bool = True, -) -> str: - ''' - Capture and return the traceback text content from - `stack_limit` call frames up. - - ''' - tb_str: str = ( - '\n'.join( - traceback.format_stack(limit=stack_limit) - ) - ) - if box_tb: - tb_str: str = pformat_boxed_tb( - tb_str=tb_str, - field_prefix=' ', - indent='', - ) - return tb_str + # annotate the function as a "api function", meaning it is + # a function for which the function above it in the call stack should be + # non-`tractor` code aka "user code". + # + # in the global frame cache for easy lookup from a given + # func-instance + wrapped._call_infos: dict[FrameType, CallerInfo] = _frame2callerinfo_cache + wrapped.__api_func__: bool = True + return wrapper(wrapped) -- 2.34.1 From 429f8f4e13910146f096958ffe35a1206da9387c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 14:53:45 -0400 Subject: [PATCH 096/190] Adjust `._runtime` to report `DebugStatus.req_ctx` - inside the `Actor.cancel()`'s maybe-wait-on-debugger delay, report the full debug request status and it's affiliated lock request IPC ctx. - use the new `.req_ctx.chan.uid` to do the local nursery lookup during channel teardown handling. - another couple log fmt tweaks. --- tractor/_runtime.py | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 84940222..1d931cd7 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -646,7 +646,7 @@ class Actor: peers_str: str = '' for uid, chans in self._peers.items(): peers_str += ( - f'|_ uid: {uid}\n' + f'uid: {uid}\n' ) for i, chan in enumerate(chans): peers_str += ( @@ -680,10 +680,12 @@ class Actor: # XXX => YES IT DOES, when i was testing ctl-c # from broken debug TTY locking due to # msg-spec races on application using RunVar... 
- pdb_user_uid: tuple = pdb_lock.global_actor_in_debug if ( - pdb_user_uid - and local_nursery + (ctx_in_debug := pdb_lock.ctx_in_debug) + and + (pdb_user_uid := ctx_in_debug.chan.uid) + and + local_nursery ): entry: tuple|None = local_nursery._children.get( tuple(pdb_user_uid) @@ -1171,13 +1173,17 @@ class Actor: # kill any debugger request task to avoid deadlock # with the root actor in this tree - dbcs = _debug.DebugStatus.req_cs - if dbcs is not None: + debug_req = _debug.DebugStatus + lock_req_ctx: Context = debug_req.req_ctx + if lock_req_ctx is not None: msg += ( '-> Cancelling active debugger request..\n' - f'|_{_debug.Lock.pformat()}' + f'|_{_debug.Lock.repr()}\n\n' + f'|_{lock_req_ctx}\n\n' ) - dbcs.cancel() + # lock_req_ctx._scope.cancel() + # TODO: wrap this in a method-API.. + debug_req.req_cs.cancel() # self-cancel **all** ongoing RPC tasks await self.cancel_rpc_tasks( @@ -1377,15 +1383,17 @@ class Actor: "IPC channel's " ) rent_chan_repr: str = ( - f'|_{parent_chan}' + f' |_{parent_chan}\n\n' if parent_chan else '' ) log.cancel( - f'Cancelling {descr} {len(tasks)} rpc tasks\n\n' - f'<= `Actor.cancel_rpc_tasks()`: {req_uid}\n' - f' {rent_chan_repr}\n' - # f'{self}\n' + f'Cancelling {descr} RPC tasks\n\n' + f'<= canceller: {req_uid}\n' + f'{rent_chan_repr}' + f'=> cancellee: {self.uid}\n' + f' |_{self}.cancel_rpc_tasks()\n' + f' |_tasks: {len(tasks)}\n' # f'{tasks_str}' ) for ( @@ -1415,7 +1423,7 @@ class Actor: if tasks: log.cancel( 'Waiting for remaining rpc tasks to complete\n' - f'|_{tasks}' + f'|_{tasks_str}' ) await self._ongoing_rpc_tasks.wait() @@ -1468,7 +1476,10 @@ class Actor: assert self._parent_chan, "No parent channel for this actor?" return Portal(self._parent_chan) - def get_chans(self, uid: tuple[str, str]) -> list[Channel]: + def get_chans( + self, + uid: tuple[str, str], + ) -> list[Channel]: ''' Return all IPC channels to the actor with provided `uid`. @@ -1631,6 +1642,9 @@ async def async_main( entered_debug: bool = await _debug._maybe_enter_pm(oserr) if not entered_debug: log.exception('Failed to init IPC channel server !?\n') + else: + log.runtime('Exited debug REPL..') + raise accept_addrs: list[tuple[str, int]] = actor.accept_addrs -- 2.34.1 From e07e7da0b5d086703a1744981955b7081636e503 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 15:07:34 -0400 Subject: [PATCH 097/190] Rename `.msg.types.Msg` -> `PayloadMsg` --- tractor/msg/types.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7e10dab0..1b3733cb 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -56,8 +56,7 @@ log = get_logger('tractor.msgspec') PayloadT = TypeVar('PayloadT') -# TODO: PayloadMsg -class Msg( +class PayloadMsg( Struct, Generic[PayloadT], @@ -110,6 +109,10 @@ class Msg( pld: Raw +# TODO: complete rename +Msg = PayloadMsg + + class Aid( Struct, tag=True, -- 2.34.1 From 953976d5888f7044cbb71854478f323516c63e5e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 09:37:47 -0400 Subject: [PATCH 098/190] Use `.recv_msg_w_pld()` for final `Portal.result()` Woops, due to a `None` test against the `._final_result`, any actual final `None` result would be received but not acked as such causing a spawning test to hang. Fix it by instead receiving and assigning both a `._final_result_msg: PayloadMsg` and `._final_result_pld`. 
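
As a toy repro of the sentinel bug (and the shape of the fix), the
following sketch mirrors the attr names from this patch but is NOT
the real `Portal`; `FauxPortal` and its recv helper are made up for
illustration:

    import trio

    class FauxPortal:
        _final_result_msg: dict|None = None  # only `None` pre-receive
        _final_result_pld: object = None
        _fetches: int = 0

        async def _recv_msg_w_pld(self) -> tuple[dict, object]:
            self._fetches += 1
            await trio.sleep(0)
            # a final `Return`-like msg whose payload is legitimately `None`
            return {'msg': 'Return'}, None

        async def result(self) -> object:
            # OLD (buggy): gating on `self._final_result_pld is None`
            # re-receives on every call whenever the actual result
            # *is* `None` -> the 2nd call hangs on a msg that will
            # never arrive.
            # NEW: gate on the msg, which is never a bare `None`
            # once the final result has been received.
            if self._final_result_msg is None:
                (
                    self._final_result_msg,
                    self._final_result_pld,
                ) = await self._recv_msg_w_pld()
            return self._final_result_pld

    async def main():
        p = FauxPortal()
        assert await p.result() is None
        assert await p.result() is None  # cached, no 2nd recv
        assert p._fetches == 1

    trio.run(main)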
NB: as mentioned in many recent comments surrounding this API layer, really this whole `Portal`-has-final-result interface/semantics should be entirely removed as should the `ActorNursery.run_in_actor()` API(s). Instead it should all be replaced by a wrapping "high level" API (`tractor.hilevel` ?) which combines a task nursery, `Portal.open_context()` and underlying `Context` APIs + an `outcome.Outcome` to accomplish the same "run a single task in a spawned actor and return it's result"; aka a "one-shot-task-actor". --- tractor/_portal.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 79a9dc5d..700f2fdc 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -47,6 +47,7 @@ from ._ipc import Channel from .log import get_logger from .msg import ( # Error, + PayloadMsg, NamespacePath, Return, ) @@ -98,7 +99,8 @@ class Portal: self.chan = channel # during the portal's lifetime - self._final_result: Any|None = None + self._final_result_pld: Any|None = None + self._final_result_msg: PayloadMsg|None = None # When set to a ``Context`` (when _submit_for_result is called) # it is expected that ``result()`` will be awaited at some @@ -132,7 +134,7 @@ class Portal: 'A pending main result has already been submitted' ) - self._expect_result_ctx = await self.actor.start_remote_task( + self._expect_result_ctx: Context = await self.actor.start_remote_task( self.channel, nsf=NamespacePath(f'{ns}:{func}'), kwargs=kwargs, @@ -163,13 +165,16 @@ class Portal: # expecting a "main" result assert self._expect_result_ctx - if self._final_result is None: - self._final_result: Any = await self._expect_result_ctx._pld_rx.recv_pld( - ctx=self._expect_result_ctx, + if self._final_result_msg is None: + ( + self._final_result_msg, + self._final_result_pld, + ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( + ipc=self._expect_result_ctx, expect_msg=Return, ) - return self._final_result + return self._final_result_pld async def _cancel_streams(self): # terminate all locally running async generator -- 2.34.1 From fded92115a23a36cff6694abe01f2b72f16e5ed0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 15:20:03 -0400 Subject: [PATCH 099/190] Hide some API frames, port to new `._debug` apis - start tossing in `__tracebackhide__`s to various eps which don't need to show in tbs or in the pdb REPL. - port final `._maybe_enter_pm()` to pass a `api_frame`. - start comment-marking up some API eps with `@api_frame` in prep for actually using the new frame-stack tracing. --- tractor/_root.py | 12 +++++++++--- tractor/_spawn.py | 19 ++++++++----------- tractor/_supervise.py | 4 ++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index 377f494e..4c0bb4f6 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -18,7 +18,7 @@ Root actor runtime ignition(s). ''' -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager as acm from functools import partial import importlib import logging @@ -60,7 +60,7 @@ _default_lo_addrs: list[tuple[str, int]] = [( logger = log.get_logger('tractor') -@asynccontextmanager +@acm async def open_root_actor( *, @@ -97,6 +97,7 @@ async def open_root_actor( Runtime init entry point for ``tractor``. ''' + __tracebackhide__ = True # TODO: stick this in a `@cm` defined in `devx._debug`? 
# # Override the global debugger hook to make it play nice with @@ -363,7 +364,12 @@ async def open_root_actor( BaseExceptionGroup, ) as err: - entered: bool = await _debug._maybe_enter_pm(err) + import inspect + entered: bool = await _debug._maybe_enter_pm( + err, + api_frame=inspect.currentframe(), + ) + if ( not entered and diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 3f886c01..09d9aff8 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -142,7 +142,9 @@ async def exhaust_portal( ''' __tracebackhide__ = True try: - log.debug(f"Waiting on final result from {actor.uid}") + log.debug( + f'Waiting on final result from {actor.uid}' + ) # XXX: streams should never be reaped here since they should # always be established and shutdown using a context manager api @@ -195,7 +197,10 @@ async def cancel_on_completion( # if this call errors we store the exception for later # in ``errors`` which will be reraised inside # an exception group and we still send out a cancel request - result: Any|Exception = await exhaust_portal(portal, actor) + result: Any|Exception = await exhaust_portal( + portal, + actor, + ) if isinstance(result, Exception): errors[actor.uid]: Exception = result log.cancel( @@ -507,14 +512,6 @@ async def trio_proc( ) ) - # await chan.send({ - # '_parent_main_data': subactor._parent_main_data, - # 'enable_modules': subactor.enable_modules, - # 'reg_addrs': subactor.reg_addrs, - # 'bind_addrs': bind_addrs, - # '_runtime_vars': _runtime_vars, - # }) - # track subactor in current nursery curr_actor: Actor = current_actor() curr_actor._actoruid2nursery[subactor.uid] = actor_nursery @@ -558,8 +555,8 @@ async def trio_proc( # killing the process too early. if proc: log.cancel(f'Hard reap sequence starting for {subactor.uid}') - with trio.CancelScope(shield=True): + with trio.CancelScope(shield=True): # don't clobber an ongoing pdb if cancelled_during_spawn: # Try again to avoid TTY clobbering. diff --git a/tractor/_supervise.py b/tractor/_supervise.py index dc65cc65..59ec728b 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -346,8 +346,6 @@ async def _open_and_supervise_one_cancels_all_nursery( actor: Actor, ) -> typing.AsyncGenerator[ActorNursery, None]: - - # TODO: yay or nay? __tracebackhide__ = True # the collection of errors retreived from spawned sub-actors @@ -519,6 +517,7 @@ async def _open_and_supervise_one_cancels_all_nursery( @acm +# @api_frame async def open_nursery( **kwargs, @@ -538,6 +537,7 @@ async def open_nursery( which cancellation scopes correspond to each spawned subactor set. ''' + __tracebackhide__ = True implicit_runtime: bool = False actor: Actor = current_actor(err_on_no_runtime=False) an: ActorNursery|None = None -- 2.34.1 From 683288c8db29f7b37a34fa9296100585395ad3c4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 16:31:23 -0400 Subject: [PATCH 100/190] Update tests for `PldRx` and `Context` changes Mostly adjustments for the new pld-receiver semantics/shim-layer which results more often in the direct delivery of `RemoteActorError`s from IPC API primitives (like `Portal.result()`) instead of being embedded in an `ExceptionGroup` bundled from an embedded nursery. Tossed usage of the `debug_mode: bool` fixture to a couple problematic tests while i was working on them. Also includes detailed assertion updates to the inter-peer cancellation suite in terms of, - `Context.canceller` state correctly matching the true src actor when expecting a ctxc. 
- any rxed `ContextCancelled` should instance match the
  `Context._local/remote_error` as should the `.msgdata` and
  `._ipc_msg`.
---
 tests/test_cancellation.py            | 25 +++++--
 tests/test_infected_asyncio.py        | 22 +++---
 tests/test_inter_peer_cancellation.py | 99 +++++++++++++++++++--------
 tests/test_spawning.py                | 20 +++---
 4 files changed, 113 insertions(+), 53 deletions(-)

diff --git a/tests/test_cancellation.py b/tests/test_cancellation.py
index 18ad3615..92540ed4 100644
--- a/tests/test_cancellation.py
+++ b/tests/test_cancellation.py
@@ -89,17 +89,30 @@ def test_remote_error(reg_addr, args_err):
         assert excinfo.value.boxed_type == errtype

     else:
-        # the root task will also error on the `.result()` call
-        # so we expect an error from there AND the child.
-        with pytest.raises(BaseExceptionGroup) as excinfo:
+        # the root task will also error on the `Portal.result()`
+        # call so we expect an error from there AND the child.
+        # |_ tho seems like on new `trio` this doesn't always
+        #    happen?
+        with pytest.raises((
+            BaseExceptionGroup,
+            tractor.RemoteActorError,
+        )) as excinfo:
             trio.run(main)

-        # ensure boxed errors
-        for exc in excinfo.value.exceptions:
+        # ensure boxed errors are `errtype`
+        err: BaseException = excinfo.value
+        if isinstance(err, BaseExceptionGroup):
+            suberrs: list[BaseException] = err.exceptions
+        else:
+            suberrs: list[BaseException] = [err]
+
+        for exc in suberrs:
             assert exc.boxed_type == errtype


-def test_multierror(reg_addr):
+def test_multierror(
+    reg_addr: tuple[str, int],
+):
     '''
     Verify we raise a ``BaseExceptionGroup`` out of a nursery where
     more then one actor errors.
diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py
index 8d34bef4..45722a63 100644
--- a/tests/test_infected_asyncio.py
+++ b/tests/test_infected_asyncio.py
@@ -444,6 +444,7 @@ def test_basic_interloop_channel_stream(reg_addr, fan_out):
                 infect_asyncio=True,
                 fan_out=fan_out,
             )
+            # should raise RAE directly
             await portal.result()

     trio.run(main)
@@ -461,12 +462,11 @@ def test_trio_error_cancels_intertask_chan(reg_addr):
             # should trigger remote actor error
             await portal.result()

-    with pytest.raises(BaseExceptionGroup) as excinfo:
+    with pytest.raises(RemoteActorError) as excinfo:
         trio.run(main)

-    # ensure boxed errors
-    for exc in excinfo.value.exceptions:
-        assert exc.boxed_type == Exception
+    # ensure boxed error type
+    assert excinfo.value.boxed_type == Exception


 def test_trio_closes_early_and_channel_exits(reg_addr):
@@ -477,7 +477,7 @@ def test_trio_closes_early_and_channel_exits(reg_addr):
                 exit_early=True,
                 infect_asyncio=True,
             )
-            # should trigger remote actor error
+            # should raise RAE directly
             await portal.result()

     # should be a quiet exit on a simple channel exit
@@ -492,15 +492,17 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr):
                 aio_raise_err=True,
                 infect_asyncio=True,
             )
-            # should trigger remote actor error
+            # should trigger RAE directly, not an eg.
             await portal.result()

-    with pytest.raises(BaseExceptionGroup) as excinfo:
+    with pytest.raises(
+        # NOTE: bc we directly wait on `Portal.result()` instead
+        # of capturing it inside the `ActorNursery` machinery.
+        expected_exception=RemoteActorError,
+    ) as excinfo:
         trio.run(main)
 
-    # ensure boxed errors
-    for exc in excinfo.value.exceptions:
-        assert exc.boxed_type == Exception
+    assert excinfo.value.boxed_type == Exception
 
 
 @tractor.context
diff --git a/tests/test_inter_peer_cancellation.py b/tests/test_inter_peer_cancellation.py
index aa05e3c8..7bf9a2bd 100644
--- a/tests/test_inter_peer_cancellation.py
+++ b/tests/test_inter_peer_cancellation.py
@@ -55,9 +55,10 @@ from tractor._testing import (
 
 
 @tractor.context
-async def sleep_forever(
+async def open_stream_then_sleep_forever(
     ctx: Context,
     expect_ctxc: bool = False,
+
 ) -> None:
     '''
     Sync the context, open a stream then just sleep.
@@ -67,6 +68,10 @@ async def sleep_forever(
     '''
     try:
         await ctx.started()
+
+        # NOTE: the below means this child will send a `Stop`
+        # to its parent-side task despite that side never
+        # opening a stream itself.
         async with ctx.open_stream():
             await trio.sleep_forever()
 
@@ -100,7 +105,7 @@ async def error_before_started(
     '''
     async with tractor.wait_for_actor('sleeper') as p2:
         async with (
-            p2.open_context(sleep_forever) as (peer_ctx, first),
+            p2.open_context(open_stream_then_sleep_forever) as (peer_ctx, first),
             peer_ctx.open_stream(),
         ):
             # NOTE: this WAS inside an @acm body but i factored it
@@ -204,9 +209,13 @@ async def stream_ints(
 @tractor.context
 async def stream_from_peer(
     ctx: Context,
+    debug_mode: bool,
     peer_name: str = 'sleeper',
 ) -> None:
 
+    # sanity
+    assert tractor._state.debug_mode() == debug_mode
+
     peer: Portal
     try:
         async with (
@@ -240,26 +249,54 @@ async def stream_from_peer(
                     assert msg is not None
                     print(msg)
 
-    # NOTE: cancellation of the (sleeper) peer should always
-    # cause a `ContextCancelled` raise in this streaming
-    # actor.
-    except ContextCancelled as ctxc:
-        ctxerr = ctxc
+    # NOTE: cancellation of the (sleeper) peer should always cause
+    # a `ContextCancelled` raise in this streaming actor.
+    except ContextCancelled as _ctxc:
+        ctxc = _ctxc
 
-        assert peer_ctx._remote_error is ctxerr
-        assert peer_ctx._remote_error.msgdata == ctxerr.msgdata
+        # print("TRYING TO ENTER PAUSSE!!!")
+        # await tractor.pause(shield=True)
+        re: ContextCancelled = peer_ctx._remote_error
 
-        # XXX YES, bc exact same msg instances
-        assert peer_ctx._remote_error._ipc_msg is ctxerr._ipc_msg
+        # XXX YES XXX, remote error should be unpacked only once!
+        assert (
+            re
+            is
+            peer_ctx.maybe_error
+            is
+            ctxc
+            is
+            peer_ctx._local_error
+        )
+        # NOTE: these errors should all match!
+        #   ------ - ------
+        # XXX [2024-05-03] XXX
+        #   ------ - ------
+        # broke this due to a re-raise inside `.msg._ops.drain_to_final_msg()`
+        # where the `Error()` msg was directly raising the ctxc
+        # instead of just returning up to the caller inside
+        # `Context.return()` which would result in a diff instance of
+        # the same remote error bubbling out above vs what was
+        # already unpacked and set inside `Context.
+        assert (
+            peer_ctx._remote_error.msgdata
+            ==
+            ctxc.msgdata
+        )
+        # ^-XXX-^ notice the data is of course the exact same.. so
+        # the above larger assert makes sense to also always be true!
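+
+        # (aside) the next two checks draw a subtle distinction:
+        # the private `._ipc_msg` attr holds the single `Error` msg
+        # instance decoded off the wire, whereas the public
+        # `.ipc_msg` property appears to re-build a fresh msg on
+        # every access, so instance identity can only ever hold for
+        # the former.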
- # XXX NO, bc new one always created for property accesss - assert peer_ctx._remote_error.ipc_msg != ctxerr.ipc_msg + # XXX YES XXX, bc should be exact same msg instances + assert peer_ctx._remote_error._ipc_msg is ctxc._ipc_msg + + # XXX NO XXX, bc new one always created for property accesss + assert peer_ctx._remote_error.ipc_msg != ctxc.ipc_msg # the peer ctx is the canceller even though it's canceller # is the "canceller" XD assert peer_name in peer_ctx.canceller - assert "canceller" in ctxerr.canceller + assert "canceller" in ctxc.canceller # caller peer should not be the cancel requester assert not ctx.cancel_called @@ -283,12 +320,13 @@ async def stream_from_peer( # TODO / NOTE `.canceller` won't have been set yet # here because that machinery is inside - # `.open_context().__aexit__()` BUT, if we had + # `Portal.open_context().__aexit__()` BUT, if we had # a way to know immediately (from the last # checkpoint) that cancellation was due to # a remote, we COULD assert this here..see, # https://github.com/goodboy/tractor/issues/368 # + # await tractor.pause() # assert 'canceller' in ctx.canceller # root/parent actor task should NEVER HAVE cancelled us! @@ -392,12 +430,13 @@ def test_peer_canceller( try: async with ( sleeper.open_context( - sleep_forever, + open_stream_then_sleep_forever, expect_ctxc=True, ) as (sleeper_ctx, sent), just_caller.open_context( stream_from_peer, + debug_mode=debug_mode, ) as (caller_ctx, sent), canceller.open_context( @@ -423,10 +462,11 @@ def test_peer_canceller( # should always raise since this root task does # not request the sleeper cancellation ;) - except ContextCancelled as ctxerr: + except ContextCancelled as _ctxc: + ctxc = _ctxc print( 'CAUGHT REMOTE CONTEXT CANCEL\n\n' - f'{ctxerr}\n' + f'{ctxc}\n' ) # canceller and caller peers should not @@ -437,7 +477,7 @@ def test_peer_canceller( # we were not the actor, our peer was assert not sleeper_ctx.cancel_acked - assert ctxerr.canceller[0] == 'canceller' + assert ctxc.canceller[0] == 'canceller' # XXX NOTE XXX: since THIS `ContextCancelled` # HAS NOT YET bubbled up to the @@ -448,7 +488,7 @@ def test_peer_canceller( # CASE_1: error-during-ctxc-handling, if error_during_ctxerr_handling: - raise RuntimeError('Simulated error during teardown') + raise RuntimeError('Simulated RTE re-raise during ctxc handling') # CASE_2: standard teardown inside in `.open_context()` block raise @@ -513,6 +553,9 @@ def test_peer_canceller( # should be cancelled by US. # if error_during_ctxerr_handling: + print(f'loc_err: {_loc_err}\n') + assert isinstance(loc_err, RuntimeError) + # since we do a rte reraise above, the # `.open_context()` error handling should have # raised a local rte, thus the internal @@ -521,9 +564,6 @@ def test_peer_canceller( # a `trio.Cancelled` due to a local # `._scope.cancel()` call. 
assert not sleeper_ctx._scope.cancelled_caught - - assert isinstance(loc_err, RuntimeError) - print(f'_loc_err: {_loc_err}\n') # assert sleeper_ctx._local_error is _loc_err # assert sleeper_ctx._local_error is _loc_err assert not ( @@ -560,9 +600,12 @@ def test_peer_canceller( else: # the other 2 ctxs assert ( - re.canceller - == - canceller.channel.uid + isinstance(re, ContextCancelled) + and ( + re.canceller + == + canceller.channel.uid + ) ) # since the sleeper errors while handling a @@ -811,8 +854,7 @@ async def serve_subactors( async with open_nursery() as an: # sanity - if debug_mode: - assert tractor._state.debug_mode() + assert tractor._state.debug_mode() == debug_mode await ctx.started(peer_name) async with ctx.open_stream() as ipc: @@ -1091,7 +1133,6 @@ def test_peer_spawns_and_cancels_service_subactor( '-> root checking `client_ctx.result()`,\n' f'-> checking that sub-spawn {peer_name} is down\n' ) - # else: try: res = await client_ctx.result(hide_tb=False) diff --git a/tests/test_spawning.py b/tests/test_spawning.py index 5995ed2d..99ec9abc 100644 --- a/tests/test_spawning.py +++ b/tests/test_spawning.py @@ -2,7 +2,9 @@ Spawning basics """ -from typing import Optional +from typing import ( + Any, +) import pytest import trio @@ -25,13 +27,11 @@ async def spawn( async with tractor.open_root_actor( arbiter_addr=reg_addr, ): - actor = tractor.current_actor() assert actor.is_arbiter == is_arbiter data = data_to_pass_down if actor.is_arbiter: - async with tractor.open_nursery() as nursery: # forks here @@ -95,7 +95,9 @@ async def test_movie_theatre_convo(start_method): await portal.cancel_actor() -async def cellar_door(return_value: Optional[str]): +async def cellar_door( + return_value: str|None, +): return return_value @@ -105,16 +107,18 @@ async def cellar_door(return_value: Optional[str]): ) @tractor_test async def test_most_beautiful_word( - start_method, - return_value + start_method: str, + return_value: Any, + debug_mode: bool, ): ''' The main ``tractor`` routine. ''' with trio.fail_after(1): - async with tractor.open_nursery() as n: - + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as n: portal = await n.run_in_actor( cellar_door, return_value=return_value, -- 2.34.1 From b22ee84d268fef6ad4a89a7f8d882638196fa1b6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 16:51:51 -0400 Subject: [PATCH 101/190] Modernize streaming example script - add typing, - apply multi-line call style, - use 'cancel' log level, - enable debug mode. --- examples/full_fledged_streaming_service.py | 45 +++++++++++++++------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/examples/full_fledged_streaming_service.py b/examples/full_fledged_streaming_service.py index c93df242..be4c372e 100644 --- a/examples/full_fledged_streaming_service.py +++ b/examples/full_fledged_streaming_service.py @@ -1,6 +1,11 @@ import time import trio import tractor +from tractor import ( + ActorNursery, + MsgStream, + Portal, +) # this is the first 2 actors, streamer_1 and streamer_2 @@ -12,14 +17,18 @@ async def stream_data(seed): # this is the third actor; the aggregator async def aggregate(seed): - """Ensure that the two streams we receive match but only stream + ''' + Ensure that the two streams we receive match but only stream a single set of values to the parent. 
- """ - async with tractor.open_nursery() as nursery: - portals = [] + + ''' + an: ActorNursery + async with tractor.open_nursery() as an: + portals: list[Portal] = [] for i in range(1, 3): - # fork point - portal = await nursery.start_actor( + + # fork/spawn call + portal = await an.start_actor( name=f'streamer_{i}', enable_modules=[__name__], ) @@ -43,7 +52,11 @@ async def aggregate(seed): async with trio.open_nursery() as n: for portal in portals: - n.start_soon(push_to_chan, portal, send_chan.clone()) + n.start_soon( + push_to_chan, + portal, + send_chan.clone(), + ) # close this local task's reference to send side await send_chan.aclose() @@ -60,7 +73,7 @@ async def aggregate(seed): print("FINISHED ITERATING in aggregator") - await nursery.cancel() + await an.cancel() print("WAITING on `ActorNursery` to finish") print("AGGREGATOR COMPLETE!") @@ -75,18 +88,21 @@ async def main() -> list[int]: ''' # yes, a nursery which spawns `trio`-"actors" B) - nursery: tractor.ActorNursery - async with tractor.open_nursery() as nursery: + an: ActorNursery + async with tractor.open_nursery( + loglevel='cancel', + debug_mode=True, + ) as an: seed = int(1e3) pre_start = time.time() - portal: tractor.Portal = await nursery.start_actor( + portal: Portal = await an.start_actor( name='aggregator', enable_modules=[__name__], ) - stream: tractor.MsgStream + stream: MsgStream async with portal.open_stream_from( aggregate, seed=seed, @@ -95,11 +111,12 @@ async def main() -> list[int]: start = time.time() # the portal call returns exactly what you'd expect # as if the remote "aggregate" function was called locally - result_stream = [] + result_stream: list[int] = [] async for value in stream: result_stream.append(value) - await portal.cancel_actor() + cancelled: bool = await portal.cancel_actor() + assert cancelled print(f"STREAM TIME = {time.time() - start}") print(f"STREAM + SPAWN TIME = {time.time() - pre_start}") -- 2.34.1 From e3b1c13eba54e6610208b5e9cc5b06f3843ac6df Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 10 May 2024 13:15:45 -0400 Subject: [PATCH 102/190] Rename `.msg.types.Msg` -> `PayloadMsg` --- tractor/msg/__init__.py | 2 +- tractor/msg/_codec.py | 2 +- tractor/msg/types.py | 26 +++++++++++++------------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 13739cdb..44586f2d 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -44,7 +44,7 @@ from ._codec import ( # ) from .types import ( - Msg as Msg, + PayloadMsg as PayloadMsg, Aid as Aid, SpawnSpec as SpawnSpec, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 901c0da1..6ba23b78 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -432,7 +432,7 @@ class MsgCodec(Struct): # ) -> Any|Struct: - # msg: Msg = codec.dec.decode(msg) + # msg: PayloadMsg = codec.dec.decode(msg) # payload_tag: str = msg.header.payload_tag # payload_dec: msgpack.Decoder = codec._payload_decs[payload_tag] # return payload_dec.decode(msg.pld) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 1b3733cb..f8205c23 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -302,7 +302,7 @@ class StartAck( class Started( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -316,12 +316,12 @@ class Started( # TODO: instead of using our existing `Start` # for this (as we did with the original `{'cmd': ..}` style) -# class Cancel(Msg): +# class Cancel: # cid: str class Yield( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -348,7 +348,7 @@ 
class Stop( # TODO: is `Result` or `Out[come]` a better name? class Return( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -360,7 +360,7 @@ class Return( class CancelAck( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -466,14 +466,14 @@ def from_dict_msg( # TODO: should be make a msg version of `ContextCancelled?` # and/or with a scope field or a full `ActorCancelled`? -# class Cancelled(Msg): +# class Cancelled(MsgType): # cid: str # TODO what about overruns? -# class Overrun(Msg): +# class Overrun(MsgType): # cid: str -_runtime_msgs: list[Msg] = [ +_runtime_msgs: list[Struct] = [ # identity handshake on first IPC `Channel` contact. Aid, @@ -499,9 +499,9 @@ _runtime_msgs: list[Msg] = [ ] # the no-outcome-yet IAC (inter-actor-communication) sub-set which -# can be `Msg.pld` payload field type-limited by application code +# can be `PayloadMsg.pld` payload field type-limited by application code # using `apply_codec()` and `limit_msg_spec()`. -_payload_msgs: list[Msg] = [ +_payload_msgs: list[PayloadMsg] = [ # first from `Context.started()` Started, @@ -544,8 +544,8 @@ def mk_msg_spec( ] = 'indexed_generics', ) -> tuple[ - Union[Type[Msg]], - list[Type[Msg]], + Union[MsgType], + list[MsgType], ]: ''' Create a payload-(data-)type-parameterized IPC message specification. @@ -557,7 +557,7 @@ def mk_msg_spec( determined by the input `payload_type_union: Union[Type]`. ''' - submsg_types: list[Type[Msg]] = Msg.__subclasses__() + submsg_types: list[MsgType] = Msg.__subclasses__() bases: tuple = ( # XXX NOTE XXX the below generic-parameterization seems to # be THE ONLY way to get this to work correctly in terms -- 2.34.1 From 8bab8e8bde4b8acf93b828f90ae7b0751c3c9784 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 May 2024 11:39:04 -0400 Subject: [PATCH 103/190] Always release debug request from `._post_mortem()` Since obviously the thread is likely expected to halt and raise after the REPL session exits; this was a regression from the prior impl. The main reason for this is that otherwise the request task will never unblock if the user steps through the crashed task using 'next' since the `.do_next()` handler doesn't by default release the request since in the `.pause()` case this would end the session too early. Other, - toss in draft `Pdb.user_exception()`, though doesn't seem to ever trigger? - only release `Lock._debug_lock` when already locked. --- tractor/devx/_debug.py | 43 +++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index da322407..a789c6ce 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -249,7 +249,10 @@ class Lock: message: str = 'TTY lock not held by any child\n' except RuntimeError as rte: - message: str = 'TTY lock FAILED to release for child??\n' + message: str = ( + 'TTY lock FAILED to release for child??\n' + f'{current_task()}\n' + ) log.exception(message) # uhhh makes no sense but been seeing the non-owner @@ -755,6 +758,16 @@ class PdbREPL(pdbp.Pdb): status = DebugStatus + # NOTE: see details in stdlib's `bdb.py` + def user_exception(self, frame, exc_info): + ''' + Called when we stop on an exception. + ''' + log.warning( + 'Exception during REPL sesh\n\n' + f'{frame}\n\n' + f'{exc_info}\n\n' + ) # def preloop(self): # print('IN PRELOOP') @@ -780,7 +793,11 @@ class PdbREPL(pdbp.Pdb): # NOTE: for subactors the stdio lock is released via the # allocated RPC locker task, so for root we have to do it # manually. 
- if is_root_process(): + if ( + is_root_process() + and + Lock._debug_lock.locked() + ): Lock.release() def set_quit(self): @@ -791,7 +808,11 @@ class PdbREPL(pdbp.Pdb): cancel_req_task=False, ) - if is_root_process(): + if ( + is_root_process() + and + Lock._debug_lock.locked() + ): Lock.release() # TODO: special handling where we just want the next LOC and @@ -803,7 +824,7 @@ class PdbREPL(pdbp.Pdb): # try: # super().set_next(frame) # finally: - # Lock.release() + # pdbp.set_trace() # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch @@ -1251,7 +1272,7 @@ def shield_sigint_handler( # child actor that has locked the debugger elif not is_root_process(): - log.warning( + log.debug( f'Subactor {actor.uid} handling SIGINT\n\n' f'{Lock.repr()}\n' ) @@ -1484,8 +1505,11 @@ async def _pause( ): # re-entrant root process already has it: noop. log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'ignoring..' + f'This root actor task is already within an active REPL session\n' + f'Ignoring this re-entered `tractor.pause()`\n' + f'task: {task.name}\n' + f'REPL: {Lock.repl}\n' + # TODO: use `._frame_stack` scanner to find the @api_frame ) await trio.lowlevel.checkpoint() return @@ -1609,6 +1633,7 @@ async def _pause( log.exception( 'Failed to engage debugger via `_pause()` ??\n' ) + mk_pdb().set_trace() DebugStatus.release() # sanity checks for ^ on request/status teardown @@ -1926,6 +1951,10 @@ def _post_mortem( # frame=None, traceback=tb, ) + # Since we presume the post-mortem was enaged to a task-ending + # error, we MUST release the local REPL request so that not other + # local task nor the root remains blocked! + DebugStatus.release() async def post_mortem( -- 2.34.1 From 76563264848645a2f033295970dc2e53199b975b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 May 2024 15:22:13 -0400 Subject: [PATCH 104/190] Make `request_root_stdio_lock()` post-mortem-able Finally got this working so that if/when an internal bug is introduced to this request task-func, we can actually REPL-debug the lock request task itself B) As in, if the subactor's lock request task internally errors we, - ensure the task always terminates (by calling `DebugStatus.release()`) and explicitly reports (via a `log.exception()`) the internal error. - capture the error instance and set as a new `DebugStatus.req_err` and always check for it on final teardown - in which case we also, - ensure it's reraised from a new `DebugRequestError`. - unhide the stack frames for `_pause()`, `_enter_repl_sync()` so that the dev can upward inspect the `_pause()` call stack sanely. Supporting internal impl changes, - add `DebugStatus.cancel()` and `.req_err`. - don't ever cancel the request task from `PdbREPL.set_[continue/quit]()` only when there's some internal error that would likely result in a hang and stale lock state with the root. - only release the root's lock when the current ask is also the owner (avoids bad release errors). - also show internal `._pause()`-related frames on any `repl_err`. Other temp-dev-tweaks, - make pld-dec change log msgs info level again while solving this final context-vars race stuff.. - drop the debug pld-dec instance match asserts for now since the problem is already caught (and now debug-able B) by an attr-error on the decoded-as-`dict` started msg, and instead add in a `log.exception()` trace to see which task is triggering the case where the debug `MsgDec` isn't set correctly vs. 
when we think it's being applied. --- tractor/devx/_debug.py | 341 +++++++++++++++++++++++++---------------- 1 file changed, 207 insertions(+), 134 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index a789c6ce..1e82122c 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -234,49 +234,26 @@ class Lock: cls, force: bool = False, ): - lock: trio.StrictFIFOLock = cls._debug_lock try: - if lock.locked(): + lock: trio.StrictFIFOLock = cls._debug_lock + owner: Task = lock.statistics().owner + if ( + lock.locked() + and + owner is current_task() + # ^-NOTE-^ if not will raise a RTE.. + ): if not DebugStatus.is_main_trio_thread(): trio.from_thread.run_sync( cls._debug_lock.release ) else: cls._debug_lock.release() + message: str = 'TTY lock released for child\n' - message: str = 'TTY lock released for child\n' else: message: str = 'TTY lock not held by any child\n' - except RuntimeError as rte: - message: str = ( - 'TTY lock FAILED to release for child??\n' - f'{current_task()}\n' - ) - log.exception(message) - - # uhhh makes no sense but been seeing the non-owner - # release error even though this is definitely the task - # that locked? - owner = cls._debug_lock.statistics().owner - # if ( - # owner - # and - # cls.remote_task_in_debug is None - # ): - # raise RuntimeError( - # 'Stale `Lock` detected, no remote task active!?\n' - # f'|_{owner}\n' - # # f'{cls}' - # ) from rte - - if owner: - raise rte - - # OW suppress, can't member why tho .. XD - # something somethin corrupts a cancel-scope - # somewhere.. - finally: # IFF there are no more requesting tasks queued up fire, the # "tty-unlocked" event thereby alerting any monitors of the lock that @@ -518,11 +495,23 @@ async def lock_tty_for_child( locked=False, ) - except BaseException: - log.exception( - 'Errored during root TTY-lock dialog?\n' - 'Forcing release since an internal error caused this!\n' + except BaseException as req_err: + message: str = ( + 'Forcing `Lock.release()` since likely an internal error!\n' ) + if isinstance(req_err, trio.Cancelled): + log.cancel( + 'Cancelled during root TTY-lock dialog?\n' + + + message + ) + else: + log.exception( + 'Errored during root TTY-lock dialog?\n' + + + message + ) + Lock.release(force=True) raise @@ -555,6 +544,7 @@ class DebugStatus: repl_release: trio.Event|None = None req_finished: trio.Event|None = None lock_status: LockStatus|None = None + req_err: BaseException|None = None _orig_sigint_handler: Callable|None = None _trio_handler: ( @@ -693,28 +683,37 @@ class DebugStatus: # is not threading.main_thread() # ) + @classmethod + def cancel(cls) -> bool: + if (req_cs := cls.req_cs): + req_cs.cancel() + return True + + return False + @classmethod @pdbp.hideframe def release( cls, - cancel_req_task: bool = True, + cancel_req_task: bool = False, ): + repl_release: trio.Event = cls.repl_release try: # sometimes the task might already be terminated in # which case this call will raise an RTE? 
- if cls.repl_release is not None: - cls.repl_release.set() + if repl_release is not None: + repl_release.set() finally: # if req_ctx := cls.req_ctx: # req_ctx._scope.cancel() - - if ( - cancel_req_task - and - (req_cs := cls.req_cs) - ): - req_cs.cancel() + if cancel_req_task: + cancelled: bool = cls.cancel() + if not cancelled: + log.warning( + 'Failed to cancel request task!?\n' + f'{cls.repl_task}\n' + ) # restore original sigint handler cls.unshield_sigint() @@ -759,16 +758,19 @@ class PdbREPL(pdbp.Pdb): status = DebugStatus # NOTE: see details in stdlib's `bdb.py` - def user_exception(self, frame, exc_info): - ''' - Called when we stop on an exception. - ''' - log.warning( - 'Exception during REPL sesh\n\n' - f'{frame}\n\n' - f'{exc_info}\n\n' - ) + # def user_exception(self, frame, exc_info): + # ''' + # Called when we stop on an exception. + # ''' + # log.warning( + # 'Exception during REPL sesh\n\n' + # f'{frame}\n\n' + # f'{exc_info}\n\n' + # ) + # NOTE: this actually hooks but i don't see anyway to detect + # if an error was caught.. this is why currently we just always + # call `DebugStatus.release` inside `_post_mortem()`. # def preloop(self): # print('IN PRELOOP') # super().preloop() @@ -804,10 +806,7 @@ class PdbREPL(pdbp.Pdb): try: super().set_quit() finally: - DebugStatus.release( - cancel_req_task=False, - ) - + DebugStatus.release() if ( is_root_process() and @@ -863,7 +862,6 @@ def apply_debug_pldec() -> _codec.MsgCodec: (only in the current task). ''' - from tractor.msg import ( _ops as msgops, ) @@ -874,8 +872,12 @@ def apply_debug_pldec() -> _codec.MsgCodec: with msgops.limit_plds( spec=__pld_spec__, ) as debug_dec: - assert debug_dec is msgops.current_pldrx().pld_dec - log.runtime( + assert ( + debug_dec + is + msgops.current_pldrx().pld_dec + ) + log.info( 'Applied `.devx._debug` pld-spec\n\n' f'{debug_dec}\n' ) @@ -887,11 +889,12 @@ def apply_debug_pldec() -> _codec.MsgCodec: and plrx.pld_dec is orig_pldec ) - log.runtime( + log.info( 'Reverted to previous pld-spec\n\n' f'{orig_pldec}\n' ) + async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], @@ -911,6 +914,10 @@ async def request_root_stdio_lock( entering the REPL at the same time. ''' + + log.pdb( + 'Initing stdio-lock request task with root actor' + ) # TODO: likely we can implement this mutex more generally as # a `._sync.Lock`? # -[ ] simply add the wrapping needed for the debugger specifics? @@ -923,6 +930,8 @@ async def request_root_stdio_lock( DebugStatus.req_finished = trio.Event() try: from tractor._discovery import get_root + from tractor.msg import _ops as msgops + debug_dec: msgops.MsgDec with ( # NOTE: we need this to ensure that this task exits # BEFORE the REPl instance raises an error like @@ -953,12 +962,13 @@ async def request_root_stdio_lock( # apply_debug_pldec() as debug_dec, ): - log.critical( - 'Request cancel-scope is:\n\n' - f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' - - ) + # XXX: was orig for debugging cs stack corruption.. + # log.info( + # 'Request cancel-scope is:\n\n' + # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' + # ) DebugStatus.req_cs = req_cs + req_ctx: Context|None = None try: # TODO: merge into single async with ? 
async with get_root() as portal: @@ -966,31 +976,37 @@ async def request_root_stdio_lock( async with portal.open_context( lock_tty_for_child, subactor_task_uid=task_uid, - ) as (ctx, status): + ) as (req_ctx, status): - DebugStatus.req_ctx = ctx + DebugStatus.req_ctx = req_ctx + + # sanity checks on pld-spec limit state + assert debug_dec + # curr_pldrx: msgops.PldRx = msgops.current_pldrx() + # assert ( + # curr_pldrx.pld_dec is debug_dec + # ) - from tractor.msg import ( - _ops as msgops, - ) - assert ( - msgops.current_pldrx().pld_dec is debug_dec - ) log.debug( 'Subactor locked TTY with msg\n\n' f'{status}\n' ) # mk_pdb().set_trace() - assert status.subactor_uid == actor_uid - assert status.cid + try: + assert status.subactor_uid == actor_uid + assert status.cid + except AttributeError: + log.exception('failed pldspec asserts!') + raise # set last rxed lock dialog status. DebugStatus.lock_status = status - async with ctx.open_stream() as stream: + async with req_ctx.open_stream() as stream: + assert DebugStatus.repl_release - task_status.started(ctx) + task_status.started(req_ctx) # wait for local task to exit its # `PdbREPL.interaction()`, call @@ -1006,25 +1022,25 @@ async def request_root_stdio_lock( # sync with child-side root locker task # completion - status: LockStatus = await ctx.result() + status: LockStatus = await req_ctx.result() assert not status.locked DebugStatus.lock_status = status log.pdb( 'TTY lock was released for subactor with msg\n\n' f'{status}\n\n' - f'Exitting {ctx.side!r}-side of locking ctx' + f'Exitting {req_ctx.side!r}-side of locking req_ctx' ) except ( tractor.ContextCancelled, trio.Cancelled, ): - log.exception( - 'Debug lock request CANCELLED?\n\n' - f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' - f'{pformat_cs(ctx._scope, var_name="ctx._scope")}\n\n' - f'{ctx}' + log.cancel( + 'Debug lock request was CANCELLED?\n\n' + f'{req_ctx}\n' + # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' + # f'{pformat_cs(req_ctx._scope, var_name="req_ctx._scope")}\n\n' ) raise @@ -1033,11 +1049,11 @@ async def request_root_stdio_lock( ): log.exception( 'Failed during root TTY-lock dialog?\n' - f'{ctx}\n' + f'{req_ctx}\n' f'Cancelling IPC ctx!\n' ) - await ctx.cancel() + await req_ctx.cancel() raise @@ -1047,13 +1063,26 @@ async def request_root_stdio_lock( ): log.cancel( 'Debug lock request CANCELLED?\n' - f'{ctx}\n' + f'{req_ctx}\n' ) raise - except BaseException: - log.exception('Errored during root TTY-lock dialog?') - raise + except BaseException as req_err: + # log.error('Failed to request root stdio-lock?') + DebugStatus.req_err = req_err + DebugStatus.release() + + # TODO: how to dev a test that ensures we actually drop + # into THIS internal frame on any internal error in the above + # code? + # -[ ] eg. on failed pld_dec assert above we should be able + # to REPL pm it. + # -[ ]FURTHER, after we 'continue', we should be able to + # ctl-c out of the currently hanging task! + raise DebugRequestError( + 'Failed to lock stdio from subactor IPC ctx!\n\n' + f'req_ctx: {req_ctx}\n' + ) from req_err finally: log.debug('Exiting debugger TTY lock request func from child') @@ -1369,6 +1398,13 @@ def shield_sigint_handler( _pause_msg: str = 'Attaching to pdb REPL in actor' +class DebugRequestError(RuntimeError): + ''' + Failed to request stdio lock from root actor! 
+ + ''' + + async def _pause( debug_func: Callable|None, @@ -1480,15 +1516,18 @@ async def _pause( raise except BaseException: + __tracebackhide__: bool = False log.exception( 'Failed to invoke internal `debug_func = ' f'{debug_func.func.__name__}`\n' ) # NOTE: OW this is ONLY called from the # `.set_continue/next` hooks! - DebugStatus.release() + DebugStatus.release(cancel_req_task=True) + raise + repl_err: BaseException|None = None try: if is_root_process(): @@ -1584,43 +1623,45 @@ async def _pause( # actor._service_n.cancel_scope.shield = shield # ``` # but not entirely sure if that's a sane way to implement it? - try: - # NOTE spawn the stdio locker request task inside the - # current `Context._scope_nursery` to entsure that - # the request never can outlive the task's (parent) - # lifetime. - curr_ctx: Context = current_ipc_ctx() - # TODO: see `_errors_relayed_via_ipc()` where we - # should dynamically open a `debug_tn` for use here, - # BUT it needs to be outside the normal error - # catching and `_maybe_enter_debugger()` call! - # ctx: Context = await curr_ctx._debug_tn.start( - ctx: Context = await actor._service_n.start( - request_root_stdio_lock, - actor.uid, - (task.name, id(task)), # task uuid (effectively) - ) - # our locker task should be the one in ctx - # with the root actor - assert ( - ctx - is - DebugStatus.req_ctx - is not - curr_ctx - ) - # enter REPL - _enter_repl_sync(debug_func) + # NOTE currently we spawn the lock request task inside this + # subactor's global `Actor._service_n` so that the + # lifetime of the lock-request can outlive the current + # `._pause()` scope while the user steps through their + # application code and when they finally exit the + # session, via 'continue' or 'quit' cmds, the `PdbREPL` + # will manually call `DebugStatus.release()` to release + # the lock session with the root actor. + # + # TODO: ideally we can add a tighter scope for this + # request task likely by conditionally opening a "debug + # nursery" inside `_errors_relayed_via_ipc()`, see the + # todo in tht module, but + # -[ ] it needs to be outside the normal crash handling + # `_maybe_enter_debugger()` block-call. + # -[ ] we probably only need to allocate the nursery when + # we detect the runtime is already in debug mode. + # + # ctx: Context = await curr_ctx._debug_tn.start( + req_ctx: Context = await actor._service_n.start( + request_root_stdio_lock, + actor.uid, + (task.name, id(task)), # task uuid (effectively) + ) + # XXX sanity, our locker task should be the one which + # entered a new IPC ctx with the root actor, NOT the one + # that exists around the task calling into `._pause()`. + curr_ctx: Context = current_ipc_ctx() + assert ( + req_ctx + is + DebugStatus.req_ctx + is not + curr_ctx + ) - except RuntimeError: - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. - return - - raise + # enter REPL + _enter_repl_sync(debug_func) # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? @@ -1629,13 +1670,31 @@ async def _pause( log.devx( 'REPL for pdb was quit!\n' ) + + # when the actor is mid-runtime cancellation the + # `Actor._service_n` might get closed before we can spawn + # the request task, so just ignore expected RTE. 
+ elif ( + isinstance(repl_err, RuntimeError) + and + actor._cancel_called + ): + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. + log.warning( + 'Service nursery likely closed due to actor-runtime cancellation..\n' + 'Ignoring failed debugger lock request task spawn..\n' + ) + return + else: log.exception( 'Failed to engage debugger via `_pause()` ??\n' ) - mk_pdb().set_trace() - DebugStatus.release() + DebugStatus.release(cancel_req_task=True) + # sanity checks for ^ on request/status teardown assert DebugStatus.repl is None assert DebugStatus.repl_task is None @@ -1645,6 +1704,16 @@ async def _pause( raise + finally: + # always show frame when request fails due to internal + # failure in the above code (including an `BdbQuit`). + if ( + DebugStatus.req_err + or + repl_err + ): + __tracebackhide__: bool = False + def _set_trace( repl: PdbREPL, # passed by `_pause()` @@ -1703,7 +1772,7 @@ async def pause( https://en.wikipedia.org/wiki/Breakpoint ''' - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb # always start 1 level up from THIS in user code since normally # `tractor.pause()` is called explicitly by use-app code thus @@ -1885,12 +1954,15 @@ def pause_from_sync( # NOTE prefer a new "pause" semantic since it better describes # "pausing the actor's runtime" for this particular # paralell task to do debugging in a REPL. -async def breakpoint(**kwargs): +async def breakpoint( + hide_tb: bool = True, + **kwargs, +): log.warning( '`tractor.breakpoint()` is deprecated!\n' 'Please use `tractor.pause()` instead!\n' ) - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb await pause( api_frame=inspect.currentframe(), **kwargs, @@ -1951,6 +2023,7 @@ def _post_mortem( # frame=None, traceback=tb, ) + # XXX NOTE XXX: absolutely required to avoid hangs! # Since we presume the post-mortem was enaged to a task-ending # error, we MUST release the local REPL request so that not other # local task nor the root remains blocked! -- 2.34.1 From c67a04f978f492e00123480da4ed7de5585a9200 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 14:34:50 -0400 Subject: [PATCH 105/190] Allocate a `PldRx` per `Context`, new pld-spec API Since the state mgmt becomes quite messy with multiple sub-tasks inside an IPC ctx, AND bc generally speaking the payload-type-spec should map 1-to-1 with the `Context`, it doesn't make a lot of sense to be using `ContextVar`s to modify the `Context.pld_rx: PldRx` instance. Instead, always allocate a full instance inside `mk_context()` with the default `.pld_rx: PldRx` set to use the `msg._ops._def_any_pldec: MsgDec` In support, simplify the `.msg._ops` impl and APIs: - drop `_ctxvar_PldRx`, `_def_pld_rx` and `current_pldrx()`. - rename `PldRx._pldec` -> `._pld_dec`. - rename the unused `PldRx.apply_to_ipc()` -> `.wraps_ipc()`. - add a required `PldRx._ctx: Context` attr since it is needed internally in some meths and each pld-rx now maps to a specific ctx. - modify all recv methods to accept a `ipc: Context|MsgStream` (instead of a `ctx` arg) since both have a ref to the same `._rx_chan` and there are only a couple spots (in `.dec_msg()`) where we need the `ctx` explicitly (which can now be easily accessed via a new `MsgStream.ctx` property, see below). - always show the `.dec_msg()` frame in tbs if there's a reference error when calling `_raise_from_unexpected_msg()` in the fallthrough case. 
- implement `limit_plds()` as light wrapper around getting the `current_ipc_ctx()` and mutating its `MsgDec` via `Context.pld_rx.limit_plds()`. - add a `maybe_limit_plds()` which just provides an `@acm` equivalent of `limit_plds()` handy for composing in a `async with ():` style block (avoiding additional indent levels in the body of async funcs). Obvi extend the `Context` and `MsgStream` interfaces as needed to match the above: - add a `Context.pld_rx` pub prop. - new private refs to `Context._started_msg: Started` and a `._started_pld` (mostly for internal debugging / testing / logging) and set inside `.open_context()` immediately after the syncing phase. - a `Context.has_outcome() -> bool:` predicate which can be used to more easily determine if the ctx errored or has a final result. - pub props for `MsgStream.ctx: Context` and `.chan: Channel` providing full `ipc`-arg compat with the `PldRx` method signatures. --- tractor/_context.py | 145 ++++++++++++++++++++---------- tractor/_streaming.py | 25 +++++- tractor/msg/_ops.py | 205 +++++++++++++++++++----------------------- 3 files changed, 212 insertions(+), 163 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index b58010b6..152efdee 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -41,6 +41,7 @@ from typing import ( Callable, Mapping, Type, + TypeAlias, TYPE_CHECKING, Union, ) @@ -155,6 +156,41 @@ class Context: # payload receiver _pld_rx: msgops.PldRx + @property + def pld_rx(self) -> msgops.PldRx: + ''' + The current `tractor.Context`'s msg-payload-receiver. + + A payload receiver is the IPC-msg processing sub-sys which + filters inter-actor-task communicated payload data, i.e. the + `PayloadMsg.pld: PayloadT` field value, AFTER its container + shuttlle msg (eg. `Started`/`Yield`/`Return) has been + delivered up from `tractor`'s transport layer but BEFORE the + data is yielded to `tractor` application code. + + The "IPC-primitive API" is normally one of a `Context` (this)` or a `MsgStream` + or some higher level API using one of them. + + For ex. `pld_data: PayloadT = MsgStream.receive()` implicitly + calls into the stream's parent `Context.pld_rx.recv_pld().` to + receive the latest `PayloadMsg.pld` value. + + Modification of the current payload spec via `limit_plds()` + allows a `tractor` application to contextually filter IPC + payload content with a type specification as supported by the + interchange backend. + + - for `msgspec` see . + + Note that the `PldRx` itself is a per-`Context` instance that + normally only changes when some (sub-)task, on a given "side" + of the IPC ctx (either a "child"-side RPC or inside + a "parent"-side `Portal.open_context()` block), modifies it + using the `.msg._ops.limit_plds()` API. 
+ + ''' + return self._pld_rx + # full "namespace-path" to target RPC function _nsf: NamespacePath @@ -231,6 +267,8 @@ class Context: # init and streaming state _started_called: bool = False + _started_msg: MsgType|None = None + _started_pld: Any = None _stream_opened: bool = False _stream: MsgStream|None = None @@ -623,7 +661,7 @@ class Context: log.runtime( 'Setting remote error for ctx\n\n' f'<= {self.peer_side!r}: {self.chan.uid}\n' - f'=> {self.side!r}\n\n' + f'=> {self.side!r}: {self._actor.uid}\n\n' f'{error}' ) self._remote_error: BaseException = error @@ -678,7 +716,7 @@ class Context: log.error( f'Remote context error:\n\n' # f'{pformat(self)}\n' - f'{error}\n' + f'{error}' ) if self._canceller is None: @@ -724,8 +762,10 @@ class Context: ) else: message: str = 'NOT cancelling `Context._scope` !\n\n' + # from .devx import mk_pdb + # mk_pdb().set_trace() - fmt_str: str = 'No `self._scope: CancelScope` was set/used ?' + fmt_str: str = 'No `self._scope: CancelScope` was set/used ?\n' if ( cs and @@ -805,6 +845,7 @@ class Context: # f'{ci.api_nsp}()\n' # ) + # TODO: use `.dev._frame_stack` scanning to find caller! return 'Portal.open_context()' async def cancel( @@ -1304,17 +1345,6 @@ class Context: ctx=self, hide_tb=hide_tb, ) - for msg in drained_msgs: - - # TODO: mask this by default.. - if isinstance(msg, Return): - # from .devx import pause - # await pause() - # raise InternalError( - log.warning( - 'Final `return` msg should never be drained !?!?\n\n' - f'{msg}\n' - ) drained_status: str = ( 'Ctx drained to final outcome msg\n\n' @@ -1435,6 +1465,10 @@ class Context: self._result ) + @property + def has_outcome(self) -> bool: + return bool(self.maybe_error) or self._final_result_is_set() + # @property def repr_outcome( self, @@ -1637,8 +1671,6 @@ class Context: ) if rt_started != started_msg: - # TODO: break these methods out from the struct subtype? - # TODO: make that one a mod func too.. diff = pretty_struct.Struct.__sub__( rt_started, @@ -1674,6 +1706,8 @@ class Context: ) from verr self._started_called = True + self._started_msg = started_msg + self._started_pld = value async def _drain_overflows( self, @@ -1961,6 +1995,7 @@ async def open_context_from_portal( portal: Portal, func: Callable, + pld_spec: TypeAlias|None = None, allow_overruns: bool = False, # TODO: if we set this the wrapping `@acm` body will @@ -2026,7 +2061,7 @@ async def open_context_from_portal( # XXX NOTE XXX: currenly we do NOT allow opening a contex # with "self" since the local feeder mem-chan processing # is not built for it. - if portal.channel.uid == portal.actor.uid: + if (uid := portal.channel.uid) == portal.actor.uid: raise RuntimeError( '** !! Invalid Operation !! **\n' 'Can not open an IPC ctx with the local actor!\n' @@ -2054,32 +2089,45 @@ async def open_context_from_portal( assert ctx._caller_info _ctxvar_Context.set(ctx) - # XXX NOTE since `._scope` is NOT set BEFORE we retreive the - # `Started`-msg any cancellation triggered - # in `._maybe_cancel_and_set_remote_error()` will - # NOT actually cancel the below line! - # -> it's expected that if there is an error in this phase of - # the dialog, the `Error` msg should be raised from the `msg` - # handling block below. - first: Any = await ctx._pld_rx.recv_pld( - ctx=ctx, - expect_msg=Started, - ) - ctx._started_called: bool = True - - uid: tuple = portal.channel.uid - cid: str = ctx.cid - # placeholder for any exception raised in the runtime # or by user tasks which cause this context's closure. 
     scope_err: BaseException|None = None
     ctxc_from_callee: ContextCancelled|None = None
     try:
-        async with trio.open_nursery() as nurse:
+        async with (
+            trio.open_nursery() as tn,
+            msgops.maybe_limit_plds(
+                ctx=ctx,
+                spec=pld_spec,
+            ) as maybe_msgdec,
+        ):
+            if maybe_msgdec:
+                assert maybe_msgdec.pld_spec == pld_spec
 
-            # NOTE: used to start overrun queuing tasks
-            ctx._scope_nursery: trio.Nursery = nurse
-            ctx._scope: trio.CancelScope = nurse.cancel_scope
+            # XXX NOTE since `._scope` is NOT set BEFORE we retrieve the
+            # `Started`-msg any cancellation triggered
+            # in `._maybe_cancel_and_set_remote_error()` will
+            # NOT actually cancel the below line!
+            # -> it's expected that if there is an error in this phase of
+            # the dialog, the `Error` msg should be raised from the `msg`
+            # handling block below.
+            started_msg, first = await ctx._pld_rx.recv_msg_w_pld(
+                ipc=ctx,
+                expect_msg=Started,
+                passthrough_non_pld_msgs=False,
+            )
+
+            # from .devx import pause
+            # await pause()
+            ctx._started_called: bool = True
+            ctx._started_msg: MsgType = started_msg
+            ctx._started_pld: Any = first
+
+            # NOTE: this is an implicit runtime nursery used to,
+            # - start overrun queuing tasks,
+            # - cancel the scope opened by the user.
+            ctx._scope_nursery: trio.Nursery = tn
+            ctx._scope: trio.CancelScope = tn.cancel_scope
 
             # deliver context instance and .started() msg value
             # in enter tuple.
@@ -2126,13 +2174,13 @@ async def open_context_from_portal(
 
             # when in allow_overruns mode there may be
             # lingering overflow sender tasks remaining?
-            if nurse.child_tasks:
+            if tn.child_tasks:
                 # XXX: ensure we are in overrun state
                 # with ``._allow_overruns=True`` bc otherwise
                 # there should be no tasks in this nursery!
                 if (
                     not ctx._allow_overruns
-                    or len(nurse.child_tasks) > 1
+                    or len(tn.child_tasks) > 1
                 ):
                     raise InternalError(
                         'Context has sub-tasks but is '
@@ -2304,8 +2352,8 @@ async def open_context_from_portal(
         ):
             log.warning(
                 'IPC connection for context is broken?\n'
-                f'task:{cid}\n'
-                f'actor:{uid}'
+                f'task: {ctx.cid}\n'
+                f'actor: {uid}'
             )
 
             raise  # duh
@@ -2455,9 +2503,8 @@ async def open_context_from_portal(
             and ctx.cancel_acked
         ):
             log.cancel(
-                'Context cancelled by {ctx.side!r}-side task\n'
+                f'Context cancelled by {ctx.side!r}-side task\n'
                 f'|_{ctx._task}\n\n'
-
                 f'{repr(scope_err)}\n'
             )
 
@@ -2485,7 +2532,7 @@ async def open_context_from_portal(
                 f'cid: {ctx.cid}\n'
             )
             portal.actor._contexts.pop(
-                (uid, cid),
+                (uid, ctx.cid),
                 None,
             )
 
@@ -2516,8 +2563,9 @@ def mk_context(
     from .devx._frame_stack import find_caller_info
     caller_info: CallerInfo|None = find_caller_info()
 
-    # TODO: when/how do we apply `.limit_plds()` from here?
-    pld_rx: msgops.PldRx = msgops.current_pldrx()
+    pld_rx = msgops.PldRx(
+        _pld_dec=msgops._def_any_pldec,
+    )
 
     ctx = Context(
         chan=chan,
@@ -2531,13 +2579,16 @@ def mk_context(
         _caller_info=caller_info,
         **kwargs,
     )
+    pld_rx._ctx = ctx
     ctx._result = Unresolved
     return ctx
 
 
 # TODO: use the new type-parameters to annotate this in 3.13?
# -[ ] https://peps.python.org/pep-0718/#unknown-types -def context(func: Callable) -> Callable: +def context( + func: Callable, +) -> Callable: ''' Mark an (async) function as an SC-supervised, inter-`Actor`, child-`trio.Task`, IPC endpoint otherwise known more diff --git a/tractor/_streaming.py b/tractor/_streaming.py index dd4cd0e1..a008eaf5 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -52,6 +52,7 @@ from tractor.msg import ( if TYPE_CHECKING: from ._context import Context + from ._ipc import Channel log = get_logger(__name__) @@ -65,10 +66,10 @@ log = get_logger(__name__) class MsgStream(trio.abc.Channel): ''' A bidirectional message stream for receiving logically sequenced - values over an inter-actor IPC ``Channel``. + values over an inter-actor IPC `Channel`. This is the type returned to a local task which entered either - ``Portal.open_stream_from()`` or ``Context.open_stream()``. + `Portal.open_stream_from()` or `Context.open_stream()`. Termination rules: @@ -95,6 +96,22 @@ class MsgStream(trio.abc.Channel): self._eoc: bool|trio.EndOfChannel = False self._closed: bool|trio.ClosedResourceError = False + @property + def ctx(self) -> Context: + ''' + This stream's IPC `Context` ref. + + ''' + return self._ctx + + @property + def chan(self) -> Channel: + ''' + Ref to the containing `Context`'s transport `Channel`. + + ''' + return self._ctx.chan + # TODO: could we make this a direct method bind to `PldRx`? # -> receive_nowait = PldRx.recv_pld # |_ means latter would have to accept `MsgStream`-as-`self`? @@ -109,7 +126,7 @@ class MsgStream(trio.abc.Channel): ): ctx: Context = self._ctx return ctx._pld_rx.recv_pld_nowait( - ctx=ctx, + ipc=self, expect_msg=expect_msg, ) @@ -148,7 +165,7 @@ class MsgStream(trio.abc.Channel): try: ctx: Context = self._ctx - return await ctx._pld_rx.recv_pld(ctx=ctx) + return await ctx._pld_rx.recv_pld(ipc=self) # XXX: the stream terminates on either of: # - via `self._rx_chan.receive()` raising after manual closure diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 3b0b8339..3014c15b 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -22,10 +22,9 @@ operational helpers for processing transaction flows. ''' from __future__ import annotations from contextlib import ( - # asynccontextmanager as acm, + asynccontextmanager as acm, contextmanager as cm, ) -from contextvars import ContextVar from typing import ( Any, Type, @@ -50,6 +49,7 @@ from tractor._exceptions import ( _mk_msg_type_err, pack_from_raise, ) +from tractor._state import current_ipc_ctx from ._codec import ( mk_dec, MsgDec, @@ -75,7 +75,7 @@ if TYPE_CHECKING: log = get_logger(__name__) -_def_any_pldec: MsgDec = mk_dec() +_def_any_pldec: MsgDec[Any] = mk_dec() class PldRx(Struct): @@ -104,15 +104,19 @@ class PldRx(Struct): ''' # TODO: better to bind it here? # _rx_mc: trio.MemoryReceiveChannel - _pldec: MsgDec + _pld_dec: MsgDec + _ctx: Context|None = None _ipc: Context|MsgStream|None = None @property def pld_dec(self) -> MsgDec: - return self._pldec + return self._pld_dec + # TODO: a better name? + # -[ ] when would this be used as it avoids needingn to pass the + # ipc prim to every method @cm - def apply_to_ipc( + def wraps_ipc( self, ipc_prim: Context|MsgStream, @@ -140,49 +144,50 @@ class PldRx(Struct): exit. 
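+
+        A short usage sketch (`MySpec` being some assumed
+        `msgspec.Struct` payload spec):
+
+            with ctx.pld_rx.limit_plds(spec=MySpec) as dec:
+                assert dec.pld_spec == MySpec
+                # any `.pld` decoded inside this block must
+                # type-check against `MySpec`..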
''' - orig_dec: MsgDec = self._pldec + orig_dec: MsgDec = self._pld_dec limit_dec: MsgDec = mk_dec(spec=spec) try: - self._pldec = limit_dec + self._pld_dec = limit_dec yield limit_dec finally: - self._pldec = orig_dec + self._pld_dec = orig_dec @property def dec(self) -> msgpack.Decoder: - return self._pldec.dec + return self._pld_dec.dec def recv_pld_nowait( self, # TODO: make this `MsgStream` compat as well, see above^ # ipc_prim: Context|MsgStream, - ctx: Context, + ipc: Context|MsgStream, ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, - + hide_tb: bool = False, **dec_msg_kwargs, ) -> Any|Raw: - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb msg: MsgType = ( ipc_msg or # sync-rx msg from underlying IPC feeder (mem-)chan - ctx._rx_chan.receive_nowait() + ipc._rx_chan.receive_nowait() ) return self.dec_msg( msg, - ctx=ctx, + ipc=ipc, expect_msg=expect_msg, + hide_tb=hide_tb, **dec_msg_kwargs, ) async def recv_pld( self, - ctx: Context, + ipc: Context|MsgStream, ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, hide_tb: bool = True, @@ -200,11 +205,11 @@ class PldRx(Struct): or # async-rx msg from underlying IPC feeder (mem-)chan - await ctx._rx_chan.receive() + await ipc._rx_chan.receive() ) return self.dec_msg( msg=msg, - ctx=ctx, + ipc=ipc, expect_msg=expect_msg, **dec_msg_kwargs, ) @@ -212,7 +217,7 @@ class PldRx(Struct): def dec_msg( self, msg: MsgType, - ctx: Context, + ipc: Context|MsgStream, expect_msg: Type[MsgType]|None, raise_error: bool = True, @@ -225,6 +230,9 @@ class PldRx(Struct): ''' __tracebackhide__: bool = hide_tb + + _src_err = None + src_err: BaseException|None = None match msg: # payload-data shuttle msg; deliver the `.pld` value # directly to IPC (primitive) client-consumer code. @@ -234,7 +242,7 @@ class PldRx(Struct): |Return(pld=pld) # termination phase ): try: - pld: PayloadT = self._pldec.decode(pld) + pld: PayloadT = self._pld_dec.decode(pld) log.runtime( 'Decoded msg payload\n\n' f'{msg}\n\n' @@ -243,25 +251,30 @@ class PldRx(Struct): ) return pld - # XXX pld-type failure - except ValidationError as src_err: + # XXX pld-value type failure + except ValidationError as valerr: + # pack mgterr into error-msg for + # reraise below; ensure remote-actor-err + # info is displayed nicely? msgterr: MsgTypeError = _mk_msg_type_err( msg=msg, codec=self.pld_dec, - src_validation_error=src_err, + src_validation_error=valerr, is_invalid_payload=True, ) msg: Error = pack_from_raise( local_err=msgterr, cid=msg.cid, - src_uid=ctx.chan.uid, + src_uid=ipc.chan.uid, ) + src_err = valerr # XXX some other decoder specific failure? # except TypeError as src_error: # from .devx import mk_pdb # mk_pdb().set_trace() # raise src_error + # ^-TODO-^ can remove? # a runtime-internal RPC endpoint response. 
# always passthrough since (internal) runtime @@ -299,6 +312,7 @@ class PldRx(Struct): return src_err case Stop(cid=cid): + ctx: Context = getattr(ipc, 'ctx', ipc) message: str = ( f'{ctx.side!r}-side of ctx received stream-`Stop` from ' f'{ctx.peer_side!r} peer ?\n' @@ -341,14 +355,21 @@ class PldRx(Struct): # |_https://docs.python.org/3.11/library/exceptions.html#BaseException.add_note # # fallthrough and raise from `src_err` - _raise_from_unexpected_msg( - ctx=ctx, - msg=msg, - src_err=src_err, - log=log, - expect_msg=expect_msg, - hide_tb=hide_tb, - ) + try: + _raise_from_unexpected_msg( + ctx=getattr(ipc, 'ctx', ipc), + msg=msg, + src_err=src_err, + log=log, + expect_msg=expect_msg, + hide_tb=hide_tb, + ) + except UnboundLocalError: + # XXX if there's an internal lookup error in the above + # code (prolly on `src_err`) we want to show this frame + # in the tb! + __tracebackhide__: bool = False + raise async def recv_msg_w_pld( self, @@ -378,52 +399,13 @@ class PldRx(Struct): # msg instance? pld: PayloadT = self.dec_msg( msg, - ctx=ipc, + ipc=ipc, expect_msg=expect_msg, **kwargs, ) return msg, pld -# Always maintain a task-context-global `PldRx` -_def_pld_rx: PldRx = PldRx( - _pldec=_def_any_pldec, -) -_ctxvar_PldRx: ContextVar[PldRx] = ContextVar( - 'pld_rx', - default=_def_pld_rx, -) - - -def current_pldrx() -> PldRx: - ''' - Return the current `trio.Task.context`'s msg-payload-receiver. - - A payload receiver is the IPC-msg processing sub-sys which - filters inter-actor-task communicated payload data, i.e. the - `PayloadMsg.pld: PayloadT` field value, AFTER it's container - shuttlle msg (eg. `Started`/`Yield`/`Return) has been delivered - up from `tractor`'s transport layer but BEFORE the data is - yielded to application code, normally via an IPC primitive API - like, for ex., `pld_data: PayloadT = MsgStream.receive()`. - - Modification of the current payload spec via `limit_plds()` - allows a `tractor` application to contextually filter IPC - payload content with a type specification as supported by - the interchange backend. - - - for `msgspec` see . - - NOTE that the `PldRx` itself is a per-`Context` global sub-system - that normally does not change other then the applied pld-spec - for the current `trio.Task`. - - ''' - # ctx: context = current_ipc_ctx() - # return ctx._pld_rx - return _ctxvar_PldRx.get() - - @cm def limit_plds( spec: Union[Type[Struct]], @@ -439,29 +421,55 @@ def limit_plds( ''' __tracebackhide__: bool = True try: - # sanity on orig settings - orig_pldrx: PldRx = current_pldrx() - orig_pldec: MsgDec = orig_pldrx.pld_dec + curr_ctx: Context = current_ipc_ctx() + rx: PldRx = curr_ctx._pld_rx + orig_pldec: MsgDec = rx.pld_dec - with orig_pldrx.limit_plds( + with rx.limit_plds( spec=spec, **kwargs, ) as pldec: - log.info( + log.runtime( 'Applying payload-decoder\n\n' f'{pldec}\n' ) yield pldec finally: - log.info( + log.runtime( 'Reverted to previous payload-decoder\n\n' f'{orig_pldec}\n' ) - assert ( - (pldrx := current_pldrx()) is orig_pldrx - and - pldrx.pld_dec is orig_pldec - ) + # sanity on orig settings + assert rx.pld_dec is orig_pldec + + +@acm +async def maybe_limit_plds( + ctx: Context, + spec: Union[Type[Struct]]|None = None, + **kwargs, +) -> MsgDec|None: + ''' + Async compat maybe-payload type limiter. + + Mostly for use inside other internal `@acm`s such that a separate + indent block isn't needed when an async one is already being + used. 
+ + ''' + if spec is None: + yield None + return + + # sanity on scoping + curr_ctx: Context = current_ipc_ctx() + assert ctx is curr_ctx + + with ctx._pld_rx.limit_plds(spec=spec) as msgdec: + yield msgdec + + curr_ctx: Context = current_ipc_ctx() + assert ctx is curr_ctx async def drain_to_final_msg( @@ -543,21 +551,12 @@ async def drain_to_final_msg( match msg: # final result arrived! - case Return( - # cid=cid, - # pld=res, - ): - # ctx._result: Any = res - ctx._result: Any = pld + case Return(): log.runtime( 'Context delivered final draining msg:\n' f'{pretty_struct.pformat(msg)}' ) - # XXX: only close the rx mem chan AFTER - # a final result is retreived. - # if ctx._rx_chan: - # await ctx._rx_chan.aclose() - # TODO: ^ we don't need it right? + ctx._result: Any = pld result_msg = msg break @@ -664,24 +663,6 @@ async def drain_to_final_msg( result_msg = msg break # OOOOOF, yeah obvi we need this.. - # XXX we should never really get here - # right! since `._deliver_msg()` should - # always have detected an {'error': ..} - # msg and already called this right!?! - # elif error := unpack_error( - # msg=msg, - # chan=ctx._portal.channel, - # hide_tb=False, - # ): - # log.critical('SHOULD NEVER GET HERE!?') - # assert msg is ctx._cancel_msg - # assert error.msgdata == ctx._remote_error.msgdata - # assert error.ipc_msg == ctx._remote_error.ipc_msg - # from .devx._debug import pause - # await pause() - # ctx._maybe_cancel_and_set_remote_error(error) - # ctx._maybe_raise_remote_err(error) - else: # bubble the original src key error raise -- 2.34.1 From 7dc98084804c91bf4a8bddefd1948d3ffc2d6eca Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 15:47:01 -0400 Subject: [PATCH 106/190] Expose `tractor.current_ipc_ctx()` at pkg level --- tractor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tractor/__init__.py b/tractor/__init__.py index bd9b8610..c15a391b 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -43,6 +43,7 @@ from ._supervise import ( from ._state import ( current_actor as current_actor, is_root_process as is_root_process, + current_ipc_ctx as current_ipc_ctx, ) from ._exceptions import ( ContextCancelled as ContextCancelled, -- 2.34.1 From 0d4d461c415816a83ef71a7f2d9221d9c4458f25 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:07:57 -0400 Subject: [PATCH 107/190] Adjust `Portal` usage of `Context.pld_rx` Pass the new `ipc` arg and try to show api frames when an unexpected internal error is detected. --- tractor/_portal.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 700f2fdc..2c676e12 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -166,13 +166,19 @@ class Portal: assert self._expect_result_ctx if self._final_result_msg is None: - ( - self._final_result_msg, - self._final_result_pld, - ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( - ipc=self._expect_result_ctx, - expect_msg=Return, - ) + try: + ( + self._final_result_msg, + self._final_result_pld, + ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( + ipc=self._expect_result_ctx, + expect_msg=Return, + ) + except BaseException as err: + # TODO: wrap this into `@api_frame` optionally with + # some kinda filtering mechanism like log levels? 
+ __tracebackhide__: bool = False + raise err return self._final_result_pld @@ -306,7 +312,7 @@ class Portal: portal=self, ) return await ctx._pld_rx.recv_pld( - ctx=ctx, + ipc=ctx, expect_msg=Return, ) @@ -325,6 +331,8 @@ class Portal: remote rpc task or a local async generator instance. ''' + __runtimeframe__: int = 1 # noqa + if isinstance(func, str): warnings.warn( "`Portal.run(namespace: str, funcname: str)` is now" @@ -358,7 +366,7 @@ class Portal: portal=self, ) return await ctx._pld_rx.recv_pld( - ctx=ctx, + ipc=ctx, expect_msg=Return, ) -- 2.34.1 From cc6b2d4057a7df08686b6874c77a274b6bbc7526 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:11:59 -0400 Subject: [PATCH 108/190] Shield channel closing in `_connect_chan()` --- tractor/_ipc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 70774bed..511a053c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -716,4 +716,5 @@ async def _connect_chan( chan = Channel((host, port)) await chan.connect() yield chan - await chan.aclose() + with trio.CancelScope(shield=True): + await chan.aclose() -- 2.34.1 From da770f70d6f0c90b42f43b399c7268ea3ba4b42b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:12:51 -0400 Subject: [PATCH 109/190] Add error suppress flag to `current_ipc_ctx()` --- tractor/_state.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tractor/_state.py b/tractor/_state.py index a3729833..8c5cca14 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -124,9 +124,15 @@ _ctxvar_Context: ContextVar[Context] = ContextVar( ) -def current_ipc_ctx() -> Context: +def current_ipc_ctx( + error_on_not_set: bool = False, +) -> Context|None: ctx: Context = _ctxvar_Context.get() - if not ctx: + + if ( + not ctx + and error_on_not_set + ): from ._exceptions import InternalError raise InternalError( 'No IPC context has been allocated for this task yet?\n' -- 2.34.1 From 117d6177e86e15bdbdeebbf15b73ababbe9bbe12 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:18:42 -0400 Subject: [PATCH 110/190] Set `_ctxvar_Context` for child-side RPC tasks Just inside `._invoke()` after the `ctx: Context` is retrieved. Also try our best to *not hide* internal frames when a non-user-code crash happens, normally either due to a runtime RPC EP bug or a transport failure. --- tractor/_rpc.py | 55 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b8dc42b6..0f200d0e 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -69,7 +69,6 @@ from .msg import ( from tractor.msg.types import ( CancelAck, Error, - Msg, MsgType, Return, Start, @@ -248,10 +247,17 @@ async def _errors_relayed_via_ipc( ] = trio.TASK_STATUS_IGNORED, ) -> None: + # NOTE: we normally always hide this frame in call-stack tracebacks + # if the crash originated from an RPC task (since normally the + # user is only going to care about their own code not this + # internal runtime frame) and we DID NOT + # fail due to an IPC transport error! __tracebackhide__: bool = hide_tb + # TODO: a debug nursery when in debug mode! 
# async with maybe_open_debugger_nursery() as debug_tn: # => see matching comment in side `._debug._pause()` + rpc_err: BaseException|None = None try: yield # run RPC invoke body @@ -262,16 +268,7 @@ async def _errors_relayed_via_ipc( BaseExceptionGroup, KeyboardInterrupt, ) as err: - - # NOTE: always hide this frame from debug REPL call stack - # if the crash originated from an RPC task and we DID NOT - # fail due to an IPC transport error! - if ( - is_rpc - and - chan.connected() - ): - __tracebackhide__: bool = hide_tb + rpc_err = err # TODO: maybe we'll want different "levels" of debugging # eventualy such as ('app', 'supervisory', 'runtime') ? @@ -316,11 +313,19 @@ async def _errors_relayed_via_ipc( api_frame=inspect.currentframe(), ) if not entered_debug: + # if we prolly should have entered the REPL but + # didn't, maybe there was an internal error in + # the above code and we do want to show this + # frame! + if _state.debug_mode(): + __tracebackhide__: bool = False + log.exception( 'RPC task crashed\n' f'|_{ctx}' ) + # ALWAYS try to ship RPC errors back to parent/caller task if is_rpc: @@ -353,6 +358,20 @@ async def _errors_relayed_via_ipc( # `Actor._service_n`, we add "handles" to each such that # they can be individually ccancelled. finally: + + # if the error is not from user code and instead a failure + # of a runtime RPC or transport failure we do prolly want to + # show this frame + if ( + rpc_err + and ( + not is_rpc + or + not chan.connected() + ) + ): + __tracebackhide__: bool = False + try: ctx: Context func: Callable @@ -442,9 +461,10 @@ async def _invoke( # open the stream with this option. # allow_overruns=True, ) - context: bool = False + context_ep_func: bool = False - assert not _state._ctxvar_Context.get() + # set the current IPC ctx var for this RPC task + _state._ctxvar_Context.set(ctx) # TODO: deprecate this style.. if getattr(func, '_tractor_stream_function', False): @@ -473,7 +493,7 @@ async def _invoke( # handle decorated ``@tractor.context`` async function elif getattr(func, '_tractor_context_function', False): kwargs['ctx'] = ctx - context = True + context_ep_func = True # errors raised inside this block are propgated back to caller async with _errors_relayed_via_ipc( @@ -499,7 +519,7 @@ async def _invoke( raise # TODO: impl all these cases in terms of the `Context` one! - if not context: + if not context_ep_func: await _invoke_non_context( actor, cancel_scope, @@ -569,7 +589,6 @@ async def _invoke( async with trio.open_nursery() as tn: ctx._scope_nursery = tn ctx._scope = tn.cancel_scope - _state._ctxvar_Context.set(ctx) task_status.started(ctx) # TODO: should would be nice to have our @@ -829,7 +848,7 @@ async def process_messages( (as utilized inside `Portal.cancel_actor()` ). ''' - assert actor._service_n # state sanity + assert actor._service_n # runtime state sanity # TODO: once `trio` get's an "obvious way" for req/resp we # should use it? 
@@ -842,7 +861,7 @@ async def process_messages( # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L1175 # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L659 nursery_cancelled_before_task: bool = False - msg: Msg|None = None + msg: MsgType|None = None try: # NOTE: this internal scope allows for keeping this # message loop running despite the current task having -- 2.34.1 From 8b85b023f526a7e67079b31700b82b04a79c4fd4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 17:04:30 -0400 Subject: [PATCH 111/190] Show runtime nursery frames on internal errors Much like other recent changes attempt to detect runtime-bug-causing crashes and only show the runtime-endpoint frame when present. Adds a `ActorNursery._scope_error: BaseException|None` attr to aid with detection. Also toss in some todo notes for removing and replacing the `.run_in_actor()` method API. --- tractor/_supervise.py | 50 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 59ec728b..8f3574bb 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -84,6 +84,7 @@ class ActorNursery: ria_nursery: trio.Nursery, da_nursery: trio.Nursery, errors: dict[tuple[str, str], BaseException], + ) -> None: # self.supervisor = supervisor # TODO self._actor: Actor = actor @@ -105,6 +106,7 @@ class ActorNursery: self._at_least_one_child_in_debug: bool = False self.errors = errors self.exited = trio.Event() + self._scope_error: BaseException|None = None # NOTE: when no explicit call is made to # `.open_root_actor()` by application code, @@ -117,7 +119,9 @@ class ActorNursery: async def start_actor( self, name: str, + *, + bind_addrs: list[tuple[str, int]] = [_default_bind_addr], rpc_module_paths: list[str]|None = None, enable_modules: list[str]|None = None, @@ -125,6 +129,7 @@ class ActorNursery: nursery: trio.Nursery|None = None, debug_mode: bool|None = None, infect_asyncio: bool = False, + ) -> Portal: ''' Start a (daemon) actor: an process that has no designated @@ -189,6 +194,13 @@ class ActorNursery: ) ) + # TODO: DEPRECATE THIS: + # -[ ] impl instead as a hilevel wrapper on + # top of a `@context` style invocation. + # |_ dynamic @context decoration on child side + # |_ implicit `Portal.open_context() as (ctx, first):` + # and `return first` on parent side. 
+ # -[ ] use @api_frame on the wrapper async def run_in_actor( self, @@ -221,7 +233,7 @@ class ActorNursery: # use the explicit function name if not provided name = fn.__name__ - portal = await self.start_actor( + portal: Portal = await self.start_actor( name, enable_modules=[mod_path] + ( enable_modules or rpc_module_paths or [] @@ -250,6 +262,7 @@ class ActorNursery: ) return portal + # @api_frame async def cancel( self, hard_kill: bool = False, @@ -346,7 +359,12 @@ async def _open_and_supervise_one_cancels_all_nursery( actor: Actor, ) -> typing.AsyncGenerator[ActorNursery, None]: - __tracebackhide__ = True + + # normally don't need to show user by default + __tracebackhide__: bool = True + + outer_err: BaseException|None = None + inner_err: BaseException|None = None # the collection of errors retreived from spawned sub-actors errors: dict[tuple[str, str], BaseException] = {} @@ -356,7 +374,7 @@ async def _open_and_supervise_one_cancels_all_nursery( # handling errors that are generated by the inner nursery in # a supervisor strategy **before** blocking indefinitely to wait for # actors spawned in "daemon mode" (aka started using - # ``ActorNursery.start_actor()``). + # `ActorNursery.start_actor()`). # errors from this daemon actor nursery bubble up to caller async with trio.open_nursery() as da_nursery: @@ -391,7 +409,8 @@ async def _open_and_supervise_one_cancels_all_nursery( ) an._join_procs.set() - except BaseException as inner_err: + except BaseException as _inner_err: + inner_err = _inner_err errors[actor.uid] = inner_err # If we error in the root but the debugger is @@ -469,8 +488,10 @@ async def _open_and_supervise_one_cancels_all_nursery( Exception, BaseExceptionGroup, trio.Cancelled + ) as _outer_err: + outer_err = _outer_err - ) as err: + an._scope_error = outer_err or inner_err # XXX: yet another guard before allowing the cancel # sequence in case a (single) child is in debug. @@ -485,7 +506,7 @@ async def _open_and_supervise_one_cancels_all_nursery( if an._children: log.cancel( 'Actor-nursery cancelling due error type:\n' - f'{err}\n' + f'{outer_err}\n' ) with trio.CancelScope(shield=True): await an.cancel() @@ -512,6 +533,13 @@ async def _open_and_supervise_one_cancels_all_nursery( else: raise list(errors.values())[0] + # show frame on any (likely) internal error + if ( + not an.cancelled + and an._scope_error + ): + __tracebackhide__: bool = False + # da_nursery scope end - nursery checkpoint # final exit @@ -537,7 +565,7 @@ async def open_nursery( which cancellation scopes correspond to each spawned subactor set. 
     '''
-    __tracebackhide__ = True
+    __tracebackhide__: bool = True
     implicit_runtime: bool = False
     actor: Actor = current_actor(err_on_no_runtime=False)
     an: ActorNursery|None = None
@@ -588,6 +616,14 @@ async def open_nursery(
             an.exited.set()
 
     finally:
+        # show frame on any internal runtime-scope error
+        if (
+            an
+            and not an.cancelled
+            and an._scope_error
+        ):
+            __tracebackhide__: bool = False
+
         msg: str = (
             'Actor-nursery exited\n'
             f'|_{an}\n'
-- 
2.34.1


From e1857413a3ec7888fc7c9a7d0be85dafa7f54b8b Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 21 May 2024 09:19:56 -0400
Subject: [PATCH 112/190] Resolve remaining debug-request race causing hangs

More or less by pedantically separating the root and subactor
request-syncing events such that they are always managed by the locking
IPC context task-funcs:
- for the root's "child"-side, `lock_tty_for_child()` directly creates
  and sets a new `Lock.req_handler_finished` inside a `finally:`
- for the sub's "parent"-side, `request_root_stdio_lock()` does the same
  with a new `DebugStatus.req_finished` event and separates it from
  the `.repl_release` event (which indicates a "c" or "q" from the user
  and thus exit of the REPL session) as well as sets a new `.req_task:
  trio.Task` to explicitly distinguish the app-user-task that enters
  the REPL from the paired bg task used to request the global root's
  stdio mutex alongside it.
- apply the `__pld_spec__` on "child"-side of the ctx using the new
  `Portal.open_context(pld_spec)` parameter support; drops use of any
  `ContextVar` malarky used prior for `PldRx` mgmt.
- removing `Lock.no_remote_has_tty` since it was a nebulous name and
  from the prior "everything is in a `Lock`" design..

------ - ------

More rigorous impl to handle various edge cases in `._pause()`:
- rejig `_enter_repl_sync()` to wrap the `debug_func == None` case
  inside maybe-internal-error handler blocks.
- better logic for recurrent vs. multi-task contention for REPL
  entry in subactors, by guarding using `DebugStatus.req_task` and by
  now waiting on the new `DebugStatus.req_finished` for the multi-task
  contention case.
- even better internal error handling and reporting for when this code
  is hacked on and possibly broken ;p

------ - ------

Updates to `.pause_from_sync()` support:
- add optional `actor`, `task` kwargs to `_set_trace()` to allow compat
  with the new explicit `debug_func` calling in `._pause()` and pass
  a `threading.Thread` for `task` in the `.to_thread()` usage case.
- add an `except` block that tries to show the frame on any internal
  error.

------ - ------

Relatedly includes a buncha cleanups/simplifications somewhat in prep
for some coming refinements (around `DebugStatus`):
- use all the new attrs mentioned above as needed in the SIGINT shielder.
- wait on `Lock.req_handler_finished` in `maybe_wait_for_debugger()`.
- dropping a ton of masked legacy code left in during the recent reworks.
- better comments, like on the use of `Context._scope` for shielding on
  the "child"-side to avoid the need to manage yet another cs.
- add/change-to lotsa `log.devx()` level emissions for those infos which
  are handy while hacking on the debugger but not ideal/necessary to be
  user visible.
- obvi add lotsa follow up todo notes!
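
A rough sketch of the two-event sync this settles on (names as added in
this patch; the flow is illustrative only, not verbatim runtime code):

  app task (enters REPL)           bg debug-request task
  ----------------------           ---------------------
  await _pause()
   |_spawns  ------------------->  request_root_stdio_lock()
                                    |_Portal.open_context(
                                        lock_tty_for_child,
                                        pld_spec=__pld_spec__,
                                      ) -> (req_ctx, LockStatus)
  PdbREPL.interaction()             await DebugStatus.repl_release.wait()
  user enters 'c'/'q'
   |_DebugStatus.release()
      |_repl_release.set()  ---->   sends LockRelease, exits req_ctx
                                     |_DebugStatus.req_finished.set()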
--- tractor/devx/_debug.py | 824 ++++++++++++++++++++++------------------- 1 file changed, 446 insertions(+), 378 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 1e82122c..877d2de6 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -73,10 +73,10 @@ from tractor._state import ( debug_mode, current_ipc_ctx, ) -from .pformat import ( - # pformat_caller_frame, - pformat_cs, -) +# from .pformat import ( +# pformat_caller_frame, +# pformat_cs, +# ) if TYPE_CHECKING: from tractor._ipc import Channel @@ -190,8 +190,8 @@ class Lock: # a stale lock condition (eg. IPC failure with the locking # child ctx_in_debug: Context|None = None + req_handler_finished: trio.Event|None = None - no_remote_has_tty: trio.Event|None = None _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() _blocked: set[ tuple[str, str] # `Actor.uid` for per actor @@ -209,13 +209,12 @@ class Lock: if is_root_process(): lock_stats: trio.LockStatistics = cls._debug_lock.statistics() fields += ( - f'no_remote_has_tty: {cls.no_remote_has_tty}\n' + f'req_handler_finished: {cls.req_handler_finished}\n' + f'_blocked: {cls._blocked}\n\n' - - f'ctx_in_debug: {cls.ctx_in_debug}\n\n' - f'_debug_lock: {cls._debug_lock}\n' f'lock_stats: {lock_stats}\n' + ) body: str = textwrap.indent( @@ -225,7 +224,9 @@ class Lock: return ( f'<{cls.__name__}(\n' f'{body}' - ')>' + ')>\n\n' + + f'{cls.ctx_in_debug}\n' ) @classmethod @@ -234,16 +235,23 @@ class Lock: cls, force: bool = False, ): + message: str = 'TTY lock not held by any child\n' + + if not (is_trio_main := DebugStatus.is_main_trio_thread()): + task: threading.Thread = threading.current_thread() + else: + task: trio.Task = current_task() + try: lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( lock.locked() and - owner is current_task() + owner is task # ^-NOTE-^ if not will raise a RTE.. ): - if not DebugStatus.is_main_trio_thread(): + if not is_trio_main: trio.from_thread.run_sync( cls._debug_lock.release ) @@ -251,45 +259,27 @@ class Lock: cls._debug_lock.release() message: str = 'TTY lock released for child\n' - else: - message: str = 'TTY lock not held by any child\n' - finally: # IFF there are no more requesting tasks queued up fire, the # "tty-unlocked" event thereby alerting any monitors of the lock that # we are now back in the "tty unlocked" state. This is basically # and edge triggered signal around an empty queue of sub-actor # tasks that may have tried to acquire the lock. - stats = cls._debug_lock.statistics() + lock_stats = cls._debug_lock.statistics() + req_handler_finished: trio.Event|None = Lock.req_handler_finished if ( - not stats.owner + not lock_stats.owner or force - # and cls.no_remote_has_tty is not None + and req_handler_finished is None ): message += '-> No more child ctx tasks hold the TTY lock!\n' - # set and release - if cls.no_remote_has_tty is not None: - cls.no_remote_has_tty.set() - cls.no_remote_has_tty = None - - # cls.remote_task_in_debug = None - - else: - message += ( - f'-> Not signalling `Lock.no_remote_has_tty` since it has value:{cls.no_remote_has_tty}\n' - ) - - else: - # wakeup any waiters since the lock was released - # (presumably) temporarily. 
- if no_remote_has_tty := cls.no_remote_has_tty: - no_remote_has_tty.set() - no_remote_has_tty = trio.Event() - + elif req_handler_finished: + req_stats = req_handler_finished.statistics() message += ( f'-> A child ctx task still owns the `Lock` ??\n' - f' |_owner task: {stats.owner}\n' + f' |_lock_stats: {lock_stats}\n' + f' |_req_stats: {req_stats}\n' ) cls.ctx_in_debug = None @@ -299,8 +289,6 @@ class Lock: async def acquire( cls, ctx: Context, - # subactor_uid: tuple[str, str], - # remote_task_uid: str, ) -> AsyncIterator[trio.StrictFIFOLock]: ''' @@ -328,7 +316,6 @@ class Lock: ) stats = cls._debug_lock.statistics() if owner := stats.owner: - # and cls.no_remote_has_tty is not None pre_msg += ( f'\n' f'`Lock` already held by local task?\n' @@ -347,12 +334,6 @@ class Lock: await cls._debug_lock.acquire() cls.ctx_in_debug = ctx we_acquired = True - if cls.no_remote_has_tty is None: - # mark the tty lock as being in use so that the runtime - # can try to avoid clobbering any connection from a child - # that's currently relying on it. - cls.no_remote_has_tty = trio.Event() - # cls.remote_task_in_debug = remote_task_uid log.runtime( f'TTY lock acquired for sub-actor\n' @@ -373,11 +354,7 @@ class Lock: finally: message :str = 'Exiting `Lock.acquire()` on behalf of sub-actor\n' - if ( - we_acquired - # and - # cls._debug_lock.locked() - ): + if we_acquired: message += '-> TTY lock released by child\n' cls.release() @@ -392,7 +369,6 @@ class Lock: @tractor.context async def lock_tty_for_child( - ctx: Context, subactor_task_uid: tuple[str, int], @@ -409,13 +385,11 @@ async def lock_tty_for_child( ''' subactor_uid: tuple[str, str] = ctx.chan.uid - # NOTE: we use the IPC ctx's cancel scope directly in order to - # ensure that on any transport failure, or cancellation request - # from the child we expect - # `Context._maybe_cancel_and_set_remote_error()` to cancel this - # scope despite the shielding we apply below. - debug_lock_cs: CancelScope = ctx._scope + # mark the tty lock as being in use so that the runtime + # can try to avoid clobbering any connection from a child + # that's currently relying on it. + we_finished = Lock.req_handler_finished = trio.Event() try: if ctx.cid in Lock._blocked: raise RuntimeError( @@ -437,18 +411,15 @@ async def lock_tty_for_child( f'remote task: {subactor_task_uid}\n' ) ctx._enter_debugger_on_cancel: bool = False - await ctx.cancel(f'Debug lock blocked for {subactor_uid}') - # TODO: remove right? - # return LockStatus( - # subactor_uid=subactor_uid, - # cid=ctx.cid, - # locked=False, - # ) + message: str = ( + f'Debug lock blocked for {subactor_uid}\n' + 'Cancelling debug request!\n' + ) + log.cancel(message) + await ctx.cancel() + raise DebugRequestError(message) - # TODO: when we get to true remote debugging - # this will deliver stdin data? - - log.debug( + log.devx( 'Subactor attempting to acquire TTY lock\n' f'root task: {root_task_name}\n' f'subactor_uid: {subactor_uid}\n' @@ -456,13 +427,33 @@ async def lock_tty_for_child( ) DebugStatus.shield_sigint() Lock._blocked.add(ctx.cid) - with ( - # enable the locking msgspec - apply_debug_pldec(), - ): + + # NOTE: we use the IPC ctx's cancel scope directly in order to + # ensure that on any transport failure, or cancellation request + # from the child we expect + # `Context._maybe_cancel_and_set_remote_error()` to cancel this + # scope despite the shielding we apply below. 
+ debug_lock_cs: CancelScope = ctx._scope + + # TODO: use `.msg._ops.maybe_limit_plds()` here instead so we + # can merge into a single async with, with the + # `Lock.acquire()` enter below? + # + # enable the locking msgspec + with apply_debug_pldec(): async with Lock.acquire(ctx=ctx): debug_lock_cs.shield = True + log.devx( + 'Subactor acquired debugger request lock!\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n\n' + + 'Sending `ctx.started(LockStatus)`..\n' + + ) + # indicate to child that we've locked stdio await ctx.started( LockStatus( @@ -472,7 +463,9 @@ async def lock_tty_for_child( ) ) - log.debug( f'Actor {subactor_uid} acquired TTY lock') + log.devx( + f'Actor {subactor_uid} acquired `Lock` via debugger request' + ) # wait for unlock pdb by child async with ctx.open_stream() as stream: @@ -480,14 +473,16 @@ async def lock_tty_for_child( # TODO: security around only releasing if # these match? - log.pdb( + log.devx( f'TTY lock released requested\n\n' f'{release_msg}\n' ) assert release_msg.cid == ctx.cid assert release_msg.subactor_uid == tuple(subactor_uid) - log.debug(f'Actor {subactor_uid} released TTY lock') + log.devx( + f'Actor {subactor_uid} released TTY lock' + ) return LockStatus( subactor_uid=subactor_uid, @@ -497,29 +492,33 @@ async def lock_tty_for_child( except BaseException as req_err: message: str = ( - 'Forcing `Lock.release()` since likely an internal error!\n' + 'Forcing `Lock.release()` for req-ctx since likely an ' + 'internal error!\n\n' + f'{ctx}' ) if isinstance(req_err, trio.Cancelled): - log.cancel( + message = ( 'Cancelled during root TTY-lock dialog?\n' + message ) else: - log.exception( + message = ( 'Errored during root TTY-lock dialog?\n' + message ) + log.exception(message) Lock.release(force=True) raise finally: Lock._blocked.remove(ctx.cid) - if (no_locker := Lock.no_remote_has_tty): - no_locker.set() + # wakeup any waiters since the lock was (presumably) + # released, possibly only temporarily. + we_finished.set() DebugStatus.unshield_sigint() @@ -538,14 +537,23 @@ class DebugStatus: ''' repl: PdbREPL|None = None + + # TODO: yet again this looks like a task outcome where we need + # to sync to the completion of one task (and get its result) + # being used everywhere for syncing.. + # -[ ] see if we can get our proto oco task-mngr to work for + # this? repl_task: Task|None = None + repl_release: trio.Event|None = None + + req_task: Task|None = None req_ctx: Context|None = None req_cs: CancelScope|None = None - repl_release: trio.Event|None = None req_finished: trio.Event|None = None - lock_status: LockStatus|None = None req_err: BaseException|None = None + lock_status: LockStatus|None = None + _orig_sigint_handler: Callable|None = None _trio_handler: ( Callable[[int, FrameType|None], Any] @@ -715,13 +723,13 @@ class DebugStatus: f'{cls.repl_task}\n' ) - # restore original sigint handler - cls.unshield_sigint() - # actor-local state, irrelevant for non-root. cls.repl_task = None cls.repl = None + # restore original sigint handler + cls.unshield_sigint() + class TractorConfig(pdbp.DefaultConfig): ''' @@ -814,17 +822,6 @@ class PdbREPL(pdbp.Pdb): ): Lock.release() - # TODO: special handling where we just want the next LOC and - # not to resume to the next pause/crash point? 
- # def set_next( - # self, - # frame: FrameType - # ) -> None: - # try: - # super().set_next(frame) - # finally: - # pdbp.set_trace() - # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch # my d$%&. @@ -855,6 +852,9 @@ class PdbREPL(pdbp.Pdb): return None +# TODO: prolly remove this and instead finally get our @context API +# supporting a msg/pld-spec via type annots as per, +# https://github.com/goodboy/tractor/issues/365 @cm def apply_debug_pldec() -> _codec.MsgCodec: ''' @@ -865,8 +865,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: from tractor.msg import ( _ops as msgops, ) - orig_plrx: msgops.PldRx = msgops.current_pldrx() - orig_pldec: msgops.MsgDec = orig_plrx.pld_dec + cctx: Context = current_ipc_ctx() + rx: msgops.PldRx = cctx.pld_rx + orig_pldec: msgops.MsgDec = rx.pld_dec try: with msgops.limit_plds( @@ -875,9 +876,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: assert ( debug_dec is - msgops.current_pldrx().pld_dec + rx.pld_dec ) - log.info( + log.runtime( 'Applied `.devx._debug` pld-spec\n\n' f'{debug_dec}\n' ) @@ -885,11 +886,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: finally: assert ( - (plrx := msgops.current_pldrx()) is orig_plrx - and - plrx.pld_dec is orig_pldec + rx.pld_dec is orig_pldec ) - log.info( + log.runtime( 'Reverted to previous pld-spec\n\n' f'{orig_pldec}\n' ) @@ -898,7 +897,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], - task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED + + shield: bool = False, + task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED, ): ''' Connect to the root actor of this process tree and RPC-invoke @@ -915,7 +916,7 @@ async def request_root_stdio_lock( ''' - log.pdb( + log.devx( 'Initing stdio-lock request task with root actor' ) # TODO: likely we can implement this mutex more generally as @@ -928,40 +929,22 @@ async def request_root_stdio_lock( # -[ ] technically we need a `RLock` since re-acquire should be a noop # - https://docs.python.org/3.8/library/multiprocessing.html#multiprocessing.RLock DebugStatus.req_finished = trio.Event() + DebugStatus.req_task = current_task() try: from tractor._discovery import get_root - from tractor.msg import _ops as msgops - debug_dec: msgops.MsgDec - with ( - # NOTE: we need this to ensure that this task exits - # BEFORE the REPl instance raises an error like - # `bdb.BdbQuit` directly, OW you get a trio cs stack - # corruption! - # Further, the since this task is spawned inside the - # `Context._scope_nursery: trio.Nursery`, once an RPC - # task errors that cs is cancel_called and so if we want - # to debug the TPC task that failed we need to shield - # against that expected `.cancel()` call and instead - # expect all of the `PdbREPL`.set_[continue/quit/]()` - # methods to unblock this task by setting the - # `.repl_release: # trio.Event`. - trio.CancelScope(shield=True) as req_cs, - - # NOTE: set it here in the locker request task bc it's - # possible for multiple such requests for the lock in any - # single sub-actor AND there will be a race between when the - # root locking task delivers the `Started(pld=LockStatus)` - # and when the REPL is actually entered by the requesting - # application task who called - # `.pause()`/`.post_mortem()`. 
-                #
-                # SO, applying the pld-spec here means it is only applied to
-                # this IPC-ctx request task, NOT any other task(s)
-                # including the one that actually enters the REPL. This
-                # is oc desired bc ow the debugged task will msg-type-error.
-                #
-                apply_debug_pldec() as debug_dec,
-            ):
+        # NOTE: we need this to ensure that this task exits
+        # BEFORE the REPL instance raises an error like
+        # `bdb.BdbQuit` directly, OW you get a trio cs stack
+        # corruption!
+        # Further, since this task is spawned inside the
+        # `Context._scope_nursery: trio.Nursery`, once an RPC
+        # task errors that cs is cancel_called and so if we want
+        # to debug the RPC task that failed we need to shield
+        # against that expected `.cancel()` call and instead
+        # expect all of the `PdbREPL.set_[continue/quit/]()`
+        # methods to unblock this task by setting the
+        # `.repl_release: trio.Event`.
+        with trio.CancelScope(shield=shield) as req_cs:
            # XXX: was orig for debugging cs stack corruption..
            # log.info(
            #     'Request cancel-scope is:\n\n'
@@ -972,46 +955,49 @@ async def request_root_stdio_lock(
            try:
                # TODO: merge into single async with ?
                async with get_root() as portal:
-
                    async with portal.open_context(
                        lock_tty_for_child,
                        subactor_task_uid=task_uid,
+
+                        # NOTE: set it here in the locker request task bc it's
+                        # possible for multiple such requests for the lock in any
+                        # single sub-actor AND there will be a race between when the
+                        # root locking task delivers the `Started(pld=LockStatus)`
+                        # and when the REPL is actually entered by the requesting
+                        # application task who called
+                        # `.pause()`/`.post_mortem()`.
+                        #
+                        # SO, applying the pld-spec here means it is only applied to
+                        # this IPC-ctx request task, NOT any other task(s)
+                        # including the one that actually enters the REPL. This
+                        # is oc desired bc ow the debugged task will msg-type-error.
+                        pld_spec=__pld_spec__,
+
                    ) as (req_ctx, status):

                        DebugStatus.req_ctx = req_ctx
-
-                        # sanity checks on pld-spec limit state
-                        assert debug_dec
-                        # curr_pldrx: msgops.PldRx = msgops.current_pldrx()
-                        # assert (
-                        #     curr_pldrx.pld_dec is debug_dec
-                        # )
-
-                        log.debug(
+                        log.devx(
                            'Subactor locked TTY with msg\n\n'
                            f'{status}\n'
                        )

-                        # mk_pdb().set_trace()
-                        try:
-                            assert status.subactor_uid == actor_uid
-                            assert status.cid
-                        except AttributeError:
-                            log.exception('failed pldspec asserts!')
-                            raise
+                        # try:
+                        assert status.subactor_uid == actor_uid
+                        assert status.cid
+                        # except AttributeError:
+                        #     log.exception('failed pldspec asserts!')
+                        #     mk_pdb().set_trace()
+                        #     raise

                        # set last rxed lock dialog status.
                        DebugStatus.lock_status = status

                        async with req_ctx.open_stream() as stream:
-
-                            assert DebugStatus.repl_release
                            task_status.started(req_ctx)

-                            # wait for local task to exit its
-                            # `PdbREPL.interaction()`, call
-                            # `DebugStatus.release()` and then
-                            # unblock here.
+                            # wait for local task to exit
+                            # `PdbREPL.interaction()`, normally via
+                            # a `DebugStatus.release()` call, and
+                            # then unblock us here.
                            await DebugStatus.repl_release.wait()
                            await stream.send(
                                LockRelease(
@@ -1026,10 +1012,10 @@ async def request_root_stdio_lock(
                        assert not status.locked
                        DebugStatus.lock_status = status

-                        log.pdb(
+                        log.devx(
                            'TTY lock was released for subactor with msg\n\n'
                            f'{status}\n\n'
-                            f'Exitting {req_ctx.side!r}-side of locking req_ctx'
+                            f'Exiting {req_ctx.side!r}-side of locking req_ctx\n'
                        )

            except (
@@ -1081,13 +1067,14 @@ async def request_root_stdio_lock(
        #    ctl-c out of the currently hanging task!
raise DebugRequestError( 'Failed to lock stdio from subactor IPC ctx!\n\n' - f'req_ctx: {req_ctx}\n' + f'req_ctx: {DebugStatus.req_ctx}\n' ) from req_err finally: - log.debug('Exiting debugger TTY lock request func from child') + log.devx('Exiting debugger TTY lock request func from child') # signal request task exit DebugStatus.req_finished.set() + DebugStatus.req_task = None def mk_pdb() -> PdbREPL: @@ -1321,31 +1308,40 @@ def shield_sigint_handler( DebugStatus.unshield_sigint() # do_cancel() - task: str|None = DebugStatus.repl_task + repl_task: str|None = DebugStatus.repl_task + req_task: str|None = DebugStatus.req_task if ( - task + repl_task and repl ): log.pdb( f'Ignoring SIGINT while local task using debug REPL\n' - f'|_{task}\n' + f'|_{repl_task}\n' f' |_{repl}\n' ) + elif req_task: + log.pdb( + f'Ignoring SIGINT while debug request task is open\n' + f'|_{req_task}\n' + ) else: msg: str = ( 'SIGINT shield handler still active BUT, \n\n' ) - if task is None: + if repl_task is None: msg += ( - f'- No local task claims to be in debug?\n' - f' |_{task}\n\n' + '- No local task claims to be in debug?\n' ) if repl is None: msg += ( - f'- No local REPL is currently active?\n' - f' |_{repl}\n\n' + '- No local REPL is currently active?\n' + ) + + if req_task is None: + msg += ( + '- No debug request task is active?\n' ) log.warning( @@ -1358,7 +1354,6 @@ def shield_sigint_handler( # XXX ensure that the reverted-to-handler actually is # able to rx what should have been **this** KBI ;) do_cancel() - # raise KeyboardInterrupt # TODO: how to handle the case of an intermediary-child actor # that **is not** marked in debug mode? See oustanding issue: @@ -1392,7 +1387,7 @@ def shield_sigint_handler( # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py # XXX only for tracing this handler - # log.warning('exiting SIGINT') + log.devx('exiting SIGINT') _pause_msg: str = 'Attaching to pdb REPL in actor' @@ -1420,14 +1415,9 @@ async def _pause( # is always show in the debugger on entry.. and there seems to # be no way to override it?.. # - # shield: bool = False, - hide_tb: bool = True, - - # bc, `debug_func()`, `_enter_repl_sync()` and `_pause()` - # extra_frames_up_when_async: int = 3, - + shield: bool = False, + hide_tb: bool = False, task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, - **debug_func_kwargs, ) -> None: @@ -1452,6 +1442,87 @@ async def _pause( 'for infected `asyncio` mode!' ) from rte + if debug_func is not None: + debug_func = partial(debug_func) + + repl: PdbREPL = repl or mk_pdb() + + # XXX NOTE XXX set it here to avoid ctl-c from cancelling a debug + # request from a subactor BEFORE the REPL is entered by that + # process. + DebugStatus.shield_sigint() + + # TODO: move this into a `open_debug_request()` @acm? + # -[ ] prolly makes the most sense to do the request + # task spawn as part of an `@acm` api which delivers the + # `DebugRequest` instance and ensures encapsing all the + # pld-spec and debug-nursery? + # -[ ] maybe make this a `PdbREPL` method or mod func? + # -[ ] factor out better, main reason for it is common logic for + # both root and sub repl entry + def _enter_repl_sync( + debug_func: Callable, + ) -> None: + __tracebackhide__: bool = hide_tb + + try: + # set local actor task to avoid recurrent + # entries/requests from the same local task (to the root + # process). 
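+            # (the `.repl_task` set here is also what the SIGINT
+            # shield handler reads to decide whether a ctl-c should
+            # be ignored while this task owns the REPL.)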
+            DebugStatus.repl_task = task
+            DebugStatus.repl = repl
+
+            # TODO: do we want to support using this **just** for the
+            # locking / common code (prolly to help address #320)?
+            if debug_func is None:
+                task_status.started(DebugStatus)
+
+            else:
+                log.warning(
+                    'Entering REPL for requesting task\n'
+                    f'{task}\n'
+                )
+                # block here one (at the appropriate frame *up*) where
+                # ``breakpoint()`` was awaited and begin handling stdio.
+                log.devx(
+                    'Entering sync world of the `pdb` REPL for task..\n'
+                    f'{repl}\n'
+                    f' |_{task}\n'
+                )
+
+                # invoke the low-level REPL activation routine which itself
+                # should call into a `Pdb.set_trace()` of some sort.
+                debug_func(
+                    repl=repl,
+                    hide_tb=hide_tb,
+                    **debug_func_kwargs,
+                )
+
+        except trio.Cancelled:
+            log.exception(
+                'Cancelled during invoke of internal `debug_func = '
+                f'{debug_func.func.__name__}`\n'
+            )
+            # XXX NOTE: DON'T release lock yet
+            raise
+
+        except BaseException:
+            __tracebackhide__: bool = False
+            log.exception(
+                'Failed to invoke internal `debug_func = '
+                f'{debug_func.func.__name__}`\n'
+            )
+            # NOTE: OW this is ONLY called from the
+            # `.set_continue/next` hooks!
+            DebugStatus.release(cancel_req_task=True)
+
+            raise
+
+    log.devx(
+        'Entering `._pause()` for requesting task\n'
+        f'|_{task}\n'
+    )
+
    # TODO: this should be created as part of `DebugRequest()` init
    # which should instead be a one-shot-use singleton much like
    # the `PdbREPL`.
@@ -1461,71 +1532,9 @@ async def _pause(
        DebugStatus.repl_release.is_set()
    ):
        DebugStatus.repl_release = trio.Event()
-
-    if debug_func is not None:
-        debug_func = partial(debug_func)
-
-    repl: PdbREPL = repl or mk_pdb()
-
-    # TODO: maybe make this a `PdbREPL` method or mod func?
-    # -[ ] factor out better, main reason for it is common logic for
-    #   both root and sub repl entry
-    def _enter_repl_sync(
-        debug_func: Callable,
-    ) -> None:
-        __tracebackhide__: bool = hide_tb
-
-        # TODO: do we want to support using this **just** for the
-        # locking / common code (prolly to help address #320)?
-        #
-        if debug_func is None:
-            task_status.started(DebugStatus)
-        else:
-            # block here one (at the appropriate frame *up*) where
-            # ``breakpoint()`` was awaited and begin handling stdio.
-            log.debug('Entering sync world of the `pdb` REPL..')
-
-            # XXX used by the SIGINT handler to check if
-            # THIS actor is in REPL interaction
-            try:
-                # TODO: move this into a `open_debug_request()` @acm?
-                # -[ ] prolly makes the most send to do the request
-                #    task spawn as part of an `@acm` api which
-                #    delivers the `DebugRequest` instance and ensures
-                #    encapsing all the pld-spec and debug-nursery?
-                #
-                # set local actor task to avoid recurrent
-                # entries/requests from the same local task
-                # (to the root process).
-                DebugStatus.repl_task = task
-                DebugStatus.repl = repl
-                DebugStatus.shield_sigint()
-
-                # enter `PdbREPL` specific method
-                debug_func(
-                    repl=repl,
-                    hide_tb=hide_tb,
-                    **debug_func_kwargs,
-                )
-            except trio.Cancelled:
-                log.exception(
-                    'Cancelled during invoke of internal `debug_func = '
-                    f'{debug_func.func.__name__}`\n'
-                )
-                # NOTE: DON'T release lock yet
-                raise
-
-            except BaseException:
-                __tracebackhide__: bool = False
-                log.exception(
-                    'Failed to invoke internal `debug_func = '
-                    f'{debug_func.func.__name__}`\n'
-                )
-                # NOTE: OW this is ONLY called from the
-                # `.set_continue/next` hooks!
-                DebugStatus.release(cancel_req_task=True)
-
-                raise
+    # ^-NOTE-^ this must be created BEFORE scheduling any subactor
+    # debug-req task since it needs to wait on it just after
+    # `.started()`-ing back its wrapping `.req_cs: CancelScope`.

    repl_err: BaseException|None = None
    try:
@@ -1579,38 +1588,61 @@ async def _pause(
            not is_root_process()
            and
            actor._parent_chan  # a connected child
        ):
-            if DebugStatus.repl_task:
+            repl_task: Task|None = DebugStatus.repl_task
+            req_task: Task|None = DebugStatus.req_task
+            if req_task:
+                log.warning(
+                    f'Already an ongoing repl request?\n'
+                    f'|_{req_task}\n\n'

-                # Recurrence entry case: this task already has the lock and
-                # is likely recurrently entering a breakpoint
+                    f'REPL task is\n'
+                    f'|_{repl_task}\n\n'
+
+                )
+            # Recurrent entry case.
+            # this task already has the lock and is likely
+            # recurrently entering a `.pause()`-point either bc,
+            # - someone is hacking on runtime internals and put
+            #   one inside code that gets called on the way to
+            #   this code,
+            # - a legit app task uses the 'next' command while in
+            #   a REPL sesh, and actually enters another
+            #   `.pause()` (in a loop or something).
                #
-                # NOTE: noop on recurrent entry case but we want to trigger
-                # a checkpoint to allow other actors error-propagate and
-                # potetially avoid infinite re-entries in some
-                # subactor that would otherwise not bubble until the
-                # next checkpoint was hit.
+            # XXX Any other case is likely a bug.
            if (
-                    (repl_task := DebugStatus.repl_task)
-                    and
-                    repl_task is task
+                repl_task
+            ):
+                if repl_task is task:
+                    log.warning(
+                        f'{task.name}@{actor.uid} already has TTY lock\n'
+                        f'ignoring..'
+                    )
+                    await trio.lowlevel.checkpoint()
+                    return
+
+                else:
+                    # if **this** actor is already in debug REPL we want
+                    # to maintain actor-local-task mutex access, so block
+                    # here waiting for the control to be released - this
+                    # -> allows for recursive entries to `tractor.pause()`
+                    log.warning(
+                        f'{task}@{actor.uid} already has TTY lock\n'
+                        f'waiting for release..'
+                    )
+                    await DebugStatus.repl_release.wait()
+                    await trio.sleep(0.1)
+
+            elif (
+                req_task
            ):
                log.warning(
-                    f'{task.name}@{actor.uid} already has TTY lock\n'
-                    f'ignoring..'
-                )
-                await trio.lowlevel.checkpoint()
-                return
+                    'Local task already has active debug request\n'
+                    f'|_{task}\n\n'

-            # if **this** actor is already in debug REPL we want
-            # to maintain actor-local-task mutex access, so block
-            # here waiting for the control to be released - this
-            # -> allows for recursive entries to `tractor.pause()`
-            log.warning(
-                f'{task.name}@{actor.uid} already has TTY lock\n'
-                f'waiting for release..'
-            )
-            await DebugStatus.repl_release.wait()
-            await trio.sleep(0.1)
+                    'Waiting for previous request to complete..\n'
+                )
+                await DebugStatus.req_finished.wait()

        # this **must** be awaited by the caller and is done using the
        # root nursery so that the debugger can continue to run without
@@ -1642,16 +1674,23 @@ async def _pause(
        #    -[ ] we probably only need to allocate the nursery when
        #       we detect the runtime is already in debug mode.
# - # ctx: Context = await curr_ctx._debug_tn.start( + curr_ctx: Context = current_ipc_ctx() + # req_ctx: Context = await curr_ctx._debug_tn.start( + log.devx( + 'Starting request task\n' + f'|_{task}\n' + ) req_ctx: Context = await actor._service_n.start( - request_root_stdio_lock, - actor.uid, - (task.name, id(task)), # task uuid (effectively) + partial( + request_root_stdio_lock, + actor_uid=actor.uid, + task_uid=(task.name, id(task)), # task uuid (effectively) + shield=shield, + ) ) # XXX sanity, our locker task should be the one which # entered a new IPC ctx with the root actor, NOT the one # that exists around the task calling into `._pause()`. - curr_ctx: Context = current_ipc_ctx() assert ( req_ctx is @@ -1665,8 +1704,8 @@ async def _pause( # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? - except BaseException as repl_err: - if isinstance(repl_err, bdb.BdbQuit): + except BaseException as pause_err: + if isinstance(pause_err, bdb.BdbQuit): log.devx( 'REPL for pdb was quit!\n' ) @@ -1675,7 +1714,7 @@ async def _pause( # `Actor._service_n` might get closed before we can spawn # the request task, so just ignore expected RTE. elif ( - isinstance(repl_err, RuntimeError) + isinstance(pause_err, RuntimeError) and actor._cancel_called ): @@ -1698,13 +1737,22 @@ async def _pause( # sanity checks for ^ on request/status teardown assert DebugStatus.repl is None assert DebugStatus.repl_task is None - req_ctx: Context = DebugStatus.req_ctx - if req_ctx: - assert req_ctx._scope.cancel_called + + # sanity, for when hackin on all this? + if not isinstance(pause_err, trio.Cancelled): + req_ctx: Context = DebugStatus.req_ctx + if req_ctx: + # XXX, bc the child-task in root might cancel it? + # assert req_ctx._scope.cancel_called + assert req_ctx.maybe_error raise finally: + # set in finally block of func.. this can be synced-to + # eventually with a debug_nursery somehow? + # assert DebugStatus.req_task is None + # always show frame when request fails due to internal # failure in the above code (including an `BdbQuit`). if ( @@ -1721,9 +1769,15 @@ def _set_trace( # partial-ed in by `.pause()` api_frame: FrameType, + + # optionally passed in to provide support for + # `pause_from_sync()` where + actor: tractor.Actor|None = None, + task: trio.Task|None = None, ): __tracebackhide__: bool = hide_tb - actor: tractor.Actor = current_actor() + actor: tractor.Actor = actor or current_actor() + task: task or current_task() # else: # TODO: maybe print the actor supervion tree up to the @@ -1731,8 +1785,10 @@ def _set_trace( log.pdb( f'{_pause_msg}\n' '|\n' - # TODO: make an `Actor.__repr()__` - f'|_ {current_task()} @ {actor.uid}\n' + # TODO: more compact pformating? + # -[ ] make an `Actor.__repr()__` + # -[ ] should we use `log.pformat_task_uid()`? + f'|_ {task} @ {actor.uid}\n' ) # presuming the caller passed in the "api frame" # (the last frame before user code - like `.pause()`) @@ -1747,7 +1803,7 @@ def _set_trace( async def pause( *, - hide_tb: bool = True, + hide_tb: bool = False, api_frame: FrameType|None = None, # TODO: figure out how to still make this work: @@ -1798,8 +1854,7 @@ async def pause( _set_trace, api_frame=api_frame, ), - - # task_status=task_status, + shield=shield, **_pause_kwargs ) # XXX avoid cs stack corruption when `PdbREPL.interaction()` @@ -1867,88 +1922,97 @@ async def maybe_init_greenback( # normally by remapping python's builtin breakpoint() hook to this # runtime aware version which takes care of all . 
def pause_from_sync( + hide_tb: bool = False, + # proxied to `_pause()` + + **_pause_kwargs, + # for eg. + # shield: bool = False, + # api_frame: FrameType|None = None, + ) -> None: __tracebackhide__: bool = hide_tb - actor: tractor.Actor = current_actor( - err_on_no_runtime=False, - ) - log.debug( - f'{actor.uid}: JUST ENTERED `tractor.pause_from_sync()`' - f'|_{actor}\n' - ) - if not actor: - raise RuntimeError( - 'Not inside the `tractor`-runtime?\n' - '`tractor.pause_from_sync()` is not functional without a wrapping\n' - '- `async with tractor.open_nursery()` or,\n' - '- `async with tractor.open_root_actor()`\n' + try: + actor: tractor.Actor = current_actor( + err_on_no_runtime=False, ) - - # NOTE: once supported, remove this AND the one - # inside `._pause()`! - if actor.is_infected_aio(): - raise RuntimeError( - '`tractor.pause[_from_sync]()` not yet supported ' - 'for infected `asyncio` mode!' + log.debug( + f'{actor.uid}: JUST ENTERED `tractor.pause_from_sync()`' + f'|_{actor}\n' ) - - # raises on not-found by default - greenback: ModuleType = maybe_import_greenback() - mdb: PdbREPL = mk_pdb() - - # run async task which will lock out the root proc's TTY. - if not Lock.is_main_trio_thread(): - - # TODO: we could also check for a non-`.to_thread` context - # using `trio.from_thread.check_cancelled()` (says - # oremanj) wherein we get the following outputs: - # - # `RuntimeError`: non-`.to_thread` spawned thread - # noop: non-cancelled `.to_thread` - # `trio.Cancelled`: cancelled `.to_thread` - # - trio.from_thread.run( - partial( - pause, - debug_func=None, - pdb=mdb, - hide_tb=hide_tb, + if not actor: + raise RuntimeError( + 'Not inside the `tractor`-runtime?\n' + '`tractor.pause_from_sync()` is not functional without a wrapping\n' + '- `async with tractor.open_nursery()` or,\n' + '- `async with tractor.open_root_actor()`\n' ) - ) - # TODO: maybe the `trio.current_task()` id/name if avail? - DebugStatus.repl_task: str = str(threading.current_thread()) - else: # we are presumably the `trio.run()` + main thread - greenback.await_( - pause( - debug_func=None, - pdb=mdb, - hide_tb=hide_tb, + # NOTE: once supported, remove this AND the one + # inside `._pause()`! + if actor.is_infected_aio(): + raise RuntimeError( + '`tractor.pause[_from_sync]()` not yet supported ' + 'for infected `asyncio` mode!' ) + + # raises on not-found by default + greenback: ModuleType = maybe_import_greenback() + mdb: PdbREPL = mk_pdb() + + # run async task which will lock out the root proc's TTY. + if not DebugStatus.is_main_trio_thread(): + + # TODO: we could also check for a non-`.to_thread` context + # using `trio.from_thread.check_cancelled()` (says + # oremanj) wherein we get the following outputs: + # + # `RuntimeError`: non-`.to_thread` spawned thread + # noop: non-cancelled `.to_thread` + # `trio.Cancelled`: cancelled `.to_thread` + # + trio.from_thread.run( + partial( + _pause, + debug_func=None, + repl=mdb, + **_pause_kwargs + ), + ) + task: threading.Thread = threading.current_thread() + + else: # we are presumably the `trio.run()` + main thread + task: trio.Task = current_task() + greenback.await_( + _pause( + debug_func=None, + repl=mdb, + **_pause_kwargs, + ) + ) + DebugStatus.repl_task: str = current_task() + + # TODO: ensure we aggressively make the user aware about + # entering the global ``breakpoint()`` built-in from sync + # code? 
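+        # NOTE: `task` above is either the calling thread's
+        # `threading.Thread` handle (in the non-main-thread case) or
+        # the main thread's current `trio.Task`; `_set_trace()` takes
+        # either purely for rendering the attach report below.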
+ _set_trace( + api_frame=inspect.currentframe(), + repl=mdb, + hide_tb=hide_tb, + actor=actor, + task=task, ) - DebugStatus.repl_task: str = current_task() - - # TODO: ensure we aggressively make the user aware about - # entering the global ``breakpoint()`` built-in from sync - # code? - _set_trace( - api_frame=inspect.current_frame(), - actor=actor, - pdb=mdb, - hide_tb=hide_tb, - - # TODO? will we ever need it? - # -> the gb._await() won't be affected by cancellation? - # shield=shield, - ) - # LEGACY NOTE on next LOC's frame showing weirdness.. - # - # XXX NOTE XXX no other LOC can be here without it - # showing up in the REPL's last stack frame !?! - # -[ ] tried to use `@pdbp.hideframe` decoration but - # still doesn't work + # LEGACY NOTE on next LOC's frame showing weirdness.. + # + # XXX NOTE XXX no other LOC can be here without it + # showing up in the REPL's last stack frame !?! + # -[ ] tried to use `@pdbp.hideframe` decoration but + # still doesn't work + except BaseException as err: + __tracebackhide__: bool = False + raise err # NOTE prefer a new "pause" semantic since it better describes @@ -2135,6 +2199,7 @@ async def maybe_wait_for_debugger( child_in_debug: bool = False, header_msg: str = '', + _ll: str = 'devx', ) -> bool: # was locked and we polled? @@ -2144,6 +2209,7 @@ async def maybe_wait_for_debugger( ): return False + logmeth: Callable = getattr(log, _ll) msg: str = header_msg if ( @@ -2156,7 +2222,11 @@ async def maybe_wait_for_debugger( # Instead try to wait for pdb to be released before # tearing down. ctx_in_debug: Context|None = Lock.ctx_in_debug - in_debug: tuple[str, str]|None = ctx_in_debug.chan.uid if ctx_in_debug else None + in_debug: tuple[str, str]|None = ( + ctx_in_debug.chan.uid + if ctx_in_debug + else None + ) if in_debug == current_actor().uid: log.debug( msg @@ -2176,7 +2246,7 @@ async def maybe_wait_for_debugger( # XXX => but it doesn't seem to work.. # await trio.testing.wait_all_tasks_blocked(cushion=0) else: - log.debug( + logmeth( msg + 'Root immediately acquired debug TTY LOCK' @@ -2185,13 +2255,13 @@ async def maybe_wait_for_debugger( for istep in range(poll_steps): if ( - Lock.no_remote_has_tty is not None - and not Lock.no_remote_has_tty.is_set() + Lock.req_handler_finished is not None + and not Lock.req_handler_finished.is_set() and in_debug is not None ): # caller_frame_info: str = pformat_caller_frame() - log.debug( + logmeth( msg + '\nRoot is waiting on tty lock to release from\n\n' @@ -2202,7 +2272,7 @@ async def maybe_wait_for_debugger( Lock.get_locking_task_cs().cancel() with trio.CancelScope(shield=True): - await Lock.no_remote_has_tty.wait() + await Lock.req_handler_finished.wait() log.pdb( f'Subactor released debug lock\n' @@ -2214,11 +2284,11 @@ async def maybe_wait_for_debugger( if ( in_debug is None and ( - Lock.no_remote_has_tty is None - or Lock.no_remote_has_tty.is_set() + Lock.req_handler_finished is None + or Lock.req_handler_finished.is_set() ) ): - log.pdb( + logmeth( msg + 'Root acquired tty lock!' @@ -2226,13 +2296,11 @@ async def maybe_wait_for_debugger( break else: - # TODO: don't need this right? 
-                # await trio.lowlevel.checkpoint()
-
-                log.debug(
+                logmeth(
                    'Root polling for debug:\n'
                    f'poll step: {istep}\n'
-                    f'poll delya: {poll_delay}'
+                    f'poll delay: {poll_delay}\n\n'
+                    f'{Lock.repr()}\n'
                )
                with CancelScope(shield=True):
                    await trio.sleep(poll_delay)
-- 
2.34.1


From 9c11b2b04d4a707a7012aa259cf7a1f5621c41e1 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 10:22:51 -0400
Subject: [PATCH 113/190] Better context aware `RemoteActorError.pformat()`

Such that when displaying with `.__str__()` we do not show the type
header (style) since normally python's raising machinery already prints
the type path like `'tractor._exceptions.RemoteActorError:'`, so doing
it 2x is a bit ugly ;p

In support,
- include `.relay_uid` in `RemoteActorError.extra_body_fields`.
- offer a `with_type_header: bool` to `.pformat()` and only put the
  opening type path and closing `')>'` tail line when `True`.
- add `.is_inception() -> bool:` for an easy way to determine if the
  error is multi-hop relayed.
- only repr the `'|_relay_uid='` field when an error is an inception.
- tweak the invalid-payload case in `_mk_msg_type_err()` to explicitly
  state in the `message` how the `any_pld` value does not match the
  `MsgDec.pld_spec` by decoding the invalid `.pld` with an any-dec.
- allow `_mk_msg_type_err(**mte_kwargs)` passthrough.
- pass `boxed_type=cls` inside `MsgTypeError.from_decode()`.
---
 tractor/_exceptions.py | 101 +++++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 14 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 83675069..179b49a1 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -187,6 +187,9 @@ class RemoteActorError(Exception):
    ]
    extra_body_fields: list[str] = [
        'cid',
+        # NOTE: we only show this on relayed errors (aka
+        # "inceptions").
+        'relay_uid',
        'boxed_type',
    ]
@@ -273,7 +276,7 @@ class RemoteActorError(Exception):
    @property
    def ipc_msg(self) -> Struct:
        '''
-        Re-render the underlying `._ipc_msg: Msg` as
+        Re-render the underlying `._ipc_msg: MsgType` as
        a `pretty_struct.Struct` for introspection such that the
        returned value is a read-only copy of the original.

@@ -344,7 +347,7 @@ class RemoteActorError(Exception):
        return str(bt.__name__)

    @property
-    def boxed_type(self) -> str:
+    def boxed_type(self) -> Type[BaseException]:
        '''
        Error type boxed by last actor IPC hop.

@@ -409,7 +412,14 @@ class RemoteActorError(Exception):
        end_char: str = '\n',
    ) -> str:
        _repr: str = ''
+
        for key in fields:
+            if (
+                key == 'relay_uid'
+                and not self.is_inception()
+            ):
+                continue
+
            val: Any|None = (
                getattr(self, key, None)
                or
@@ -427,6 +437,7 @@ class RemoteActorError(Exception):

            if val:
                _repr += f'{key}={val_str}{end_char}'
+
        return _repr

    def reprol(self) -> str:
@@ -455,15 +466,45 @@ class RemoteActorError(Exception):
            _repr
        )

-    def pformat(self) -> str:
+    def is_inception(self) -> bool:
+        '''
+        Predicate which determines if the shuttled error type
+        is the same as the container error type; IOW is this
+        an "error within an error" which points to some original
+        source error that was relayed through multiple
+        actor hops.
+
+        Ex. a relayed remote error will generally be some form of
+        `RemoteActorError[RemoteActorError]` with a `.src_type` which
+        is not of that same type.
+
+        '''
+        # if a single hop boxed error it was not relayed
+        # more than one hop directly from the src actor.
+ if ( + self.boxed_type + is + self.src_type + ): + return False + + return True + + def pformat( + self, + with_type_header: bool = True, + ) -> str: ''' Nicely formatted boxed error meta data + traceback, OR just the normal message from `.args` (for eg. as you'd want shown by a locally raised `ContextCancelled`). ''' - tb_str: str = self.tb_str - if tb_str: + header: str = '' + if with_type_header: + header: str = f'<{type(self).__name__}(\n' + + if tb_str := self.tb_str: fields: str = self._mk_fields_str( _body_fields + @@ -481,19 +522,35 @@ class RemoteActorError(Exception): # |___ .. tb_body_indent=1, ) + if not with_type_header: + body = '\n' + body else: body: str = textwrap.indent( self._message, prefix=' ', ) + '\n' + + if with_type_header: + tail: str = ')>' + else: + tail = '' + return ( - f'<{type(self).__name__}(\n' + header + + f'{body}' - ')>' + + + tail ) __repr__ = pformat - __str__ = pformat + + # NOTE: apparently we need this so that + # the full fields show in debugger tests? + # |_ i guess `pexepect` relies on `str`-casing + # of output? + def __str__(self) -> str: + return self.pformat(with_type_header=False) def unwrap( self, @@ -682,6 +739,7 @@ class MsgTypeError( ) -> MsgTypeError: return cls( message=message, + boxed_type=cls, # NOTE: original "vanilla decode" of the msg-bytes # is placed inside a value readable from @@ -949,10 +1007,11 @@ def _raise_from_unexpected_msg( if isinstance(msg, Error): # match msg: # case Error(): - raise unpack_error( + exc: RemoteActorError = unpack_error( msg, ctx.chan, - ) from src_err + ) + raise exc from src_err # `MsgStream` termination msg. # TODO: does it make more sense to pack @@ -966,10 +1025,11 @@ def _raise_from_unexpected_msg( or isinstance(msg, Stop) ): - log.debug( + message: str = ( f'Context[{cid}] stream was stopped by remote side\n' f'cid: {cid}\n' ) + log.debug(message) # TODO: if the a local task is already blocking on # a `Context.result()` and thus a `.receive()` on the @@ -983,6 +1043,8 @@ def _raise_from_unexpected_msg( f'Context stream ended due to msg:\n\n' f'{pformat(msg)}\n' ) + eoc.add_note(message) + # XXX: important to set so that a new `.receive()` # call (likely by another task using a broadcast receiver) # doesn't accidentally pull the `return` message @@ -1007,6 +1069,7 @@ def _raise_from_unexpected_msg( ' BUT received a non-error msg:\n\n' f'{struct_format(msg)}' ) from src_err + # ^-TODO-^ maybe `MsgDialogError` is better? _raise_from_no_key_in_msg = _raise_from_unexpected_msg @@ -1023,6 +1086,8 @@ def _mk_msg_type_err( src_type_error: TypeError|None = None, is_invalid_payload: bool = False, + **mte_kwargs, + ) -> MsgTypeError: ''' Compose a `MsgTypeError` from an input runtime context. @@ -1081,12 +1146,20 @@ def _mk_msg_type_err( else: if is_invalid_payload: msg_type: str = type(msg) + any_pld: Any = msgpack.decode(msg.pld) message: str = ( f'invalid `{msg_type.__qualname__}` payload\n\n' - f'<{type(msg).__qualname__}(\n' - f' |_pld: {codec.pld_spec_str} = {msg.pld!r}' - f')>\n' + f'value: `{any_pld!r}` does not match type-spec: ' #\n' + f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' + # f'<{type(msg).__qualname__}(\n' + # f' |_pld: {codec.pld_spec_str}\n'# != {any_pld!r}\n' + # f')>\n\n' ) + # TODO: should we just decode the msg to a dict despite + # only the payload being wrong? + # -[ ] maybe the better design is to break this construct + # logic into a separate explicit helper raiser-func? 
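+        # (note: `msg_dict` stays empty here since only the bad
+        # `.pld` was re-decoded above to render the mismatch in
+        # `message`; no full-msg decode is attempted yet.)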
+ msg_dict: dict = {}
 else:
 # decode the msg-bytes using the std msgpack
-- 2.34.1

From defe34dec243d877f9defa89ae4db0e9e986295a Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 14:56:54 -0400
Subject: [PATCH 114/190] Move runtime frame hiding into helper func

Call it `hide_runtime_frames()` and stick all the lines from the top of
the `._debug` mod in there along with a little `log.devx()` emission on
what gets hidden by default ;)

Other,
- fix ref-error where the internal-error handler might trigger despite
 the debug `req_ctx` not yet having init-ed, such that we don't try
 to cancel or log about it when it never was fully created/initialized..
- fix assignment typo inside `_set_trace()` for `task`.. lel

---
 tractor/devx/_debug.py | 131 +++++++++++++++++++++++++++--------------
 1 file changed, 86 insertions(+), 45 deletions(-)

diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 877d2de6..753c1985 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -48,9 +48,11 @@ from typing import (
 TYPE_CHECKING,
)
from types import (
+ FunctionType,
 FrameType,
 ModuleType,
 TracebackType,
+ CodeType,
)
from msgspec import Struct
@@ -90,43 +92,72 @@ if TYPE_CHECKING:
log = get_logger(__name__)
-# XXX HACKZONE XXX
-# hide exit stack frames on nurseries and cancel-scopes!
-# |_ so avoid seeing it when the `pdbp` REPL is first engaged from
-# inside a `trio.open_nursery()` scope (with no line after it
-# in before the block end??).
-#
-# TODO: FINALLY got this workin originally with
-# `@pdbp.hideframe` around the `wrapper()` def embedded inside
-# `_ki_protection_decoratior()`.. which is in the module:
-# /home/goodboy/.virtualenvs/tractor311/lib/python3.11/site-packages/trio/_core/_ki.py
-#
-# -[ ] make an issue and patch for `trio` core? maybe linked
-# to the long outstanding `pdb` one below?
-# |_ it's funny that there's frame hiding throughout `._run.py`
-# but not where it matters on the below exit funcs..
-#
-# -[ ] provide a patchset for the lonstanding
-# |_ https://github.com/python-trio/trio/issues/1155
-#
-# -[ ] make a linked issue to ^ and propose allowing all the
-# `._core._run` code to have their `__tracebackhide__` value
-# configurable by a `RunVar` to allow getting scheduler frames
-# if desired through configuration?
-#
-# -[ ] maybe dig into the core `pdb` issue why the extra frame is shown
-# at all?
-#
-pdbp.hideframe(trio._core._run.NurseryManager.__aexit__)
-pdbp.hideframe(trio._core._run.CancelScope.__exit__)
-pdbp.hideframe(_GeneratorContextManager.__exit__)
-pdbp.hideframe(_AsyncGeneratorContextManager.__aexit__)
-pdbp.hideframe(trio.Event.wait)
-__all__ = [
- 'breakpoint',
- 'post_mortem',
-]
+def hide_runtime_frames() -> dict[FunctionType, CodeType]:
+ '''
+ Hide call-stack frames for various std-lib and `trio`-API primitives
+ such that the tracebacks presented from our runtime are as minimized
+ as possible, particularly from inside a `PdbREPL`.
+
+ '''
+ # XXX HACKZONE XXX
+ # hide exit stack frames on nurseries and cancel-scopes!
+ # |_ so avoid seeing it when the `pdbp` REPL is first engaged from
+ # inside a `trio.open_nursery()` scope (with no line after it
+ # in before the block end??).
+ #
+ # TODO: FINALLY got this workin originally with
+ # `@pdbp.hideframe` around the `wrapper()` def embedded inside
+ # `_ki_protection_decoratior()`.. which is in the module:
+ # /home/goodboy/.virtualenvs/tractor311/lib/python3.11/site-packages/trio/_core/_ki.py
+ #
+ # -[ ] make an issue and patch for `trio` core?
maybe linked + # to the long outstanding `pdb` one below? + # |_ it's funny that there's frame hiding throughout `._run.py` + # but not where it matters on the below exit funcs.. + # + # -[ ] provide a patchset for the lonstanding + # |_ https://github.com/python-trio/trio/issues/1155 + # + # -[ ] make a linked issue to ^ and propose allowing all the + # `._core._run` code to have their `__tracebackhide__` value + # configurable by a `RunVar` to allow getting scheduler frames + # if desired through configuration? + # + # -[ ] maybe dig into the core `pdb` issue why the extra frame is shown + # at all? + # + funcs: list[FunctionType] = [ + trio._core._run.NurseryManager.__aexit__, + trio._core._run.CancelScope.__exit__, + _GeneratorContextManager.__exit__, + _AsyncGeneratorContextManager.__aexit__, + _AsyncGeneratorContextManager.__aenter__, + trio.Event.wait, + ] + func_list_str: str = textwrap.indent( + "\n".join(f.__qualname__ for f in funcs), + prefix=' |_ ', + ) + log.devx( + 'Hiding the following runtime frames by default:\n' + f'{func_list_str}\n' + ) + + codes: dict[FunctionType, CodeType] = {} + for ref in funcs: + # stash a pre-modified version of each ref's code-obj + # so it can be reverted later if needed. + codes[ref] = ref.__code__ + pdbp.hideframe(ref) + # + # pdbp.hideframe(trio._core._run.NurseryManager.__aexit__) + # pdbp.hideframe(trio._core._run.CancelScope.__exit__) + # pdbp.hideframe(_GeneratorContextManager.__exit__) + # pdbp.hideframe(_AsyncGeneratorContextManager.__aexit__) + # pdbp.hideframe(_AsyncGeneratorContextManager.__aenter__) + # pdbp.hideframe(trio.Event.wait) + return codes class LockStatus( @@ -1032,15 +1063,24 @@ async def request_root_stdio_lock( except ( BaseException, - ): - log.exception( - 'Failed during root TTY-lock dialog?\n' - f'{req_ctx}\n' - - f'Cancelling IPC ctx!\n' + ) as ctx_err: + message: str = ( + 'Failed during debug request dialog with root actor?\n\n' ) - await req_ctx.cancel() - raise + + if req_ctx: + message += ( + f'{req_ctx}\n' + f'Cancelling IPC ctx!\n' + ) + await req_ctx.cancel() + + else: + message += 'Failed during `Portal.open_context()` ?\n' + + log.exception(message) + ctx_err.add_note(message) + raise ctx_err except ( @@ -1067,6 +1107,7 @@ async def request_root_stdio_lock( # ctl-c out of the currently hanging task! raise DebugRequestError( 'Failed to lock stdio from subactor IPC ctx!\n\n' + f'req_ctx: {DebugStatus.req_ctx}\n' ) from req_err @@ -1777,7 +1818,7 @@ def _set_trace( ): __tracebackhide__: bool = hide_tb actor: tractor.Actor = actor or current_actor() - task: task or current_task() + task: trio.Task = task or current_task() # else: # TODO: maybe print the actor supervion tree up to the -- 2.34.1 From 92ac95ce24c1bc8c7fbe16371473d440682eb252 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:01:31 -0400 Subject: [PATCH 115/190] Update debugger tests to expect new pformatting Mostly the result of the `RemoteActorError.pformat()` and our new `_pause/crash_msg: str`s which include the `trio.Task.__repr__()` in the `log.pdb()` message. Obvi use the `in_prompt_msg()` to accomplish where not used prior. ToDo later: -[ ] still some outstanding questions on how detailed inceptions should look, eg. in `test_multi_nested_subactors_error_through_nurseries()` |_maybe we should be more pedantic at checking `.src_uid` vs. `.relay_uid` fields? -[ ] staged a placeholder test for verifying correct call-stack frame on crash handler REPL entry. 
-[ ] also need a test to verify that you can't pause from an already paused actor task such as can happen if you try to step through runtime code that has a recurrent entry to `._debug.pause()`. --- tests/test_debugger.py | 108 +++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 30 deletions(-) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 0de2020d..6aa3bd53 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -144,9 +144,10 @@ def in_prompt_msg( log/REPL output for a given `pdb` interact point. ''' + __tracebackhide__: bool = False + for part in parts: if part not in prompt: - if pause_on_false: import pdbp pdbp.set_trace() @@ -165,6 +166,7 @@ def assert_before( **kwargs, ) -> None: + __tracebackhide__: bool = False # as in before the prompt end before: str = str(child.before.decode()) @@ -217,7 +219,10 @@ def ctlc( ], ids=lambda item: f'{item[0]} -> {item[1]}', ) -def test_root_actor_error(spawn, user_in_out): +def test_root_actor_error( + spawn, + user_in_out, +): ''' Demonstrate crash handler entering pdb from basic error in root actor. @@ -463,8 +468,12 @@ def test_subactor_breakpoint( child.expect(PROMPT) before = str(child.before.decode()) - assert "RemoteActorError: ('breakpoint_forever'" in before - assert 'bdb.BdbQuit' in before + assert in_prompt_msg( + before, + ['RemoteActorError:', + "('breakpoint_forever'", + 'bdb.BdbQuit',] + ) if ctlc: do_ctlc(child) @@ -476,8 +485,12 @@ def test_subactor_breakpoint( child.expect(pexpect.EOF) before = str(child.before.decode()) - assert "RemoteActorError: ('breakpoint_forever'" in before - assert 'bdb.BdbQuit' in before + assert in_prompt_msg( + before, + ['RemoteActorError:', + "('breakpoint_forever'", + 'bdb.BdbQuit',] + ) @has_nested_actors @@ -745,8 +758,9 @@ def test_multi_daemon_subactors( # boxed error raised in root task # "Attaching to pdb in crashed actor: ('root'", _crash_msg, - "('root'", - "_exceptions.RemoteActorError: ('name_error'", + "('root'", # should attach in root + "_exceptions.RemoteActorError:", # with an embedded RAE for.. + "('name_error'", # the src subactor which raised ] ) @@ -847,10 +861,11 @@ def test_multi_nested_subactors_error_through_nurseries( # https://github.com/goodboy/tractor/issues/320 # ctlc: bool, ): - """Verify deeply nested actors that error trigger debugger entries + ''' + Verify deeply nested actors that error trigger debugger entries at each actor nurserly (level) all the way up the tree. - """ + ''' # NOTE: previously, inside this script was a bug where if the # parent errors before a 2-levels-lower actor has released the lock, # the parent tries to cancel it but it's stuck in the debugger? 
@@ -870,22 +885,31 @@ def test_multi_nested_subactors_error_through_nurseries( except EOF: break - assert_before(child, [ + assert_before( + child, + [ # boxed source errors + "NameError: name 'doggypants' is not defined", + "tractor._exceptions.RemoteActorError:", + "('name_error'", + "bdb.BdbQuit", - # boxed source errors - "NameError: name 'doggypants' is not defined", - "tractor._exceptions.RemoteActorError: ('name_error'", - "bdb.BdbQuit", + # first level subtrees + # "tractor._exceptions.RemoteActorError: ('spawner0'", + "src_uid=('spawner0'", - # first level subtrees - "tractor._exceptions.RemoteActorError: ('spawner0'", - # "tractor._exceptions.RemoteActorError: ('spawner1'", + # "tractor._exceptions.RemoteActorError: ('spawner1'", - # propagation of errors up through nested subtrees - "tractor._exceptions.RemoteActorError: ('spawn_until_0'", - "tractor._exceptions.RemoteActorError: ('spawn_until_1'", - "tractor._exceptions.RemoteActorError: ('spawn_until_2'", - ]) + # propagation of errors up through nested subtrees + # "tractor._exceptions.RemoteActorError: ('spawn_until_0'", + # "tractor._exceptions.RemoteActorError: ('spawn_until_1'", + # "tractor._exceptions.RemoteActorError: ('spawn_until_2'", + # ^-NOTE-^ old RAE repr, new one is below with a field + # showing the src actor's uid. + "src_uid=('spawn_until_0'", + "relay_uid=('spawn_until_1'", + "src_uid=('spawn_until_2'", + ] + ) @pytest.mark.timeout(15) @@ -1019,13 +1043,16 @@ def test_different_debug_mode_per_actor( # msg reported back from the debug mode actor is processed. # assert "tractor._exceptions.RemoteActorError: ('debugged_boi'" in before - assert "tractor._exceptions.RemoteActorError: ('crash_boi'" in before - # the crash boi should not have made a debugger request but # instead crashed completely - assert "tractor._exceptions.RemoteActorError: ('crash_boi'" in before - assert "RuntimeError" in before - + assert_before( + child, + [ + "tractor._exceptions.RemoteActorError:", + "src_uid=('crash_boi'", + "RuntimeError", + ] + ) def test_pause_from_sync( @@ -1044,13 +1071,15 @@ def test_pause_from_sync( assert_before( child, [ - '`greenback` portal opened!', # pre-prompt line - _pause_msg, "('root'", + _pause_msg, + " Date: Wed, 22 May 2024 15:10:39 -0400 Subject: [PATCH 116/190] Port `Actor._stream_handler()` to use `.has_outcome`, fix indent bug.. 
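For reference, the `.has_outcome` predicate the fix below leans on is
assumed to behave like this sketch; the real property lives on
`tractor._context.Context` and this exact body (incl. the `Unresolved`
sentinel name) is a guess from context, not verbatim source:

    # hedged sketch: "did this ctx already capture a result OR an
    # error?" - the guard added below only cancels an active
    # debug-request ctx when this is true.
    @property
    def has_outcome(self) -> bool:
        return (
            bool(self.maybe_error)  # assumed error accessor
            or
            self._result is not Unresolved  # hypothetical sentinel
        )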
--- tractor/_runtime.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 1d931cd7..f267ff67 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -694,21 +694,21 @@ class Actor: proc: trio.Process _, proc, _ = entry - if ( - (poll := getattr(proc, 'poll', None)) - and poll() is None - ): - log.cancel( - 'Root actor reports no-more-peers, BUT\n' - 'a DISCONNECTED child still has the debug ' - 'lock!\n\n' - # f'root uid: {self.uid}\n' - f'last disconnected child uid: {uid}\n' - f'locking child uid: {pdb_user_uid}\n' - ) - await _debug.maybe_wait_for_debugger( - child_in_debug=True - ) + if ( + (poll := getattr(proc, 'poll', None)) + and poll() is None + ): + log.cancel( + 'Root actor reports no-more-peers, BUT\n' + 'a DISCONNECTED child still has the debug ' + 'lock!\n\n' + # f'root uid: {self.uid}\n' + f'last disconnected child uid: {uid}\n' + f'locking child uid: {pdb_user_uid}\n' + ) + await _debug.maybe_wait_for_debugger( + child_in_debug=True + ) # TODO: just bc a child's transport dropped # doesn't mean it's not still using the pdb @@ -1142,7 +1142,6 @@ class Actor: requester_type, req_chan, log_meth, - ) = ( req_chan.uid, 'peer', @@ -1175,7 +1174,11 @@ class Actor: # with the root actor in this tree debug_req = _debug.DebugStatus lock_req_ctx: Context = debug_req.req_ctx - if lock_req_ctx is not None: + if ( + lock_req_ctx + and + lock_req_ctx.has_outcome + ): msg += ( '-> Cancelling active debugger request..\n' f'|_{_debug.Lock.repr()}\n\n' -- 2.34.1 From da03deddf1c5f790301ed6b7eb6c014fdccada6d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:11:21 -0400 Subject: [PATCH 117/190] Call `.devx._debug.hide_runtime_frames()` by default From both `open_root_actor()` and `._entry._trio_main()`. Other `breakpoint()`-from-sync-func fixes: - properly disable the default hook using `"0"` XD - offer a `hide_tb: bool` from `open_root_actor()`. - disable hiding the `._trio_main()` frame, bc pretty sure it doesn't help anyone (either way) when REPL-ing/tb-ing from a subactor..? --- tractor/_entry.py | 7 ++++--- tractor/_root.py | 13 ++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index 750dc59f..e22a4f1f 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -33,6 +33,7 @@ from .log import ( get_logger, ) from . import _state +from .devx import _debug from .to_asyncio import run_as_asyncio_guest from ._runtime import ( async_main, @@ -96,7 +97,6 @@ def _mp_main( def _trio_main( - actor: Actor, *, parent_addr: tuple[str, int] | None = None, @@ -107,7 +107,9 @@ def _trio_main( Entry point for a `trio_run_in_process` subactor. ''' - __tracebackhide__: bool = True + # __tracebackhide__: bool = True + _debug.hide_runtime_frames() + _state._current_actor = actor trio_main = partial( async_main, @@ -146,7 +148,6 @@ def _trio_main( + actor_info ) - finally: log.info( 'Subactor terminated\n' diff --git a/tractor/_root.py b/tractor/_root.py index 4c0bb4f6..4ddfde51 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -92,12 +92,16 @@ async def open_root_actor( # and that this call creates it. ensure_registry: bool = False, + hide_tb: bool = True, + ) -> Actor: ''' Runtime init entry point for ``tractor``. ''' - __tracebackhide__ = True + __tracebackhide__: bool = hide_tb + _debug.hide_runtime_frames() + # TODO: stick this in a `@cm` defined in `devx._debug`? 
# # Override the global debugger hook to make it play nice with @@ -126,7 +130,7 @@ async def open_root_actor( # usage by a clobbered TTY's stdstreams! def block_bps(*args, **kwargs): raise RuntimeError( - 'Trying to use `breakpoint()` eh?\n' + 'Trying to use `breakpoint()` eh?\n\n' 'Welp, `tractor` blocks `breakpoint()` built-in calls by default!\n' 'If you need to use it please install `greenback` and set ' '`debug_mode=True` when opening the runtime ' @@ -134,7 +138,9 @@ async def open_root_actor( ) sys.breakpointhook = block_bps - # os.environ['PYTHONBREAKPOINT'] = None + # lol ok, + # https://docs.python.org/3/library/sys.html#sys.breakpointhook + os.environ['PYTHONBREAKPOINT'] = "0" # attempt to retreive ``trio``'s sigint handler and stash it # on our debugger lock state. @@ -204,6 +210,7 @@ async def open_root_actor( ): loglevel = 'PDB' + elif debug_mode: raise RuntimeError( "Debug mode is only supported for the `trio` backend!" -- 2.34.1 From e92972a5f47147414645f7bc83f0dab88e5a6f88 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:18:45 -0400 Subject: [PATCH 118/190] Mk `MsgDec.spec_str` have a more compact ` --- tractor/msg/_codec.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 6ba23b78..cd86552f 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -162,7 +162,10 @@ class MsgDec(Struct): # TODO: would get moved into `FieldSpec.__str__()` right? @property def spec_str(self) -> str: - return pformat_msgspec(codec=self) + return pformat_msgspec( + codec=self, + join_char='|', + ) pld_spec_str = spec_str @@ -211,7 +214,7 @@ def mk_msgspec_table( msgtypes = [msgspec] msgt_table: dict[str, MsgType] = { - msgt: str(msgt) + msgt: str(msgt.__name__) for msgt in msgtypes } if msg: -- 2.34.1 From 24c9c5397f452e70242fa684f9c937305785fd26 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:21:01 -0400 Subject: [PATCH 119/190] Add debug check-n-wait inside `._spawn.soft_kill()` And IFF the `await wait_func(proc)` is cancelled such that we avoid clobbering some subactor that might be REPL-ing even though its parent actor is in the midst of (gracefully) cancelling it. --- tractor/_spawn.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 09d9aff8..481e2981 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -43,6 +43,7 @@ from tractor._state import ( is_main_process, is_root_process, debug_mode, + _runtime_vars, ) from tractor.log import get_logger from tractor._portal import Portal @@ -303,7 +304,6 @@ async def hard_kill( async def soft_kill( - proc: ProcessType, wait_func: Callable[ [ProcessType], @@ -333,6 +333,18 @@ async def soft_kill( await wait_func(proc) except trio.Cancelled: + with trio.CancelScope(shield=True): + await maybe_wait_for_debugger( + child_in_debug=_runtime_vars.get( + '_debug_mode', False + ), + header_msg=( + 'Delaying `soft_kill()` subproc reaper while debugger locked..\n' + ), + # TODO: need a diff value then default? + # poll_steps=9999999, + ) + # if cancelled during a soft wait, cancel the child # actor before entering the hard reap sequence # below. 
This means we try to do a graceful teardown
-- 2.34.1

From cbfabac813220bc72c15ddcf7bf97f0b4dcdd47b Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 15:26:48 -0400
Subject: [PATCH 120/190] Even smarter `RemoteActorError.pformat()`-ing

Related to the prior patch, re the new `with_type_header: bool`:
- in the `with_type_header == True` use case make sure we keep the
 first `._message: str` line non-indented since it'll show just after
 the header-line's type path with ':'.
- when `False` drop the `)>` `repr()`-instance style as well so that
 we just get the ascii boxed traceback as though it's the error
 message-`str` not the `repr()` of the error obj.

Other,
- hide `pack_from_raise()` call frame since it'll show in debug mode
 crash handling..
- mk `MsgTypeError.from_decode()` explicitly accept and proxy an
 optional `ipc_msg` and change `msgdict` to also be optional, only
 reading out the `**extra_msgdata` when provided.
- expose a `_mk_msg_type_err(src_err_msg: Error|None = None,)` for
 callers who wish to inject a `._ipc_msg: MsgType` to the MTE.
 |_ add a note how we can't use it due to a causality-dilemma when
 pld validating `Started` on the send side..
---
 tractor/_exceptions.py | 84 +++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 22 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 179b49a1..9a94bbdb 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -35,7 +35,6 @@ import trio
 from msgspec import (
 defstruct,
 msgpack,
- Raw,
 structs,
 ValidationError,
)
@@ -44,11 +43,12 @@ from tractor._state import current_actor
 from tractor.log import get_logger
 from tractor.msg import (
 Error,
+ PayloadMsg,
 MsgType,
- Stop,
- types as msgtypes,
 MsgCodec,
 MsgDec,
+ Stop,
+ types as msgtypes,
)
 from tractor.msg.pretty_struct import (
 iter_fields,
@@ -156,6 +156,7 @@ def pack_from_raise(
 `Error`-msg using `pack_error()` to extract the tb info.

 '''
+ __tracebackhide__: bool = True
 try:
 raise local_err
 except type(local_err) as local_err:
@@ -525,10 +526,26 @@ class RemoteActorError(Exception):
 if not with_type_header:
 body = '\n' + body
 else:
- body: str = textwrap.indent(
- self._message,
- prefix=' ',
- ) + '\n'
+ first: str = ''
+ message: str = self._message
+
+ # split off the first line so it isn't indented
+ # the same like the "boxed content".
+ if not with_type_header:
+ lines: list[str] = message.splitlines()
+ first = lines[0]
+ message = ''.join(lines[1:])
+
+ body: str = (
+ first
+ +
+ textwrap.indent(
+ message,
+ prefix=' ',
+ )
+ +
+ '\n'
+ )
@@ -734,25 +751,38 @@ class MsgTypeError(
 def from_decode(
 cls,
 message: str,
- msgdict: dict,
+
+ ipc_msg: PayloadMsg|None = None,
+ msgdict: dict|None = None,

 ) -> MsgTypeError:
- return cls(
- message=message,
- boxed_type=cls,
+ '''
+ Constuctor for easy creation from (presumably) catching
+ the backend interchange lib's underlying validation error
+ and passing context-specific meta-data to `_mk_msg_type_err()`
+ (which is normally the caller of this).

- # NOTE: original "vanilla decode" of the msg-bytes
- # is placed inside a value readable from
- # `.msgdata['_msg_dict']`
- _msg_dict=msgdict,
-
- # expand and pack all RAE compat fields
- # into the `._extra_msgdata` aux `dict`.
- **{
+ '''
+ # if provided, expand and pack all RAE compat fields into the
+ # `._extra_msgdata` auxillary data `dict` internal to
+ # `RemoteActorError`.
+ extra_msgdata: dict = {} + if msgdict: + extra_msgdata: dict = { k: v for k, v in msgdict.items() if k in _ipcmsg_keys - }, + } + # NOTE: original "vanilla decode" of the msg-bytes + # is placed inside a value readable from + # `.msgdata['_msg_dict']` + extra_msgdata['_msg_dict'] = msgdict + + return cls( + message=message, + boxed_type=cls, + ipc_msg=ipc_msg, + **extra_msgdata, ) @@ -1076,7 +1106,7 @@ _raise_from_no_key_in_msg = _raise_from_unexpected_msg def _mk_msg_type_err( - msg: Any|bytes|Raw, + msg: Any|bytes|MsgType, codec: MsgCodec|MsgDec, message: str|None = None, @@ -1085,6 +1115,7 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, + src_err_msg: Error|None = None, **mte_kwargs, @@ -1159,9 +1190,10 @@ def _mk_msg_type_err( # only the payload being wrong? # -[ ] maybe the better design is to break this construct # logic into a separate explicit helper raiser-func? - msg_dict: dict = {} + msg_dict = None else: + msg: bytes # decode the msg-bytes using the std msgpack # interchange-prot (i.e. without any # `msgspec.Struct` handling) so that we can @@ -1206,6 +1238,14 @@ def _mk_msg_type_err( msgtyperr = MsgTypeError.from_decode( message=message, msgdict=msg_dict, + + # NOTE: for the send-side `.started()` pld-validate + # case we actually set the `._ipc_msg` AFTER we return + # from here inside `Context.started()` since we actually + # want to emulate the `Error` from the mte we build here + # Bo + # so by default in that case this is set to `None` + ipc_msg=src_err_msg, ) msgtyperr.__cause__ = src_validation_error return msgtyperr -- 2.34.1 From 07ba69f6973518936bf5274b03a16011cc84a737 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 27 May 2024 13:52:35 -0400 Subject: [PATCH 121/190] Add basic payload-spec test suite Starts with some very basic cases: - verify both subactor-as-child-ctx-task send side validation (failures) as well as relay and raise on root-parent-side-task. - wrap failure expectation cases that bubble out of `@acm`s with a `maybe_expect_raises()` equiv wrapper with an embedded timeout. - add `Return` cases including invalid by `str` and valid by a `None`. Still ToDo: - commit impl changes to make the bulk of this suite pass. - adjust how `MsgTypeError`s format the local (`.started()`) send side `.tb_str` such that we don't do a "boxed" error prior to `pack_error()` being called normally prior to `Error` transit. --- tests/test_pldrx_limiting.py | 316 +++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 tests/test_pldrx_limiting.py diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py new file mode 100644 index 00000000..d658fb51 --- /dev/null +++ b/tests/test_pldrx_limiting.py @@ -0,0 +1,316 @@ +''' +Audit sub-sys APIs from `.msg._ops` +mostly for ensuring correct `contextvars` +related settings around IPC contexts. 
+ +''' +from contextlib import ( + asynccontextmanager as acm, + contextmanager as cm, +) +# import typing +from typing import ( + # Any, + TypeAlias, + # Union, +) +from contextvars import ( + Context, +) + +from msgspec import ( + # structs, + # msgpack, + Struct, + # ValidationError, +) +import pytest +import trio + +import tractor +from tractor import ( + # _state, + MsgTypeError, + current_ipc_ctx, + Portal, +) +from tractor.msg import ( + _ops as msgops, + Return, +) +from tractor.msg import ( + _codec, + # _ctxvar_MsgCodec, + + # NamespacePath, + # MsgCodec, + # mk_codec, + # apply_codec, + # current_codec, +) +from tractor.msg.types import ( + log, + # _payload_msgs, + # PayloadMsg, + # Started, + # mk_msg_spec, +) + + +class PldMsg(Struct): + field: str + + +maybe_msg_spec = PldMsg|None + + +@cm +def custom_spec( + ctx: Context, + spec: TypeAlias, +) -> _codec.MsgCodec: + ''' + Apply a custom payload spec, remove on exit. + + ''' + rx: msgops.PldRx = ctx._pld_rx + + +@acm +async def maybe_expect_raises( + raises: BaseException|None = None, + ensure_in_message: list[str]|None = None, + + reraise: bool = False, + timeout: int = 3, +) -> None: + ''' + Async wrapper for ensuring errors propagate from the inner scope. + + ''' + with trio.fail_after(timeout): + try: + yield + except BaseException as _inner_err: + inner_err = _inner_err + # wasn't-expected to error.. + if raises is None: + raise + + else: + assert type(inner_err) is raises + + # maybe check for error txt content + if ensure_in_message: + part: str + for part in ensure_in_message: + for i, arg in enumerate(inner_err.args): + if part in arg: + break + # if part never matches an arg, then we're + # missing a match. + else: + raise ValueError( + 'Failed to find error message content?\n\n' + f'expected: {ensure_in_message!r}\n' + f'part: {part!r}\n\n' + f'{inner_err.args}' + ) + + if reraise: + raise inner_err + + else: + if raises: + raise RuntimeError( + f'Expected a {raises.__name__!r} to be raised?' + ) + + +@tractor.context +async def child( + ctx: Context, + started_value: int|PldMsg|None, + return_value: str|None, + validate_pld_spec: bool, + raise_on_started_mte: bool = True, + +) -> None: + ''' + Call ``Context.started()`` more then once (an error). + + ''' + expect_started_mte: bool = started_value == 10 + + # sanaity check that child RPC context is the current one + curr_ctx: Context = current_ipc_ctx() + assert ctx is curr_ctx + + rx: msgops.PldRx = ctx._pld_rx + orig_pldec: _codec.MsgDec = rx.pld_dec + # senity that default pld-spec should be set + assert ( + rx.pld_dec + is + msgops._def_any_pldec + ) + + try: + with msgops.limit_plds( + spec=maybe_msg_spec, + ) as pldec: + # sanity on `MsgDec` state + assert rx.pld_dec is pldec + assert pldec.spec is maybe_msg_spec + + # 2 cases: hdndle send-side and recv-only validation + # - when `raise_on_started_mte == True`, send validate + # - else, parent-recv-side only validation + try: + await ctx.started( + value=started_value, + validate_pld_spec=validate_pld_spec, + ) + + except MsgTypeError: + log.exception('started()` raised an MTE!\n') + if not expect_started_mte: + raise RuntimeError( + 'Child-ctx-task SHOULD NOT HAVE raised an MTE for\n\n' + f'{started_value!r}\n' + ) + + # propagate to parent? 
+ if raise_on_started_mte: + raise + else: + if expect_started_mte: + raise RuntimeError( + 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' + f'{started_value!r}\n' + ) + + # XXX should always fail on recv side since we can't + # really do much else beside terminate and relay the + # msg-type-error from this RPC task ;) + return return_value + + finally: + # sanity on `limit_plds()` reversion + assert ( + rx.pld_dec + is + msgops._def_any_pldec + ) + log.runtime( + 'Reverted to previous pld-spec\n\n' + f'{orig_pldec}\n' + ) + + +@pytest.mark.parametrize( + 'return_value', + [ + None, + 'yo', + ], + ids=[ + 'return[invalid-"yo"]', + 'return[valid-None]', + ], +) +@pytest.mark.parametrize( + 'started_value', + [ + 10, + PldMsg(field='yo'), + ], + ids=[ + 'Started[invalid-10]', + 'Started[valid-PldMsg]', + ], +) +@pytest.mark.parametrize( + 'pld_check_started_value', + [ + True, + False, + ], + ids=[ + 'check-started-pld', + 'no-started-pld-validate', + ], +) +def test_basic_payload_spec( + debug_mode: bool, + loglevel: str, + return_value: str|None, + started_value: int|PldMsg, + pld_check_started_value: bool, +): + ''' + Validate the most basic `PldRx` msg-type-spec semantics around + a IPC `Context` endpoint start, started-sync, and final return + value depending on set payload types and the currently applied + pld-spec. + + ''' + invalid_return: bool = return_value == 'yo' + invalid_started: bool = started_value == 10 + + async def main(): + async with tractor.open_nursery( + debug_mode=debug_mode, + loglevel=loglevel, + ) as an: + p: Portal = await an.start_actor( + 'child', + enable_modules=[__name__], + ) + + # since not opened yet. + assert current_ipc_ctx() is None + + async with ( + maybe_expect_raises( + raises=MsgTypeError if ( + invalid_return + or + invalid_started + ) else None, + ensure_in_message=[ + "invalid `Return` payload", + "value: `'yo'` does not match type-spec: `Return.pld: PldMsg|NoneType`", + ], + ), + p.open_context( + child, + return_value=return_value, + started_value=started_value, + pld_spec=maybe_msg_spec, + validate_pld_spec=pld_check_started_value, + ) as (ctx, first), + ): + # now opened with 'child' sub + assert current_ipc_ctx() is ctx + + assert type(first) is PldMsg + assert first.field == 'yo' + + try: + assert (await ctx.result()) is None + except MsgTypeError as mte: + if not invalid_return: + raise + + else: # expected this invalid `Return.pld` + assert mte.cid == ctx.cid + + # verify expected remote mte deats + await tractor.pause() + assert ctx._remote_error is mte + assert mte.expected_msg_type is Return + + await p.cancel_actor() + + trio.run(main) -- 2.34.1 From aefdc9c09421d80cdf617efb2845ddee478dbf85 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 27 May 2024 14:59:40 -0400 Subject: [PATCH 122/190] More correct/explicit `.started()` send-side validation In the sense that we handle it as a special case that exposed through to `RxPld.dec_msg()` with a new `is_started_send_side: bool`. (Non-ideal) `Context.started()` impl deats: - only do send-side pld-spec validation when a new `validate_pld_spec` is set (by default it's not). - call `self.pld_rx.dec_msg(is_started_send_side=True)` to validate the payload field from the just codec-ed `Started` msg's `msg_bytes` by passing the `roundtripped` msg (with it's `.pld: Raw`) directly. - add a `hide_tb: bool` param and proxy it to the `.dec_msg()` call. 
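Condensed, the send-side flow described above boils down to the
following sketch; the names are lifted from the diff further below but
the control-flow is simplified, so treat it as illustrative only:

    # sketch: validate the outbound `Started.pld` via a codec
    # encode-decode roundtrip BEFORE anything hits the wire.
    started_msg = Started(cid=self.cid, pld=value)
    msg_bytes: bytes = current_codec().encode(started_msg)
    if validate_pld_spec:
        roundtripped: Started = current_codec().decode(msg_bytes)
        # raises a `MsgTypeError` when `.pld` fails the pld-spec
        # currently applied to this task's IPC ctx.
        self.pld_rx.dec_msg(
            msg=roundtripped,
            ipc=self,
            expect_msg=Started,
            is_started_send_side=True,
        )
    await self.chan.send(started_msg)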
(Non-ideal) `PldRx.dec_msg()` impl deats: - for now we're packing the MTE inside an `Error` via a manual call to `pack_error()` and then setting that as the `msg` passed to `_raise_from_unexpected_msg()` (though really we should just raise inline?). - manually set the `MsgTypeError._ipc_msg` to the above.. Other, - more comprehensive `Context` type doc string. - various `hide_tb: bool` kwarg additions through `._ops.PldRx` meths. - proto a `.msg._ops.validate_payload_msg()` helper planned to get the logic from this version of `.started()`'s send-side validation so as to be useful more generally elsewhere.. (like for raising back `Return` values on the child side?). Warning: this commit may have been made out of order from required changes to `._exceptions` which will come in a follow up! --- tractor/_context.py | 128 +++++++++++++++++++++----------------------- tractor/msg/_ops.py | 70 ++++++++++++++++++++---- 2 files changed, 122 insertions(+), 76 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 152efdee..fed9f2bc 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -15,12 +15,22 @@ # along with this program. If not, see . ''' -The fundamental cross process SC abstraction: an inter-actor, -cancel-scope linked task "context". +The fundamental cross-process SC abstraction: an inter-actor, +transitively cancel-scope linked, (dual) task IPC coupled "context". -A ``Context`` is very similar to the ``trio.Nursery.cancel_scope`` built -into each ``trio.Nursery`` except it links the lifetimes of memory space -disjoint, parallel executing tasks in separate actors. +A `Context` is very similar to the look and feel of the +`.cancel_scope: trio.CancelScope` built into each `trio.Nursery` +except that it links the lifetimes of 2 memory space disjoint, +parallel executing, tasks scheduled in separate "actors". + +So while a `trio.Nursery` has a `.parent_task` which exists both +before (open) and then inside the body of the `async with` of the +nursery's scope (/block), a `Context` contains 2 tasks, a "parent" +and a "child" side, where both execute independently in separate +memory domains of different (host's) processes linked through +a SC-transitive IPC "shuttle dialog protocol". The underlying IPC +dialog-(un)protocol allows for the maintainance of SC properties +end-2-end between the tasks. ''' from __future__ import annotations @@ -71,13 +81,11 @@ from .msg import ( MsgCodec, NamespacePath, PayloadT, - Return, Started, Stop, Yield, current_codec, pretty_struct, - types as msgtypes, _ops as msgops, ) from ._ipc import ( @@ -90,7 +98,7 @@ from ._state import ( debug_mode, _ctxvar_Context, ) - +# ------ - ------ if TYPE_CHECKING: from ._portal import Portal from ._runtime import Actor @@ -1598,16 +1606,15 @@ class Context: async def started( self, - # TODO: how to type this so that it's the - # same as the payload type? Is this enough? value: PayloadT|None = None, + validate_pld_spec: bool = True, + strict_pld_parity: bool = False, - strict_parity: bool = False, + # TODO: this will always emit for msgpack for any () vs. [] + # inside the value.. do we want to offer warnings on that? + # complain_no_parity: bool = False, - # TODO: this will always emit now that we do `.pld: Raw` - # passthrough.. so maybe just only complain when above strict - # flag is set? 
- complain_no_parity: bool = False, + hide_tb: bool = True, ) -> None: ''' @@ -1648,63 +1655,54 @@ class Context: # # https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern # - codec: MsgCodec = current_codec() - msg_bytes: bytes = codec.encode(started_msg) - try: - # be a "cheap" dialog (see above!) - if ( - strict_parity - or - complain_no_parity - ): - rt_started: Started = codec.decode(msg_bytes) - - # XXX something is prolly totes cucked with the - # codec state! - if isinstance(rt_started, dict): - rt_started = msgtypes.from_dict_msg( - dict_msg=rt_started, - ) - raise RuntimeError( - 'Failed to roundtrip `Started` msg?\n' - f'{pretty_struct.pformat(rt_started)}\n' - ) - - if rt_started != started_msg: + __tracebackhide__: bool = hide_tb + if validate_pld_spec: + # __tracebackhide__: bool = False + codec: MsgCodec = current_codec() + msg_bytes: bytes = codec.encode(started_msg) + try: + roundtripped: Started = codec.decode(msg_bytes) + # pld: PayloadT = await self.pld_rx.recv_pld( + pld: PayloadT = self.pld_rx.dec_msg( + msg=roundtripped, + ipc=self, + expect_msg=Started, + hide_tb=hide_tb, + is_started_send_side=True, + ) + if ( + strict_pld_parity + and + pld != value + ): # TODO: make that one a mod func too.. diff = pretty_struct.Struct.__sub__( - rt_started, + roundtripped, started_msg, ) complaint: str = ( 'Started value does not match after roundtrip?\n\n' f'{diff}' ) + raise ValidationError(complaint) - # TODO: rn this will pretty much always fail with - # any other sequence type embeded in the - # payload... - if ( - self._strict_started - or - strict_parity - ): - raise ValueError(complaint) - else: - log.warning(complaint) + # raise any msg type error NO MATTER WHAT! + except ValidationError as verr: + # always show this src frame in the tb + # __tracebackhide__: bool = False + raise _mk_msg_type_err( + msg=roundtripped, + codec=codec, + src_validation_error=verr, + verb_header='Trying to send ', + is_invalid_payload=True, + ) from verr - await self.chan.send(started_msg) - - # raise any msg type error NO MATTER WHAT! - except ValidationError as verr: - raise _mk_msg_type_err( - msg=msg_bytes, - codec=codec, - src_validation_error=verr, - verb_header='Trying to send payload' - # > 'invalid `Started IPC msgs\n' - ) from verr + # TODO: maybe a flag to by-pass encode op if already done + # here in caller? + await self.chan.send(started_msg) + # set msg-related internal runtime-state self._started_called = True self._started_msg = started_msg self._started_pld = value @@ -1997,12 +1995,7 @@ async def open_context_from_portal( pld_spec: TypeAlias|None = None, allow_overruns: bool = False, - - # TODO: if we set this the wrapping `@acm` body will - # still be shown (awkwardly) on pdb REPL entry. Ideally - # we can similarly annotate that frame to NOT show? for now - # we DO SHOW this frame since it's awkward ow.. 
- hide_tb: bool = False, + hide_tb: bool = True, # proxied to RPC **kwargs, @@ -2115,6 +2108,7 @@ async def open_context_from_portal( ipc=ctx, expect_msg=Started, passthrough_non_pld_msgs=False, + hide_tb=hide_tb, ) # from .devx import pause diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 3014c15b..6faf78ef 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -47,7 +47,7 @@ from tractor._exceptions import ( _raise_from_unexpected_msg, MsgTypeError, _mk_msg_type_err, - pack_from_raise, + pack_error, ) from tractor._state import current_ipc_ctx from ._codec import ( @@ -203,7 +203,6 @@ class PldRx(Struct): msg: MsgType = ( ipc_msg or - # async-rx msg from underlying IPC feeder (mem-)chan await ipc._rx_chan.receive() ) @@ -223,6 +222,10 @@ class PldRx(Struct): raise_error: bool = True, hide_tb: bool = True, + # XXX for special (default?) case of send side call with + # `Context.started(validate_pld_spec=True)` + is_started_send_side: bool = False, + ) -> PayloadT|Raw: ''' Decode a msg's payload field: `MsgType.pld: PayloadT|Raw` and @@ -230,8 +233,6 @@ class PldRx(Struct): ''' __tracebackhide__: bool = hide_tb - - _src_err = None src_err: BaseException|None = None match msg: # payload-data shuttle msg; deliver the `.pld` value @@ -256,18 +257,58 @@ class PldRx(Struct): # pack mgterr into error-msg for # reraise below; ensure remote-actor-err # info is displayed nicely? - msgterr: MsgTypeError = _mk_msg_type_err( + mte: MsgTypeError = _mk_msg_type_err( msg=msg, codec=self.pld_dec, src_validation_error=valerr, is_invalid_payload=True, + expected_msg=expect_msg, + # ipc_msg=msg, ) - msg: Error = pack_from_raise( - local_err=msgterr, + # NOTE: override the `msg` passed to + # `_raise_from_unexpected_msg()` (below) so so that + # we're effectively able to use that same func to + # unpack and raise an "emulated remote `Error`" of + # this local MTE. + err_msg: Error = pack_error( + exc=mte, cid=msg.cid, - src_uid=ipc.chan.uid, + src_uid=( + ipc.chan.uid + if not is_started_send_side + else ipc._actor.uid + ), + # tb=valerr.__traceback__, + tb_str=mte._message, ) + # ^-TODO-^ just raise this inline instead of all the + # pack-unpack-repack non-sense! + + mte._ipc_msg = err_msg + msg = err_msg + + # set emulated remote error more-or-less as the + # runtime would + ctx: Context = getattr(ipc, 'ctx', ipc) + + # TODO: should we instead make this explicit and + # use the above masked `is_started_send_decode`, + # expecting the `Context.started()` caller to set + # it? Rn this is kinda, howyousayyy, implicitly + # edge-case-y.. + if ( + expect_msg is not Started + and not is_started_send_side + ): + ctx._maybe_cancel_and_set_remote_error(mte) + + # XXX NOTE: so when the `_raise_from_unexpected_msg()` + # raises the boxed `err_msg` from above it raises + # it from `None`. src_err = valerr + # if is_started_send_side: + # src_err = None + # XXX some other decoder specific failure? # except TypeError as src_error: @@ -379,6 +420,7 @@ class PldRx(Struct): # NOTE: generally speaking only for handling `Stop`-msgs that # arrive during a call to `drain_to_final_msg()` above! passthrough_non_pld_msgs: bool = True, + hide_tb: bool = True, **kwargs, ) -> tuple[MsgType, PayloadT]: @@ -387,6 +429,7 @@ class PldRx(Struct): the pair of refs. 
''' + __tracebackhide__: bool = hide_tb msg: MsgType = await ipc._rx_chan.receive() if passthrough_non_pld_msgs: @@ -401,6 +444,7 @@ class PldRx(Struct): msg, ipc=ipc, expect_msg=expect_msg, + hide_tb=hide_tb, **kwargs, ) return msg, pld @@ -414,7 +458,7 @@ def limit_plds( ) -> MsgDec: ''' Apply a `MsgCodec` that will natively decode the SC-msg set's - `Msg.pld: Union[Type[Struct]]` payload fields using + `PayloadMsg.pld: Union[Type[Struct]]` payload fields using tagged-unions of `msgspec.Struct`s from the `payload_types` for all IPC contexts in use by the current `trio.Task`. @@ -691,3 +735,11 @@ async def drain_to_final_msg( result_msg, pre_result_drained, ) + + +# TODO: factor logic from `.Context.started()` for send-side +# validate raising! +def validate_payload_msg( + msg: Started|Yield|Return, +) -> MsgTypeError|None: + ... -- 2.34.1 From cbd47d800ebde06f2e4c6da532bfceb92b466c6f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 27 May 2024 22:36:05 -0400 Subject: [PATCH 123/190] Add `MsgTypeError` "bad msg" capture Such that if caught by user code and/or the runtime we can introspect the original msg which caused the type error. Previously this was kinda half-baked with a `.msg_dict` which was delivered from an `Any`-decode of the shuttle msg in `_mk_msg_type_err()` but now this more explicitly refines the API and supports both `PayloadMsg`-instance or the msg-dict style injection: - allow passing either of `bad_msg: PayloadMsg|None` or `bad_msg_as_dict: dict|None` to `MsgTypeError.from_decode()`. - expose public props for both ^ whilst dropping prior `.msgdict`. - rework `.from_decode()` to explicitly accept `**extra_msgdata: dict` |_ only overriding it from any `bad_msg_as_dict` if the keys are found in `_ipcmsg_keys`, **except** for `_bad_msg` when `bad_msg` is passed. |_ drop `.ipc_msg` passthrough. |_ drop `msgdict` input. - adjust `.cid` to only pull from the `.bad_msg` if set. Related fixes/adjustments: - `pack_from_raise()` should pull `boxed_type_str` from `boxed_type.__name__`, not the `type()` of it.. also add a `hide_tb: bool` flag. - don't include `_msg_dict` and `_bad_msg` in the `_body_fields` set. - allow more granular boxed traceback-str controls: |_ allow passing a `tb_str: str` explicitly in which case we use it verbatim and presume caller knows what they're doing. |_ when not provided, use the more explicit `traceback.format_exception(exc)` since the error instance is a required input (we still fail back to the old `.format_exc()` call if for some reason the caller passes `None`; but that should be a bug right?). |_ if a `tb: TracebackType` and a `tb_str` is passed, concat them. - in `RemoteActorError.pformat()` don't indent the `._message` part used for the `body` when `with_type_header == False`. - update `_mk_msg_type_err()` to use `bad_msg`/`bad_msg_as_dict` appropriately and drop passing `ipc_msg`. 
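As a quick usage sketch of the reworked ctor (kwarg names per the notes
above; this is illustrative, not an actual call-site from the diff):

    # sketch: stash the failed-to-decode msg on the MTE so user code
    # and/or the runtime can introspect it after the fact.
    mte = MsgTypeError.from_decode(
        message="invalid `Return` payload\n..",
        bad_msg=msg,  # the original `PayloadMsg` instance
        # bad_msg_as_dict=msg_dict,  # and/or the `Any`-decoded form
    )
    assert mte.bad_msg is msg
    assert mte.cid == msg.cid  # `.cid` is now pulled from `.bad_msg`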
--- tractor/_exceptions.py | 220 +++++++++++++++++++++++++++-------------- 1 file changed, 148 insertions(+), 72 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 9a94bbdb..85957356 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -22,6 +22,9 @@ from __future__ import annotations import builtins import importlib from pprint import pformat +from types import ( + TracebackType, +) from typing import ( Any, Callable, @@ -92,26 +95,30 @@ _ipcmsg_keys: list[str] = [ fi.name for fi, k, v in iter_fields(Error) - ] _body_fields: list[str] = list( set(_ipcmsg_keys) - # NOTE: don't show fields that either don't provide - # any extra useful info or that are already shown - # as part of `.__repr__()` output. + # XXX NOTE: DON'T-SHOW-FIELDS + # - don't provide any extra useful info or, + # - are already shown as part of `.__repr__()` or, + # - are sub-type specific. - { 'src_type_str', 'boxed_type_str', 'tb_str', 'relay_path', - '_msg_dict', 'cid', - # since only ctxc should show it but `Error` does + # only ctxc should show it but `Error` does # have it as an optional field. 'canceller', + + # only for MTEs and generally only used + # when devving/testing/debugging. + '_msg_dict', + '_bad_msg', } ) @@ -146,6 +153,7 @@ def pack_from_raise( |MsgTypeError ), cid: str, + hide_tb: bool = True, **rae_fields, @@ -156,7 +164,7 @@ def pack_from_raise( `Error`-msg using `pack_error()` to extract the tb info. ''' - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb try: raise local_err except type(local_err) as local_err: @@ -231,7 +239,8 @@ class RemoteActorError(Exception): if ( extra_msgdata - and ipc_msg + and + ipc_msg ): # XXX mutate the orig msg directly from # manually provided input params. @@ -261,17 +270,16 @@ class RemoteActorError(Exception): # either by customizing `ContextCancelled.__init__()` or # through a special factor func? elif boxed_type: - boxed_type_str: str = type(boxed_type).__name__ + boxed_type_str: str = boxed_type.__name__ if ( ipc_msg - and not self._ipc_msg.boxed_type_str + and + self._ipc_msg.boxed_type_str != boxed_type_str ): self._ipc_msg.boxed_type_str = boxed_type_str assert self.boxed_type_str == self._ipc_msg.boxed_type_str - else: - self._extra_msgdata['boxed_type_str'] = boxed_type_str - + # ensure any roundtripping evals to the input value assert self.boxed_type is boxed_type @property @@ -309,7 +317,9 @@ class RemoteActorError(Exception): if self._ipc_msg else {} ) - return self._extra_msgdata | msgdata + return { + k: v for k, v in self._extra_msgdata.items() + } | msgdata @property def src_type_str(self) -> str: @@ -502,6 +512,8 @@ class RemoteActorError(Exception): ''' header: str = '' + body: str = '' + if with_type_header: header: str = f'<{type(self).__name__}(\n' @@ -525,24 +537,22 @@ class RemoteActorError(Exception): ) if not with_type_header: body = '\n' + body - else: - first: str = '' - message: str = self._message + elif message := self._message: # split off the first line so it isn't indented # the same like the "boxed content". 
if not with_type_header: lines: list[str] = message.splitlines() - first = lines[0] - message = ''.join(lines[1:]) + first: str = lines[0] + message: str = message.removeprefix(first) + + else: + first: str = '' body: str = ( first + - textwrap.indent( - message, - prefix=' ', - ) + message + '\n' ) @@ -708,52 +718,72 @@ class MsgTypeError( ] @property - def msg_dict(self) -> dict[str, Any]: + def bad_msg(self) -> PayloadMsg|None: ''' - If the underlying IPC `MsgType` was received from a remote - actor but was unable to be decoded to a native - `Yield`|`Started`|`Return` struct, the interchange backend - native format decoder can be used to stash a `dict` - version for introspection by the invalidating RPC task. + Ref to the the original invalid IPC shuttle msg which failed + to decode thus providing for the reason for this error. ''' - return self.msgdata.get('_msg_dict') + if ( + (_bad_msg := self.msgdata.get('_bad_msg')) + and + isinstance(_bad_msg, PayloadMsg) + ): + return _bad_msg - @property - def expected_msg(self) -> MsgType|None: - ''' - Attempt to construct what would have been the original - `MsgType`-with-payload subtype (i.e. an instance from the set - of msgs in `.msg.types._payload_msgs`) which failed - validation. - - ''' - if msg_dict := self.msg_dict.copy(): + elif bad_msg_dict := self.bad_msg_as_dict: return msgtypes.from_dict_msg( - dict_msg=msg_dict, + dict_msg=bad_msg_dict.copy(), # use_pretty=True, # ^-TODO-^ would luv to use this BUT then the # `field_prefix` in `pformat_boxed_tb()` cucks it # all up.. XD ) + return None + @property + def bad_msg_as_dict(self) -> dict[str, Any]: + ''' + If the underlying IPC `MsgType` was received from a remote + actor but was unable to be decoded to a native `PayloadMsg` + (`Yield`|`Started`|`Return`) struct, the interchange backend + native format decoder can be used to stash a `dict` version + for introspection by the invalidating RPC task. + + Optionally when this error is constructed from + `.from_decode()` the caller can attempt to construct what + would have been the original `MsgType`-with-payload subtype + (i.e. an instance from the set of msgs in + `.msg.types._payload_msgs`) which failed validation. + + ''' + return self.msgdata.get('_bad_msg_as_dict') + @property def expected_msg_type(self) -> Type[MsgType]|None: - return type(self.expected_msg) + return type(self.bad_msg) @property def cid(self) -> str: - # pre-packed using `.from_decode()` constructor - return self.msgdata.get('cid') + # pull from required `.bad_msg` ref (or src dict) + if bad_msg := self.bad_msg: + return bad_msg.cid + + return self.msgdata['cid'] @classmethod def from_decode( cls, message: str, - ipc_msg: PayloadMsg|None = None, - msgdict: dict|None = None, + bad_msg: PayloadMsg|None = None, + bad_msg_as_dict: dict|None = None, + + # if provided, expand and pack all RAE compat fields into the + # `._extra_msgdata` auxillary data `dict` internal to + # `RemoteActorError`. + **extra_msgdata, ) -> MsgTypeError: ''' @@ -763,25 +793,44 @@ class MsgTypeError( (which is normally the caller of this). ''' - # if provided, expand and pack all RAE compat fields into the - # `._extra_msgdata` auxillary data `dict` internal to - # `RemoteActorError`. 
- extra_msgdata: dict = {} - if msgdict: - extra_msgdata: dict = { - k: v - for k, v in msgdict.items() - if k in _ipcmsg_keys - } + if bad_msg_as_dict: # NOTE: original "vanilla decode" of the msg-bytes # is placed inside a value readable from # `.msgdata['_msg_dict']` - extra_msgdata['_msg_dict'] = msgdict + extra_msgdata['_bad_msg_as_dict'] = bad_msg_as_dict + + # scrape out any underlying fields from the + # msg that failed validation. + for k, v in bad_msg_as_dict.items(): + if ( + # always skip a duplicate entry + # if already provided as an arg + k == '_bad_msg' and bad_msg + or + # skip anything not in the default msg-field set. + k not in _ipcmsg_keys + # k not in _body_fields + ): + continue + + extra_msgdata[k] = v + + + elif bad_msg: + if not isinstance(bad_msg, PayloadMsg): + raise TypeError( + 'The provided `bad_msg` is not a `PayloadMsg` type?\n\n' + f'{bad_msg}' + ) + extra_msgdata['_bad_msg'] = bad_msg + extra_msgdata['cid'] = bad_msg.cid + + if 'cid' not in extra_msgdata: + import pdbp; pdbp.set_trace() return cls( message=message, boxed_type=cls, - ipc_msg=ipc_msg, **extra_msgdata, ) @@ -836,9 +885,10 @@ class MessagingError(Exception): def pack_error( exc: BaseException|RemoteActorError, - tb: str|None = None, cid: str|None = None, src_uid: tuple[str, str]|None = None, + tb: TracebackType|None = None, + tb_str: str = '', ) -> Error: ''' @@ -848,10 +898,28 @@ def pack_error( the receiver side using `unpack_error()` below. ''' - if tb: - tb_str = ''.join(traceback.format_tb(tb)) + if not tb_str: + tb_str: str = ( + ''.join(traceback.format_exception(exc)) + + # TODO: can we remove this is `exc` is required? + or + # NOTE: this is just a shorthand for the "last error" as + # provided by `sys.exeception()`, see: + # - https://docs.python.org/3/library/traceback.html#traceback.print_exc + # - https://docs.python.org/3/library/traceback.html#traceback.format_exc + traceback.format_exc() + ) else: - tb_str = traceback.format_exc() + if tb_str[-2:] != '\n': + tb_str += '\n' + + # when caller provides a tb instance (say pulled from some other + # src error's `.__traceback__`) we use that as the "boxed" + # tb-string instead. + if tb: + # https://docs.python.org/3/library/traceback.html#traceback.format_list + tb_str: str = ''.join(traceback.format_tb(tb)) + tb_str error_msg: dict[ # for IPC str, @@ -1115,7 +1183,7 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, - src_err_msg: Error|None = None, + # src_err_msg: Error|None = None, **mte_kwargs, @@ -1164,10 +1232,10 @@ def _mk_msg_type_err( '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' ) - msgtyperr = MsgTypeError( message=message, ipc_msg=msg, + bad_msg=msg, ) # ya, might be `None` msgtyperr.__cause__ = src_type_error @@ -1175,6 +1243,9 @@ def _mk_msg_type_err( # `Channel.recv()` case else: + msg_dict: dict|None = None + bad_msg: PayloadMsg|None = None + if is_invalid_payload: msg_type: str = type(msg) any_pld: Any = msgpack.decode(msg.pld) @@ -1186,19 +1257,20 @@ def _mk_msg_type_err( # f' |_pld: {codec.pld_spec_str}\n'# != {any_pld!r}\n' # f')>\n\n' ) + # src_err_msg = msg + bad_msg = msg # TODO: should we just decode the msg to a dict despite # only the payload being wrong? # -[ ] maybe the better design is to break this construct # logic into a separate explicit helper raiser-func? 
- msg_dict = None else: - msg: bytes # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. + # interchange-prot (i.e. without any `msgspec.Struct` + # handling) so that we can determine what + # `.msg.types.PayloadMsg` is the culprit by reporting the + # received value. + msg: bytes msg_dict: dict = msgpack.decode(msg) msg_type_name: str = msg_dict['msg_type'] msg_type = getattr(msgtypes, msg_type_name) @@ -1235,9 +1307,13 @@ def _mk_msg_type_err( if verb_header: message = f'{verb_header} ' + message + # if not isinstance(bad_msg, PayloadMsg): + # import pdbp; pdbp.set_trace() + msgtyperr = MsgTypeError.from_decode( message=message, - msgdict=msg_dict, + bad_msg=bad_msg, + bad_msg_as_dict=msg_dict, # NOTE: for the send-side `.started()` pld-validate # case we actually set the `._ipc_msg` AFTER we return @@ -1245,7 +1321,7 @@ def _mk_msg_type_err( # want to emulate the `Error` from the mte we build here # Bo # so by default in that case this is set to `None` - ipc_msg=src_err_msg, + # ipc_msg=src_err_msg, ) msgtyperr.__cause__ = src_validation_error return msgtyperr -- 2.34.1 From a520951928ebd1599ef5eecd3bb90432986c845b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 09:36:26 -0400 Subject: [PATCH 124/190] Parameterize the `return_msg_type` in `._invoke()` Since we also handle a runtime-specific `CancelAck`, allow the caller-scheduler to pass in the expected return-type msg per the RPC msg endpoint loop. --- tractor/_rpc.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 0f200d0e..1ea9bce3 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -63,6 +63,7 @@ from .log import get_logger from .msg import ( current_codec, MsgCodec, + PayloadT, NamespacePath, pretty_struct, ) @@ -96,7 +97,7 @@ async def _invoke_non_context( treat_as_gen: bool, is_rpc: bool, - return_msg: Return|CancelAck = Return, + return_msg_type: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -218,7 +219,7 @@ async def _invoke_non_context( and chan.connected() ): try: - ret_msg = return_msg( + ret_msg = return_msg_type( cid=cid, pld=result, ) @@ -417,7 +418,7 @@ async def _invoke( is_rpc: bool = True, hide_tb: bool = True, - return_msg: Return|CancelAck = Return, + return_msg_type: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -531,7 +532,7 @@ async def _invoke( kwargs, treat_as_gen, is_rpc, - return_msg, + return_msg_type, task_status, ) # XXX below fallthrough is ONLY for `@context` eps @@ -591,18 +592,21 @@ async def _invoke( ctx._scope = tn.cancel_scope task_status.started(ctx) - # TODO: should would be nice to have our - # `TaskMngr` nursery here! - res: Any = await coro - ctx._result = res - - # deliver final result to caller side. - await chan.send( - return_msg( - cid=cid, - pld=res, - ) + # TODO: better `trionics` tooling: + # -[ ] should would be nice to have our `TaskMngr` + # nursery here! + # -[ ] payload value checking like we do with + # `.started()` such that the debbuger can engage + # here in the child task instead of waiting for the + # parent to crash with it's own MTE.. + res: Any|PayloadT = await coro + return_msg: Return|CancelAck = return_msg_type( + cid=cid, + pld=res, ) + # set and shuttle final result to "parent"-side task. 
+ ctx._result = res + await chan.send(return_msg) # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, @@ -938,7 +942,7 @@ async def process_messages( actor.cancel, kwargs, is_rpc=False, - return_msg=CancelAck, + return_msg_type=CancelAck, ) log.runtime( @@ -972,7 +976,7 @@ async def process_messages( actor._cancel_task, kwargs, is_rpc=False, - return_msg=CancelAck, + return_msg_type=CancelAck, ) except BaseException: log.exception( -- 2.34.1 From eaddde94c5f4253344e3e6c14eed54c30b13f1e1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 09:55:16 -0400 Subject: [PATCH 125/190] Drop `msg.types.Msg` for new replacement types The `TypeAlias` for the msg type-group is now `MsgType` and any user touching shuttle messages can now be typed as `PayloadMsg`. Relatedly, add MTE specific `Error._bad_msg[_as_dict]` fields which are handy for introspection of remote decode failures. --- tractor/_ipc.py | 2 +- tractor/msg/_codec.py | 10 +++++----- tractor/msg/types.py | 39 +++++++++++++++++++++++---------------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 511a053c..ec7d348a 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -291,7 +291,7 @@ class MsgpackTCPStream(MsgTransport): async def send( self, - msg: msgtypes.Msg, + msg: msgtypes.MsgType, strict_types: bool = True, # hide_tb: bool = False, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index cd86552f..e1c59e94 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -140,7 +140,7 @@ class MsgDec(Struct): # * also a `.__contains__()` for doing `None in # TypeSpec[None|int]` since rn you need to do it on # `.__args__` for unions.. - # - `MsgSpec: Union[Type[Msg]] + # - `MsgSpec: Union[MsgType] # # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params @@ -188,7 +188,7 @@ def mk_dec( return MsgDec( _dec=msgpack.Decoder( - type=spec, # like `Msg[Any]` + type=spec, # like `MsgType[Any]` dec_hook=dec_hook, ) ) @@ -561,7 +561,7 @@ def mk_codec( ''' # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # god-boxing-msg subtypes, parameterizing the `PayloadMsg.pld: PayloadT` # for the decoder such that all sub-type msgs in our SCIPP # will automatically decode to a type-"limited" payload (`Struct`) # object (set). @@ -607,7 +607,7 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # The built-in IPC `Msg` spec. # Our composing "shuttle" protocol which allows `tractor`-app code -# to use any `msgspec` supported type as the `Msg.pld` payload, +# to use any `msgspec` supported type as the `PayloadMsg.pld` payload, # https://jcristharif.com/msgspec/supported-types.html # _def_tractor_codec: MsgCodec = mk_codec( @@ -743,7 +743,7 @@ def limit_msg_spec( ) -> MsgCodec: ''' Apply a `MsgCodec` that will natively decode the SC-msg set's - `Msg.pld: Union[Type[Struct]]` payload fields using + `PayloadMsg.pld: Union[Type[Struct]]` payload fields using tagged-unions of `msgspec.Struct`s from the `payload_types` for all IPC contexts in use by the current `trio.Task`. 
diff --git a/tractor/msg/types.py b/tractor/msg/types.py index f8205c23..08511ec0 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -89,11 +89,12 @@ class PayloadMsg( # -[ ] `uuid.UUID` which has multi-protocol support # https://jcristharif.com/msgspec/supported-types.html#uuid - # The msgs "payload" (spelled without vowels): + # The msg's "payload" (spelled without vowels): # https://en.wikipedia.org/wiki/Payload_(computing) - # - # NOTE: inherited from any `Msg` (and maybe overriden - # by use of `limit_msg_spec()`), but by default is + pld: Raw + + # ^-NOTE-^ inherited from any `PayloadMsg` (and maybe type + # overriden via the `._ops.limit_plds()` API), but by default is # parameterized to be `Any`. # # XXX this `Union` must strictly NOT contain `Any` if @@ -106,7 +107,6 @@ class PayloadMsg( # TODO: could also be set to `msgspec.Raw` if the sub-decoders # approach is preferred over the generic parameterization # approach as take by `mk_msg_spec()` below. - pld: Raw # TODO: complete rename @@ -412,19 +412,24 @@ class Error( relay_path: list[tuple[str, str]] tb_str: str - cid: str|None = None - - # TODO: use UNSET or don't include them via + # TODO: only optionally include sub-type specfic fields? + # -[ ] use UNSET or don't include them via `omit_defaults` (see + # inheritance-line options above) # - # `ContextCancelled` + # `ContextCancelled` reports the src cancelling `Actor.uid` canceller: tuple[str, str]|None = None - # `StreamOverrun` + # `StreamOverrun`-specific src `Actor.uid` sender: tuple[str, str]|None = None - # for the `MsgTypeError` case where the receiver side - # decodes the underlying original `Msg`-subtype - _msg_dict: dict|None = None + # `MsgTypeError` meta-data + cid: str|None = None + # when the receiver side fails to decode a delivered + # `PayloadMsg`-subtype; one and/or both the msg-struct instance + # and `Any`-decoded to `dict` of the msg are set and relayed + # (back to the sender) for introspection. + _bad_msg: Started|Yield|Return|None = None + _bad_msg_as_dict: dict|None = None def from_dict_msg( @@ -436,9 +441,11 @@ def from_dict_msg( ) -> MsgType: ''' - Helper to build a specific `MsgType` struct from - a "vanilla" decoded `dict`-ified equivalent of the - msg: i.e. if the `msgpack.Decoder.type == Any`. + Helper to build a specific `MsgType` struct from a "vanilla" + decoded `dict`-ified equivalent of the msg: i.e. if the + `msgpack.Decoder.type == Any`, the default when using + `msgspec.msgpack` and not "typed decoding" using + `msgspec.Struct`. ''' msg_type_tag_field: str = ( -- 2.34.1 From 9dc7602f21967f9927ac863ff8fb9503c9c36cc7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 11:05:44 -0400 Subject: [PATCH 126/190] Fix `test_basic_payload_spec` bad msg matching Expecting `Started` or `Return` with respective bad `.pld` values depending on what type of failure is test parametrized. This makes the suite run green it seems B) --- tests/test_pldrx_limiting.py | 50 ++++++++++-------------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index d658fb51..562164c7 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -6,30 +6,19 @@ related settings around IPC contexts. 
''' from contextlib import ( asynccontextmanager as acm, - contextmanager as cm, -) -# import typing -from typing import ( - # Any, - TypeAlias, - # Union, ) from contextvars import ( Context, ) from msgspec import ( - # structs, - # msgpack, Struct, - # ValidationError, ) import pytest import trio import tractor from tractor import ( - # _state, MsgTypeError, current_ipc_ctx, Portal, @@ -40,20 +29,9 @@ from tractor.msg import ( ) from tractor.msg import ( _codec, - # _ctxvar_MsgCodec, - - # NamespacePath, - # MsgCodec, - # mk_codec, - # apply_codec, - # current_codec, ) from tractor.msg.types import ( log, - # _payload_msgs, - # PayloadMsg, - # Started, - # mk_msg_spec, ) @@ -64,23 +42,10 @@ class PldMsg(Struct): maybe_msg_spec = PldMsg|None -@cm -def custom_spec( - ctx: Context, - spec: TypeAlias, -) -> _codec.MsgCodec: - ''' - Apply a custom payload spec, remove on exit. - - ''' - rx: msgops.PldRx = ctx._pld_rx - - @acm async def maybe_expect_raises( raises: BaseException|None = None, ensure_in_message: list[str]|None = None, - reraise: bool = False, timeout: int = 3, ) -> None: @@ -271,6 +236,17 @@ def test_basic_payload_spec( # since not opened yet. assert current_ipc_ctx() is None + if invalid_started: + msg_type_str: str = 'Started' + bad_value_str: str = '10' + elif invalid_return: + msg_type_str: str = 'Return' + bad_value_str: str = "'yo'" + else: + # XXX but should never be used below then.. + msg_type_str: str = '' + bad_value_str: str = '' + async with ( maybe_expect_raises( raises=MsgTypeError if ( @@ -279,8 +255,8 @@ def test_basic_payload_spec( invalid_started ) else None, ensure_in_message=[ - "invalid `Return` payload", - "value: `'yo'` does not match type-spec: `Return.pld: PldMsg|NoneType`", + f"invalid `{msg_type_str}` payload", + f"value: `{bad_value_str}` does not match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], ), p.open_context( -- 2.34.1 From b992ff73da6868989a09534c37f3385e9d053e2c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 11:08:27 -0400 Subject: [PATCH 127/190] Factor `.started()` validation into `.msg._ops` Filling out the helper `validate_payload_msg()` staged in a prior commit and adjusting all imports to match. Also add a `raise_mte: bool` flag for potential usage where the caller wants to handle the MTE instance themselves. 
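
As a rough usage sketch (only the helper's signature here is from this
patch; the surrounding scaffolding is hypothetical), a caller can opt
out of raising and introspect the MTE instance instead:

    from tractor.msg import _ops as msgops

    # pre-validate a `Started` msg inside some ctx-endpoint task
    # before shipping it over the wire,
    mte = msgops.validate_payload_msg(
        pld_msg=started_msg,
        pld_value=value,
        ipc=ctx,
        raise_mte=False,  # hand back the MTE instead of raising
    )
    if mte is not None:
        ...  # custom handling/logging of the bad payload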
--- tractor/_context.py | 57 +++++++---------------------------------- tractor/msg/_ops.py | 62 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index fed9f2bc..68a23da0 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -58,9 +58,6 @@ from typing import ( import warnings # ------ - ------ import trio -from msgspec import ( - ValidationError, -) # ------ - ------ from ._exceptions import ( ContextCancelled, @@ -78,19 +75,16 @@ from .log import ( from .msg import ( Error, MsgType, - MsgCodec, NamespacePath, PayloadT, Started, Stop, Yield, - current_codec, pretty_struct, _ops as msgops, ) from ._ipc import ( Channel, - _mk_msg_type_err, ) from ._streaming import MsgStream from ._state import ( @@ -1657,54 +1651,21 @@ class Context: # __tracebackhide__: bool = hide_tb if validate_pld_spec: - # __tracebackhide__: bool = False - codec: MsgCodec = current_codec() - msg_bytes: bytes = codec.encode(started_msg) - try: - roundtripped: Started = codec.decode(msg_bytes) - # pld: PayloadT = await self.pld_rx.recv_pld( - pld: PayloadT = self.pld_rx.dec_msg( - msg=roundtripped, - ipc=self, - expect_msg=Started, - hide_tb=hide_tb, - is_started_send_side=True, - ) - if ( - strict_pld_parity - and - pld != value - ): - # TODO: make that one a mod func too.. - diff = pretty_struct.Struct.__sub__( - roundtripped, - started_msg, - ) - complaint: str = ( - 'Started value does not match after roundtrip?\n\n' - f'{diff}' - ) - raise ValidationError(complaint) - - # raise any msg type error NO MATTER WHAT! - except ValidationError as verr: - # always show this src frame in the tb - # __tracebackhide__: bool = False - raise _mk_msg_type_err( - msg=roundtripped, - codec=codec, - src_validation_error=verr, - verb_header='Trying to send ', - is_invalid_payload=True, - ) from verr + msgops.validate_payload_msg( + pld_msg=started_msg, + pld_value=value, + ipc=self, + strict_pld_parity=strict_pld_parity, + hide_tb=hide_tb, + ) # TODO: maybe a flag to by-pass encode op if already done # here in caller? await self.chan.send(started_msg) # set msg-related internal runtime-state - self._started_called = True - self._started_msg = started_msg + self._started_called: bool = True + self._started_msg: Started = started_msg self._started_pld = value async def _drain_overflows( diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 6faf78ef..e22d39f1 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -53,6 +53,8 @@ from tractor._state import current_ipc_ctx from ._codec import ( mk_dec, MsgDec, + MsgCodec, + current_codec, ) from .types import ( CancelAck, @@ -737,9 +739,61 @@ async def drain_to_final_msg( ) -# TODO: factor logic from `.Context.started()` for send-side -# validate raising! def validate_payload_msg( - msg: Started|Yield|Return, + pld_msg: Started|Yield|Return, + pld_value: PayloadT, + ipc: Context|MsgStream, + + raise_mte: bool = True, + strict_pld_parity: bool = False, + hide_tb: bool = True, + ) -> MsgTypeError|None: - ... + ''' + Validate a `PayloadMsg.pld` value with the current + IPC ctx's `PldRx` and raise an appropriate `MsgTypeError` + on failure. 
+ + ''' + __tracebackhide__: bool = hide_tb + codec: MsgCodec = current_codec() + msg_bytes: bytes = codec.encode(pld_msg) + try: + roundtripped: Started = codec.decode(msg_bytes) + ctx: Context = getattr(ipc, 'ctx', ipc) + pld: PayloadT = ctx.pld_rx.dec_msg( + msg=roundtripped, + ipc=ipc, + expect_msg=Started, + hide_tb=hide_tb, + is_started_send_side=True, + ) + if ( + strict_pld_parity + and + pld != pld_value + ): + # TODO: make that one a mod func too.. + diff = pretty_struct.Struct.__sub__( + roundtripped, + pld_msg, + ) + complaint: str = ( + 'Started value does not match after roundtrip?\n\n' + f'{diff}' + ) + raise ValidationError(complaint) + + # raise any msg type error NO MATTER WHAT! + except ValidationError as verr: + mte: MsgTypeError = _mk_msg_type_err( + msg=roundtripped, + codec=codec, + src_validation_error=verr, + verb_header='Trying to send ', + is_invalid_payload=True, + ) + if not raise_mte: + return mte + + raise mte from verr -- 2.34.1 From 7bde00d7117a1cad44b8eb2b6c708ec30272aab2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 15:30:30 -0400 Subject: [PATCH 128/190] Set remote errors in `_raise_from_unexpected_msg()` By calling `Context._maybe_cancel_and_set_remote_error(exc)` on any unpacked `Error` msg; provides for `Context.maybe_error` consistency to match all other error delivery cases. --- tractor/_exceptions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 85957356..0dfaf675 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -902,7 +902,7 @@ def pack_error( tb_str: str = ( ''.join(traceback.format_exception(exc)) - # TODO: can we remove this is `exc` is required? + # TODO: can we remove this since `exc` is required.. right? or # NOTE: this is just a shorthand for the "last error" as # provided by `sys.exeception()`, see: @@ -917,8 +917,8 @@ def pack_error( # when caller provides a tb instance (say pulled from some other # src error's `.__traceback__`) we use that as the "boxed" # tb-string instead. + # https://docs.python.org/3/library/traceback.html#traceback.format_list if tb: - # https://docs.python.org/3/library/traceback.html#traceback.format_list tb_str: str = ''.join(traceback.format_tb(tb)) + tb_str error_msg: dict[ # for IPC @@ -961,15 +961,15 @@ def pack_error( error_msg['src_type_str'] = type(exc).__name__ error_msg['boxed_type_str'] = type(exc).__name__ - # XXX alawys append us the last relay in error propagation path + # XXX always append us the last relay in error propagation path error_msg.setdefault( 'relay_path', [], ).append(our_uid) - # XXX NOTE: always ensure the traceback-str is from the - # locally raised error (**not** the prior relay's boxed - # content's in `._ipc_msg.tb_str`). + # XXX NOTE XXX always ensure the traceback-str content is from + # the locally raised error (so, NOT the prior relay's boxed + # `._ipc_msg.tb_str`). error_msg['tb_str'] = tb_str if cid is not None: @@ -1109,6 +1109,7 @@ def _raise_from_unexpected_msg( msg, ctx.chan, ) + ctx._maybe_cancel_and_set_remote_error(exc) raise exc from src_err # `MsgStream` termination msg. 
-- 2.34.1 From a0091b77d8aa7d9d0a6bf21cd3596dd66a60933b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 15:52:54 -0400 Subject: [PATCH 129/190] Raise send-side MTEs inline in `PldRx.dec_msg()` So when `is_started_send_side is True` we raise the newly created `MsgTypeError` (MTE) directly instead of doing all the `Error`-msg pack and unpack to raise stuff via `_raise_from_unexpected_msg()` since the raise should happen send side anyway and so doesn't emulate any remote fault like in a bad `Return` or `Started` without send-side pld-spec validation. Oh, and proxy-through the `hide_tb: bool` input from `.drain_to_final_msg()` to `.recv_msg_w_pld()`. --- tractor/msg/_ops.py | 75 ++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index e22d39f1..6f472afd 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -215,6 +215,9 @@ class PldRx(Struct): **dec_msg_kwargs, ) + # TODO: rename to, + # -[ ] `.decode_pld()`? + # -[ ] `.dec_pld()`? def dec_msg( self, msg: MsgType, @@ -248,8 +251,8 @@ class PldRx(Struct): pld: PayloadT = self._pld_dec.decode(pld) log.runtime( 'Decoded msg payload\n\n' - f'{msg}\n\n' - f'where payload is\n' + f'{msg}\n' + f'where payload decoded as\n' f'|_pld={pld!r}\n' ) return pld @@ -265,13 +268,7 @@ class PldRx(Struct): src_validation_error=valerr, is_invalid_payload=True, expected_msg=expect_msg, - # ipc_msg=msg, ) - # NOTE: override the `msg` passed to - # `_raise_from_unexpected_msg()` (below) so so that - # we're effectively able to use that same func to - # unpack and raise an "emulated remote `Error`" of - # this local MTE. err_msg: Error = pack_error( exc=mte, cid=msg.cid, @@ -283,34 +280,55 @@ class PldRx(Struct): # tb=valerr.__traceback__, tb_str=mte._message, ) - # ^-TODO-^ just raise this inline instead of all the - # pack-unpack-repack non-sense! - mte._ipc_msg = err_msg - msg = err_msg - # set emulated remote error more-or-less as the - # runtime would - ctx: Context = getattr(ipc, 'ctx', ipc) + # NOTE: just raise the MTE inline instead of all + # the pack-unpack-repack non-sense when this is + # a "send side" validation error. + if is_started_send_side: + raise mte + + # XXX TODO: remove this right? + # => any bad stated/return values should + # always be treated a remote errors right? + # + # if ( + # expect_msg is Return + # or expect_msg is Started + # ): + # # set emulated remote error more-or-less as the + # # runtime would + # ctx: Context = getattr(ipc, 'ctx', ipc) + # ctx._maybe_cancel_and_set_remote_error(mte) + + + # XXX override the `msg` passed to + # `_raise_from_unexpected_msg()` (below) so so + # that we're effectively able to use that same + # func to unpack and raise an "emulated remote + # `Error`" of this local MTE. + msg = err_msg + # XXX NOTE: so when the `_raise_from_unexpected_msg()` + # raises the boxed `err_msg` from above it raises + # it from the above caught interchange-lib + # validation error. + src_err = valerr # TODO: should we instead make this explicit and # use the above masked `is_started_send_decode`, # expecting the `Context.started()` caller to set # it? Rn this is kinda, howyousayyy, implicitly # edge-case-y.. - if ( - expect_msg is not Started - and not is_started_send_side - ): - ctx._maybe_cancel_and_set_remote_error(mte) - - # XXX NOTE: so when the `_raise_from_unexpected_msg()` - # raises the boxed `err_msg` from above it raises - # it from `None`. 
- src_err = valerr - # if is_started_send_side: - # src_err = None - + # TODO: remove this since it's been added to + # `_raise_from_unexpected_msg()`..? + # if ( + # expect_msg is not Started + # and not is_started_send_side + # ): + # # set emulated remote error more-or-less as the + # # runtime would + # ctx: Context = getattr(ipc, 'ctx', ipc) + # ctx._maybe_cancel_and_set_remote_error(mte) # XXX some other decoder specific failure? # except TypeError as src_error: @@ -561,6 +579,7 @@ async def drain_to_final_msg( ipc=ctx, expect_msg=Return, raise_error=False, + hide_tb=hide_tb, ) # ^-TODO-^ some bad ideas? # -[ ] wrap final outcome .receive() in a scope so -- 2.34.1 From b3387aca6178db4c5d5f0dbb4477caeb03bbde0a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 16:03:36 -0400 Subject: [PATCH 130/190] Don't (noisly) log about runtime cancel RPC tasks Since in the case of the `Actor._cancel_task()` related runtime eps we actually don't EVER register them in `Actor._rpc_tasks`.. logging about them is just needless noise, though maybe we should track them in a diff table; something like a `._runtime_rpc_tasks`? Drop the cancel-request-for-stale-RPC-task (`KeyError` case in `Actor._cancel_task()`) log-emit level in to `.runtime()`; it's generally not useful info other then for granular race condition eval when hacking the runtime. --- tractor/_rpc.py | 18 ++++++++++++------ tractor/_runtime.py | 7 ++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 1ea9bce3..84b81c32 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -395,12 +395,18 @@ async def _errors_relayed_via_ipc( f'|_{ctx._task}\n' f' >> {ctx.repr_rpc}\n' ) - else: - log.cancel( - 'Failed to de-alloc internal runtime cancel task?\n' - f'|_{ctx._task}\n' - f' >> {ctx.repr_rpc}\n' - ) + # TODO: remove this right? rn the only non-`is_rpc` cases + # are cancellation methods and according the RPC loop eps + # for thoses below, nothing is ever registered in + # `Actor._rpc_tasks` for those cases.. but should we? + # + # -[ ] maybe we should have an equiv `Actor._runtime_rpc_tasks`? + # else: + # log.cancel( + # 'Failed to de-alloc internal runtime cancel task?\n' + # f'|_{ctx._task}\n' + # f' >> {ctx.repr_rpc}\n' + # ) finally: if not actor._rpc_tasks: diff --git a/tractor/_runtime.py b/tractor/_runtime.py index f267ff67..fe2ecc00 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -1258,9 +1258,10 @@ class Actor: # - child returns a result before cancel-msg/ctxc-raised # - child self raises ctxc before parent send request, # - child errors prior to cancel req. - log.cancel( - 'Cancel request invalid, RPC task already completed?\n\n' - f'<= canceller: {requesting_uid}\n\n' + log.runtime( + 'Cancel request for invalid RPC task.\n' + 'The task likely already completed or was never started!\n\n' + f'<= canceller: {requesting_uid}\n' f'=> {cid}@{parent_chan.uid}\n' f' |_{parent_chan}\n' ) -- 2.34.1 From 4d935dcfb03dafbeb386ab674a240075ffe2a098 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 16:11:01 -0400 Subject: [PATCH 131/190] Raise remote errors rxed during `Context` child-sync More specifically, if `.open_context()` is cancelled when awaiting the first `Context.started()` during the child task sync phase, check to see if it was due to `._scope.cancel_called` and raise any remote error via `.maybe_raise()` instead the `trio.Cancelled` like in every other remote-error handling case. 
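
Condensed from the diff below, the child-task sync phase now handles
cancellation roughly like so (log/error message contents elided):

    try:
        started_msg, first = await ctx._pld_rx.recv_msg_w_pld(
            ipc=ctx,
            expect_msg=Started,
        )
    except trio.Cancelled as taskc:
        if not ctx._scope.cancel_called:
            raise  # unrelated cancel, bubble it up

        # our scope was cancelled, likely due to a remote error,
        # so raise any set `._remote_error` instead of the taskc.
        ctx.maybe_raise()
        raise InternalError(
            'Invalid cancellation during IPC ctx sync phase?\n'
        ) from taskc
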
Ensure we set `._scope[_nursery]` only after the `Started` has arrived
and been audited.
---
 tractor/_context.py | 74 ++++++++++++++++++++++++++++++---------------
 1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/tractor/_context.py b/tractor/_context.py
index 68a23da0..291bed16 100644
--- a/tractor/_context.py
+++ b/tractor/_context.py
@@ -664,7 +664,7 @@ class Context:
             'Setting remote error for ctx\n\n'
             f'<= {self.peer_side!r}: {self.chan.uid}\n'
             f'=> {self.side!r}: {self._actor.uid}\n\n'
-            f'{error}'
+            f'{error!r}'
         )
         self._remote_error: BaseException = error
 
@@ -718,7 +718,7 @@ class Context:
             log.error(
                 f'Remote context error:\n\n'
                 # f'{pformat(self)}\n'
-                f'{error}'
+                f'{error!r}'
             )
 
             if self._canceller is None:
@@ -742,26 +742,27 @@ class Context:
             and not cs.cancel_called
             and not cs.cancelled_caught
         ):
-            if not (
+            if (
                 msgerr
 
                 # NOTE: we allow user to config not cancelling the
                 # local scope on `MsgTypeError`s
-                and not self._cancel_on_msgerr
+                and
+                not self._cancel_on_msgerr
             ):
-                # TODO: it'd sure be handy to inject our own
-                # `trio.Cancelled` subtype here ;)
-                # https://github.com/goodboy/tractor/issues/368
-                message: str = 'Cancelling `Context._scope` !\n\n'
-                self._scope.cancel()
-
-            else:
                 message: str = (
                     'NOT Cancelling `Context._scope` since,\n'
                     f'Context._cancel_on_msgerr = {self._cancel_on_msgerr}\n\n'
                     f'AND we got a msg-type-error!\n'
                     f'{error}\n'
                 )
+            else:
+                # TODO: it'd sure be handy to inject our own
+                # `trio.Cancelled` subtype here ;)
+                # https://github.com/goodboy/tractor/issues/368
+                message: str = 'Cancelling `Context._scope` !\n\n'
+                self._scope.cancel()
+
         else:
             message: str = 'NOT cancelling `Context._scope` !\n\n'
             # from .devx import mk_pdb
@@ -2058,6 +2059,12 @@ async def open_context_from_portal(
         if maybe_msgdec:
             assert maybe_msgdec.pld_spec == pld_spec
 
+        # NOTE: this is an implicit runtime nursery used to,
+        # - start overrun queuing tasks as well as
+        #   for cancellation of the scope opened by the user.
+        ctx._scope_nursery: trio.Nursery = tn
+        ctx._scope: trio.CancelScope = tn.cancel_scope
+
         # XXX NOTE since `._scope` is NOT set BEFORE we retreive the
         # `Started`-msg any cancellation triggered
         # in `._maybe_cancel_and_set_remote_error()` will
@@ -2065,25 +2072,42 @@ async def open_context_from_portal(
         # -> it's expected that if there is an error in this phase of
         # the dialog, the `Error` msg should be raised from the `msg`
         # handling block below.
-        started_msg, first = await ctx._pld_rx.recv_msg_w_pld(
-            ipc=ctx,
-            expect_msg=Started,
-            passthrough_non_pld_msgs=False,
-            hide_tb=hide_tb,
-        )
+        try:
+            started_msg, first = await ctx._pld_rx.recv_msg_w_pld(
+                ipc=ctx,
+                expect_msg=Started,
+                passthrough_non_pld_msgs=False,
+                hide_tb=hide_tb,
+            )
+        except trio.Cancelled as taskc:
+            ctx_cs: trio.CancelScope = ctx._scope
+            if not ctx_cs.cancel_called:
+                raise
+
+            # from .devx import pause
+            # await pause(shield=True)
+
+            log.cancel(
+                'IPC ctx was cancelled during "child" task sync due to\n\n'
+                f'{ctx.maybe_error}\n'
+            )
+            # OW if the ctx's scope was cancelled manually,
+            # likely the `Context` was cancelled via a call to
+            # `._maybe_cancel_and_set_remote_error()` so ensure
+            # we raise the underlying `._remote_error` directly
+            # instead of bubbling that taskc.
+            ctx.maybe_raise()
+
+            # OW, some other unexpected cancel condition
+            # that should prolly never happen right?
+ raise InternalError( + 'Invalid cancellation during IPC ctx sync phase?\n' + ) from taskc - # from .devx import pause - # await pause() ctx._started_called: bool = True ctx._started_msg: bool = started_msg ctx._started_pld: bool = first - # NOTE: this in an implicit runtime nursery used to, - # - start overrun queuing tasks when as well as - # for cancellation of the scope opened by the user. - ctx._scope_nursery: trio.Nursery = tn - ctx._scope: trio.CancelScope = tn.cancel_scope - # deliver context instance and .started() msg value # in enter tuple. yield ctx, first -- 2.34.1 From fa2893cc874dfaf31387e8c4b76c179783bd0d51 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 19:46:42 -0400 Subject: [PATCH 132/190] Ensure ctx error-state matches the MTE scenario Namely checking that `Context._remote_error` is set to the raised MTE in the invalid started and return value cases since prior to the recent underlying changes to the `Context.result()` impl, it would not match. Further, - do asserts for non-MTE raising cases in both the parent and child. - add todos for testing ctx-outcomes for per-side-validation policies i anticipate supporting and implied msg-dialog race cases therein. --- tests/test_pldrx_limiting.py | 89 ++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 18 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index 562164c7..ee61dca0 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -148,12 +148,44 @@ async def child( # propagate to parent? if raise_on_started_mte: raise - else: - if expect_started_mte: - raise RuntimeError( - 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' - f'{started_value!r}\n' - ) + + # no-send-side-error fallthrough + if ( + validate_pld_spec + and + expect_started_mte + ): + raise RuntimeError( + 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' + f'{started_value!r}\n' + ) + + assert ( + not expect_started_mte + or + not validate_pld_spec + ) + + # if wait_for_parent_to_cancel: + # ... + # + # ^-TODO-^ logic for diff validation policies on each side: + # + # -[ ] ensure that if we don't validate on the send + # side, that we are eventually error-cancelled by our + # parent due to the bad `Started` payload! + # -[ ] the boxed error should be srced from the parent's + # runtime NOT ours! + # -[ ] we should still error on bad `return_value`s + # despite the parent not yet error-cancelling us? + # |_ how do we want the parent side to look in that + # case? + # -[ ] maybe the equiv of "during handling of the + # above error another occurred" for the case where + # the parent sends a MTE to this child and while + # waiting for the child to terminate it gets back + # the MTE for this case? 
+ # # XXX should always fail on recv side since we can't # really do much else beside terminate and relay the @@ -247,13 +279,17 @@ def test_basic_payload_spec( msg_type_str: str = '' bad_value_str: str = '' + maybe_mte: MsgTypeError|None = None + should_raise: Exception|None = ( + MsgTypeError if ( + invalid_return + or + invalid_started + ) else None + ) async with ( maybe_expect_raises( - raises=MsgTypeError if ( - invalid_return - or - invalid_started - ) else None, + raises=should_raise, ensure_in_message=[ f"invalid `{msg_type_str}` payload", f"value: `{bad_value_str}` does not match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", @@ -274,18 +310,35 @@ def test_basic_payload_spec( assert first.field == 'yo' try: - assert (await ctx.result()) is None + res: None|PldMsg = await ctx.result(hide_tb=False) + assert res is None except MsgTypeError as mte: + maybe_mte = mte if not invalid_return: raise - else: # expected this invalid `Return.pld` - assert mte.cid == ctx.cid + # expected this invalid `Return.pld` so audit + # the error state + meta-data + assert mte.expected_msg_type is Return + assert mte.cid == ctx.cid - # verify expected remote mte deats - await tractor.pause() - assert ctx._remote_error is mte - assert mte.expected_msg_type is Return + # verify expected remote mte deats + try: + assert ctx._local_error is None + assert ( + mte is + ctx._remote_error is + ctx.maybe_error is + ctx.outcome + ) + except: + # XXX should never get here.. + await tractor.pause(shield=True) + raise + + + if should_raise is None: + assert maybe_mte is None await p.cancel_actor() -- 2.34.1 From 4b843d6219d5f35e3c1365d26ef521a37a5e0df7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 01:11:29 -0400 Subject: [PATCH 133/190] Ensure only a boxed traceback for MTE on parent side --- tests/test_pldrx_limiting.py | 58 ++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index ee61dca0..0c5c7ee8 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -53,6 +53,9 @@ async def maybe_expect_raises( Async wrapper for ensuring errors propagate from the inner scope. ''' + if tractor._state.debug_mode(): + timeout += 999 + with trio.fail_after(timeout): try: yield @@ -68,9 +71,10 @@ async def maybe_expect_raises( # maybe check for error txt content if ensure_in_message: part: str + err_repr: str = repr(inner_err) for part in ensure_in_message: for i, arg in enumerate(inner_err.args): - if part in arg: + if part in err_repr: break # if part never matches an arg, then we're # missing a match. 
@@ -97,7 +101,7 @@ async def child( ctx: Context, started_value: int|PldMsg|None, return_value: str|None, - validate_pld_spec: bool, + validate_pld_spec: bool, raise_on_started_mte: bool = True, ) -> None: @@ -131,13 +135,15 @@ async def child( # 2 cases: hdndle send-side and recv-only validation # - when `raise_on_started_mte == True`, send validate # - else, parent-recv-side only validation + mte: MsgTypeError|None = None try: await ctx.started( value=started_value, validate_pld_spec=validate_pld_spec, ) - except MsgTypeError: + except MsgTypeError as _mte: + mte = _mte log.exception('started()` raised an MTE!\n') if not expect_started_mte: raise RuntimeError( @@ -145,6 +151,19 @@ async def child( f'{started_value!r}\n' ) + boxed_div: str = '------ - ------' + assert boxed_div not in mte._message + assert boxed_div not in mte.tb_str + assert boxed_div not in repr(mte) + assert boxed_div not in str(mte) + mte_repr: str = repr(mte) + for line in mte.message.splitlines(): + assert line in mte_repr + + # since this is a *local error* there should be no + # boxed traceback content! + assert not mte.tb_str + # propagate to parent? if raise_on_started_mte: raise @@ -208,8 +227,8 @@ async def child( @pytest.mark.parametrize( 'return_value', [ - None, 'yo', + None, ], ids=[ 'return[invalid-"yo"]', @@ -291,8 +310,9 @@ def test_basic_payload_spec( maybe_expect_raises( raises=should_raise, ensure_in_message=[ - f"invalid `{msg_type_str}` payload", - f"value: `{bad_value_str}` does not match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", + f"invalid `{msg_type_str}` msg payload", + f"value: `{bad_value_str}` does not " + f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], ), p.open_context( @@ -321,21 +341,21 @@ def test_basic_payload_spec( # the error state + meta-data assert mte.expected_msg_type is Return assert mte.cid == ctx.cid + mte_repr: str = repr(mte) + for line in mte.message.splitlines(): + assert line in mte_repr + + assert mte.tb_str + # await tractor.pause(shield=True) # verify expected remote mte deats - try: - assert ctx._local_error is None - assert ( - mte is - ctx._remote_error is - ctx.maybe_error is - ctx.outcome - ) - except: - # XXX should never get here.. - await tractor.pause(shield=True) - raise - + assert ctx._local_error is None + assert ( + mte is + ctx._remote_error is + ctx.maybe_error is + ctx.outcome + ) if should_raise is None: assert maybe_mte is None -- 2.34.1 From 4b3c6b7e3951dff548c99b1c01d4f12836e4fe96 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 09:05:23 -0400 Subject: [PATCH 134/190] Don't need to pack an `Error` with send-side MTEs --- tractor/msg/_ops.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 6f472afd..97cd3f29 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -269,19 +269,6 @@ class PldRx(Struct): is_invalid_payload=True, expected_msg=expect_msg, ) - err_msg: Error = pack_error( - exc=mte, - cid=msg.cid, - src_uid=( - ipc.chan.uid - if not is_started_send_side - else ipc._actor.uid - ), - # tb=valerr.__traceback__, - tb_str=mte._message, - ) - mte._ipc_msg = err_msg - # NOTE: just raise the MTE inline instead of all # the pack-unpack-repack non-sense when this is # a "send side" validation error. 
@@ -301,6 +288,22 @@ class PldRx(Struct): # ctx: Context = getattr(ipc, 'ctx', ipc) # ctx._maybe_cancel_and_set_remote_error(mte) + # NOTE: the `.message` is automatically + # transferred into the message as long as we + # define it as a `Error.message` field. + err_msg: Error = pack_error( + exc=mte, + cid=msg.cid, + src_uid=( + ipc.chan.uid + if not is_started_send_side + else ipc._actor.uid + ), + # tb=valerr.__traceback__, + # tb_str=mte._message, + # message=mte._message, + ) + mte._ipc_msg = err_msg # XXX override the `msg` passed to # `_raise_from_unexpected_msg()` (below) so so -- 2.34.1 From 830df00152a4e5fb60a46457d6e7cc0a1c275ec4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 09:06:10 -0400 Subject: [PATCH 135/190] Fix missing newline in task-cancel log-message --- tractor/_rpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 84b81c32..e4cc5750 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -391,7 +391,7 @@ async def _errors_relayed_via_ipc( # cancel scope will not have been inserted yet if is_rpc: log.warning( - 'RPC task likely errored or cancelled before start?' + 'RPC task likely errored or cancelled before start?\n' f'|_{ctx._task}\n' f' >> {ctx.repr_rpc}\n' ) -- 2.34.1 From 56a46b1bf04f670762c27e1b55eebc15b8b5216f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 09:14:04 -0400 Subject: [PATCH 136/190] Add `Error.message: str` Allows passing a custom error msg other then the traceback-str over the wire. Make `.tb_str` optional (in the blank `''` sense) since it's treated that way thus far in `._exceptions.pack_error()`. --- tractor/msg/types.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 08511ec0..ad6d6fb8 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -410,7 +410,13 @@ class Error( src_type_str: str boxed_type_str: str relay_path: list[tuple[str, str]] - tb_str: str + + # normally either both are provided or just + # a message for certain special cases where + # we pack a message for a locally raised + # mte or ctxc. + message: str|None = None + tb_str: str = '' # TODO: only optionally include sub-type specfic fields? # -[ ] use UNSET or don't include them via `omit_defaults` (see -- 2.34.1 From 2e522d003f7acfa9669d24d5fad1ae8f71af12a5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 10:04:54 -0400 Subject: [PATCH 137/190] Better RAE `.pformat()`-ing for send-side MTEs Send-side `MsgTypeError`s actually shouldn't have any "boxed" traceback per say since they're raised in the transmitting actor's local task env and we (normally) don't want the ascii decoration added around the error's `._message: str`, that is not until the exc is `pack_error()`-ed before transit. As such, the presentation of an embedded traceback (and its ascii box) gets bypassed when only a `._message: str` is set (as we now do for pld-spec failures in `_mk_msg_type_err()`). Further this tweaks the `.pformat()` output to include the `._message` part to look like ` ) ..` instead of jamming it implicitly to the end of the embedded `.tb_str` (as was done implicitly by `unpack_error()`) and also adds better handling for the `with_type_header == False` case including forcing that case when we detect that the currently handled exc is the RAE in `.pformat()`. Toss in a lengthier doc-str explaining it all. 
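
As a quick sketch of the intended send-side behaviour (the `bad_value`
and surrounding handler are hypothetical stand-ins for any pld-spec
violation):

    try:
        await ctx.started(bad_value)
    except tractor.MsgTypeError as mte:
        # a *local* send-side error => no boxed (remote)
        # traceback content and thus no ascii-box decoration,
        assert not mte.tb_str
        # but every `.message` line still renders in the repr.
        for line in mte.message.splitlines():
            assert line in repr(mte)
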
Surrounding/supporting changes, - better `unpack_error()` message which just briefly reports the remote task's error type. - add public `.message: str` prop. - always set a `._extra_msgdata: dict` since some MTE props rely on it. - handle `.boxed_type == None` for `.boxed_type_str`. - maybe pack any detected input or `exc.message` in `pack_error()`. - comment cruft cleanup in `_mk_msg_type_err()`. --- tractor/_exceptions.py | 199 +++++++++++++++++++++++++---------------- 1 file changed, 124 insertions(+), 75 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 0dfaf675..52048c17 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -22,6 +22,7 @@ from __future__ import annotations import builtins import importlib from pprint import pformat +import sys from types import ( TracebackType, ) @@ -110,6 +111,7 @@ _body_fields: list[str] = list( 'tb_str', 'relay_path', 'cid', + 'message', # only ctxc should show it but `Error` does # have it as an optional field. @@ -236,6 +238,7 @@ class RemoteActorError(Exception): self._boxed_type: BaseException = boxed_type self._src_type: BaseException|None = None self._ipc_msg: Error|None = ipc_msg + self._extra_msgdata = extra_msgdata if ( extra_msgdata @@ -250,8 +253,6 @@ class RemoteActorError(Exception): k, v, ) - else: - self._extra_msgdata = extra_msgdata # TODO: mask out eventually or place in `pack_error()` # pre-`return` lines? @@ -282,6 +283,17 @@ class RemoteActorError(Exception): # ensure any roundtripping evals to the input value assert self.boxed_type is boxed_type + @property + def message(self) -> str: + ''' + Be explicit, instead of trying to read it from the the parent + type's loosely defined `.args: tuple`: + + https://docs.python.org/3/library/exceptions.html#BaseException.args + + ''' + return self._message + @property def ipc_msg(self) -> Struct: ''' @@ -355,7 +367,10 @@ class RemoteActorError(Exception): ''' bt: Type[BaseException] = self.boxed_type - return str(bt.__name__) + if bt: + return str(bt.__name__) + + return '' @property def boxed_type(self) -> Type[BaseException]: @@ -426,8 +441,7 @@ class RemoteActorError(Exception): for key in fields: if ( - key == 'relay_uid' - and not self.is_inception() + key == 'relay_uid' and not self.is_inception() ): continue @@ -504,19 +518,80 @@ class RemoteActorError(Exception): def pformat( self, with_type_header: bool = True, + # with_ascii_box: bool = True, + ) -> str: ''' - Nicely formatted boxed error meta data + traceback, OR just - the normal message from `.args` (for eg. as you'd want shown - by a locally raised `ContextCancelled`). + Format any boxed remote error by multi-line display of, + + - error's src or relay actor meta-data, + - remote runtime env's traceback, + + With optional control over the format of, + + - whether the boxed traceback is ascii-decorated with + a surrounding "box" annotating the embedded stack-trace. + - if the error's type name should be added as margins + around the field and tb content like: + + `> .. )>` + + - the placement of the `.message: str` (explicit equiv of + `.args[0]`), either placed below the `.tb_str` or in the + first line's header when the error is raised locally (since + the type name is already implicitly shown by python). ''' header: str = '' body: str = '' + message: str = '' + # XXX when the currently raised exception is this instance, + # we do not ever use the "type header" style repr. 
+ is_being_raised: bool = False + if ( + (exc := sys.exception()) + and + exc is self + ): + is_being_raised: bool = True + + with_type_header: bool = ( + with_type_header + and + not is_being_raised + ) + + # style if with_type_header: - header: str = f'<{type(self).__name__}(\n' + header: str = f'<{type(self).__name__}(' + if message := self._message: + + # split off the first line so, if needed, it isn't + # indented the same like the "boxed content" which + # since there is no `.tb_str` is just the `.message`. + lines: list[str] = message.splitlines() + first: str = lines[0] + message: str = message.removeprefix(first) + + # with a type-style header we, + # - have no special message "first line" extraction/handling + # - place the message a space in from the header: + # `MsgTypeError( ..` + # ^-here + # - indent the `.message` inside the type body. + if with_type_header: + first = f' {first} )>' + + message: str = textwrap.indent( + message, + prefix=' '*2, + ) + message: str = first + message + + # IFF there is an embedded traceback-str we always + # draw the ascii-box around it. if tb_str := self.tb_str: fields: str = self._mk_fields_str( _body_fields @@ -535,36 +610,19 @@ class RemoteActorError(Exception): # |___ .. tb_body_indent=1, ) - if not with_type_header: - body = '\n' + body - elif message := self._message: - # split off the first line so it isn't indented - # the same like the "boxed content". - if not with_type_header: - lines: list[str] = message.splitlines() - first: str = lines[0] - message: str = message.removeprefix(first) - - else: - first: str = '' - - body: str = ( - first - + - message - + - '\n' - ) - - if with_type_header: - tail: str = ')>' - else: - tail = '' + tail = '' + if ( + with_type_header + and not message + ): + tail: str = '>' return ( header + + message + + f'{body}' + tail @@ -577,7 +635,9 @@ class RemoteActorError(Exception): # |_ i guess `pexepect` relies on `str`-casing # of output? def __str__(self) -> str: - return self.pformat(with_type_header=False) + return self.pformat( + with_type_header=False + ) def unwrap( self, @@ -825,9 +885,6 @@ class MsgTypeError( extra_msgdata['_bad_msg'] = bad_msg extra_msgdata['cid'] = bad_msg.cid - if 'cid' not in extra_msgdata: - import pdbp; pdbp.set_trace() - return cls( message=message, boxed_type=cls, @@ -889,6 +946,7 @@ def pack_error( src_uid: tuple[str, str]|None = None, tb: TracebackType|None = None, tb_str: str = '', + message: str = '', ) -> Error: ''' @@ -971,7 +1029,7 @@ def pack_error( # the locally raised error (so, NOT the prior relay's boxed # `._ipc_msg.tb_str`). error_msg['tb_str'] = tb_str - + error_msg['message'] = message or getattr(exc, 'message', '') if cid is not None: error_msg['cid'] = cid @@ -995,26 +1053,24 @@ def unpack_error( if not isinstance(msg, Error): return None - # retrieve the remote error's msg-encoded details - tb_str: str = msg.tb_str - message: str = ( - f'{chan.uid}\n' - + - tb_str - ) - # try to lookup a suitable error type from the local runtime # env then use it to construct a local instance. 
# boxed_type_str: str = error_dict['boxed_type_str'] boxed_type_str: str = msg.boxed_type_str boxed_type: Type[BaseException] = get_err_type(boxed_type_str) - if boxed_type_str == 'ContextCancelled': - box_type = ContextCancelled - assert boxed_type is box_type + # retrieve the error's msg-encoded remotoe-env info + message: str = f'remote task raised a {msg.boxed_type_str!r}\n' - elif boxed_type_str == 'MsgTypeError': - box_type = MsgTypeError + # TODO: do we even really need these checks for RAEs? + if boxed_type_str in [ + 'ContextCancelled', + 'MsgTypeError', + ]: + box_type = { + 'ContextCancelled': ContextCancelled, + 'MsgTypeError': MsgTypeError, + }[boxed_type_str] assert boxed_type is box_type # TODO: already included by `_this_mod` in else loop right? @@ -1029,19 +1085,21 @@ def unpack_error( exc = box_type( message, ipc_msg=msg, + tb_str=msg.tb_str, ) return exc -def is_multi_cancelled(exc: BaseException) -> bool: +def is_multi_cancelled( + exc: BaseException|BaseExceptionGroup +) -> bool: ''' Predicate to determine if a possible ``BaseExceptionGroup`` contains only ``trio.Cancelled`` sub-exceptions (and is likely the result of cancelling a collection of subtasks. ''' - # if isinstance(exc, eg.BaseExceptionGroup): if isinstance(exc, BaseExceptionGroup): return exc.subgroup( lambda exc: isinstance(exc, trio.Cancelled) @@ -1184,7 +1242,6 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, - # src_err_msg: Error|None = None, **mte_kwargs, @@ -1251,19 +1308,11 @@ def _mk_msg_type_err( msg_type: str = type(msg) any_pld: Any = msgpack.decode(msg.pld) message: str = ( - f'invalid `{msg_type.__qualname__}` payload\n\n' - f'value: `{any_pld!r}` does not match type-spec: ' #\n' + f'invalid `{msg_type.__qualname__}` msg payload\n\n' + f'value: `{any_pld!r}` does not match type-spec: ' f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' - # f'<{type(msg).__qualname__}(\n' - # f' |_pld: {codec.pld_spec_str}\n'# != {any_pld!r}\n' - # f')>\n\n' ) - # src_err_msg = msg bad_msg = msg - # TODO: should we just decode the msg to a dict despite - # only the payload being wrong? - # -[ ] maybe the better design is to break this construct - # logic into a separate explicit helper raiser-func? else: # decode the msg-bytes using the std msgpack @@ -1308,21 +1357,21 @@ def _mk_msg_type_err( if verb_header: message = f'{verb_header} ' + message - # if not isinstance(bad_msg, PayloadMsg): - # import pdbp; pdbp.set_trace() - msgtyperr = MsgTypeError.from_decode( message=message, bad_msg=bad_msg, bad_msg_as_dict=msg_dict, - # NOTE: for the send-side `.started()` pld-validate - # case we actually set the `._ipc_msg` AFTER we return - # from here inside `Context.started()` since we actually - # want to emulate the `Error` from the mte we build here - # Bo - # so by default in that case this is set to `None` - # ipc_msg=src_err_msg, + # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: + # - for the send-side `.started()` pld-validate + # case we actually raise inline so we don't need to + # set the it at all. + # - for recv side we set it inside `PldRx.decode_pld()` + # after a manual call to `pack_error()` since we + # actually want to emulate the `Error` from the mte we + # build here. So by default in that case, this is left + # as `None` here. 
+ # ipc_msg=src_err_msg, ) msgtyperr.__cause__ = src_validation_error return msgtyperr -- 2.34.1 From b9ae41a161ec2e0fad83efa5f4cef892f7b17d4e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 15:24:25 -0400 Subject: [PATCH 138/190] Add `from_src_exc: BaseException` to maybe raisers That is as a control to `Context._maybe_raise_remote_err()` such that if set to anything other then the default (`False` value), we do `raise remote_error from from_src_exc` such that caller can choose to suppress or override the `.__cause__` tb. Also tidy up and old masked TODO regarding calling `.maybe_raise()` after the caller exits from the `yield` in `.open_context()`.. --- tractor/_context.py | 51 ++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 291bed16..453e9065 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1190,6 +1190,7 @@ class Context: self, remote_error: Exception, + from_src_exc: BaseException|None|bool = False, raise_ctxc_from_self_call: bool = False, raise_overrun_from_self: bool = True, hide_tb: bool = True, @@ -1284,7 +1285,10 @@ class Context: # runtime frames from the tb explicitly? # https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement # https://stackoverflow.com/a/24752607 - raise remote_error # from None + if from_src_exc is not False: + raise remote_error from from_src_exc + + raise remote_error # TODO: change to `.wait_for_result()`? async def result( @@ -2096,7 +2100,11 @@ async def open_context_from_portal( # `._maybe_cancel_and_set_remote_error()` so ensure # we raise the underlying `._remote_error` directly # instead of bubbling that taskc. - ctx.maybe_raise() + ctx.maybe_raise( + # mask the above taskc from the tb + from_src_exc=None, + hide_tb=hide_tb, + ) # OW, some other unexpected cancel condition # that should prolly never happen right? @@ -2108,13 +2116,14 @@ async def open_context_from_portal( ctx._started_msg: bool = started_msg ctx._started_pld: bool = first - # deliver context instance and .started() msg value - # in enter tuple. + # deliver context ref and `.started()` msg payload value + # in `__aenter__` tuple. yield ctx, first # ??TODO??: do we still want to consider this or is # the `else:` block handling via a `.result()` # call below enough?? + # # -[ ] pretty sure `.result()` internals do the # same as our ctxc handler below so it ended up # being same (repeated?) behaviour, but ideally we @@ -2123,33 +2132,13 @@ async def open_context_from_portal( # that we can re-use it around the `yield` ^ here # or vice versa? # - # NOTE: between the caller exiting and arriving - # here the far end may have sent a ctxc-msg or - # other error, so check for it here immediately - # and maybe raise so as to engage the ctxc - # handling block below! + # maybe TODO NOTE: between the caller exiting and + # arriving here the far end may have sent a ctxc-msg or + # other error, so the quetion is whether we should check + # for it here immediately and maybe raise so as to engage + # the ctxc handling block below ???? # - # if re := ctx._remote_error: - # maybe_ctxc: ContextCancelled|None = ctx._maybe_raise_remote_err( - # re, - # # TODO: do we want this to always raise? - # # - means that on self-ctxc, if/when the - # # block is exited before the msg arrives - # # but then the msg during __exit__ - # # calling we may not activate the - # # ctxc-handler block below? should we - # # be? 
- # # - if there's a remote error that arrives - # # after the child has exited, we won't - # # handle until the `finally:` block - # # where `.result()` is always called, - # # again in which case we handle it - # # differently then in the handler block - # # that would normally engage from THIS - # # block? - # raise_ctxc_from_self_call=True, - # ) - # ctxc_from_callee = maybe_ctxc + # self.maybe_raise() # when in allow_overruns mode there may be # lingering overflow sender tasks remaining? @@ -2460,7 +2449,7 @@ async def open_context_from_portal( # # NOTE: further, this should be the only place the # underlying feeder channel is - # once-and-only-CLOSED! + # once-forever-and-only-CLOSED! with trio.CancelScope(shield=True): await ctx._rx_chan.aclose() -- 2.34.1 From d51c19fe3d714aa6e5d1663dc01be266ab8d36cd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 15:55:34 -0400 Subject: [PATCH 139/190] Pass `boxed_type` from `_mk_msg_type_err()` Such that we're boxing the interchanged lib's specific error `msgspec.ValidationError` in this case) type much like how a `ContextCancelled[trio.Cancelled]` is composed; allows for seemless multi-backend-codec support later as well B) Pass `ctx.maybe_raise(from_src_exc=src_err)` where needed in a couple spots; as `None` in the send-side `Started` MTE case to avoid showing the `._scope1.cancel_called` result in the traceback from the `.open_context()` child-sync phase. --- tractor/_exceptions.py | 8 ++++---- tractor/_streaming.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 52048c17..92c3fafb 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -518,7 +518,6 @@ class RemoteActorError(Exception): def pformat( self, with_type_header: bool = True, - # with_ascii_box: bool = True, ) -> str: ''' @@ -885,9 +884,9 @@ class MsgTypeError( extra_msgdata['_bad_msg'] = bad_msg extra_msgdata['cid'] = bad_msg.cid + extra_msgdata.setdefault('boxed_type', cls) return cls( message=message, - boxed_type=cls, **extra_msgdata, ) @@ -1111,7 +1110,7 @@ def is_multi_cancelled( def _raise_from_unexpected_msg( ctx: Context, msg: MsgType, - src_err: AttributeError, + src_err: Exception, log: StackLevelAdapter, # caller specific `log` obj expect_msg: Type[MsgType], @@ -1212,7 +1211,7 @@ def _raise_from_unexpected_msg( # in case there already is some underlying remote error # that arrived which is probably the source of this stream # closure - ctx.maybe_raise() + ctx.maybe_raise(from_src_exc=src_err) raise eoc from src_err # TODO: our own transport/IPC-broke error subtype? 
@@ -1361,6 +1360,7 @@ def _mk_msg_type_err( message=message, bad_msg=bad_msg, bad_msg_as_dict=msg_dict, + boxed_type=type(src_validation_error), # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: # - for the send-side `.started()` pld-validate diff --git a/tractor/_streaming.py b/tractor/_streaming.py index a008eaf5..016577d3 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -233,6 +233,7 @@ class MsgStream(trio.abc.Channel): # ctx: Context = self._ctx ctx.maybe_raise( raise_ctxc_from_self_call=True, + from_src_exc=src_err, ) # propagate any error but hide low-level frame details -- 2.34.1 From 1c00668d205025b8e3fe877e2dfe6f336a3b2213 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:02:25 -0400 Subject: [PATCH 140/190] Always `.exception()` in `try_ship_error_to_remote()` on internal error --- tractor/_rpc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index e4cc5750..57db52f3 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -326,7 +326,6 @@ async def _errors_relayed_via_ipc( f'|_{ctx}' ) - # ALWAYS try to ship RPC errors back to parent/caller task if is_rpc: @@ -817,6 +816,12 @@ async def try_ship_error_to_remote( # TODO: use `.msg.preetty_struct` for this! f'{msg}\n' ) + except BaseException: + log.exception( + 'Errored while attempting error shipment?' + ) + __tracebackhide__: bool = False + raise async def process_messages( -- 2.34.1 From d099466d2126d601663344cbdb1e9d9240a3175f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:02:59 -0400 Subject: [PATCH 141/190] Change `reraise` to `post_mortem: bool` in `maybe_expect_raises()` --- tests/test_pldrx_limiting.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index 0c5c7ee8..5cb0d357 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -46,7 +46,7 @@ maybe_msg_spec = PldMsg|None async def maybe_expect_raises( raises: BaseException|None = None, ensure_in_message: list[str]|None = None, - reraise: bool = False, + post_mortem: bool = False, timeout: int = 3, ) -> None: ''' @@ -86,8 +86,8 @@ async def maybe_expect_raises( f'{inner_err.args}' ) - if reraise: - raise inner_err + if post_mortem: + await tractor.post_mortem() else: if raises: @@ -314,6 +314,8 @@ def test_basic_payload_spec( f"value: `{bad_value_str}` does not " f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], + # only for debug + post_mortem=True, ), p.open_context( child, -- 2.34.1 From 5bab7648e2955d839d5bc69615785d8875824b5e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:03:28 -0400 Subject: [PATCH 142/190] Add a `tractor.post_mortem()` API test + example Since turns out we didn't have a single example using that API Bo The test granular-ly checks all use cases: - `.post_mortem()` manual calls in both subactor and root. - ensuring built-in RPC crash handling activates after each manual one from ^. - drafted some call-stack frame checking that i commented out for now since we need to first do ANSI escape code removal due to the colorization that `pdbp` does by default. |_ added a TODO with SO link on `assert_before()`. 
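
A sketch of the stripping helper that TODO implies (regex lifted from
the linked SO answer; the helper name and wiring are hypothetical):

    import re

    # match 7-bit C1 ANSI escape sequences
    _ansi_re = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

    def strip_ansi(txt: str) -> str:
        # drop terminal color/style codes so `assert_before()` can
        # match against `pdbp`-colorized REPL output.
        return _ansi_re.sub('', txt)
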
Also todo-staged a shielded-pause test to match with the already existing-but-needs-refinement example B) --- examples/debugging/pm_in_subactor.py | 56 ++++++++++++ tests/test_debugger.py | 122 ++++++++++++++++++++++++++- 2 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 examples/debugging/pm_in_subactor.py diff --git a/examples/debugging/pm_in_subactor.py b/examples/debugging/pm_in_subactor.py new file mode 100644 index 00000000..a8f5048e --- /dev/null +++ b/examples/debugging/pm_in_subactor.py @@ -0,0 +1,56 @@ +import trio +import tractor + + +@tractor.context +async def name_error( + ctx: tractor.Context, +): + ''' + Raise a `NameError`, catch it and enter `.post_mortem()`, then + expect the `._rpc._invoke()` crash handler to also engage. + + ''' + try: + getattr(doggypants) # noqa (on purpose) + except NameError: + await tractor.post_mortem() + raise + + +async def main(): + ''' + Test 3 `PdbREPL` entries: + - one in the child due to manual `.post_mortem()`, + - another in the child due to runtime RPC crash handling. + - final one here in parent from the RAE. + + ''' + # XXX NOTE: ideally the REPL arrives at this frame in the parent + # ONE UP FROM the inner ctx block below! + async with tractor.open_nursery( + debug_mode=True, + # loglevel='cancel', + ) as an: + p: tractor.Portal = await an.start_actor( + 'child', + enable_modules=[__name__], + ) + + # XXX should raise `RemoteActorError[NameError]` + # AND be the active frame when REPL enters! + try: + async with p.open_context(name_error) as (ctx, first): + assert first + except tractor.RemoteActorError as rae: + assert rae.boxed_type is NameError + + # manually handle in root's parent task + await tractor.post_mortem() + raise + else: + raise RuntimeError('IPC ctx should have remote errored!?') + + +if __name__ == '__main__': + trio.run(main) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 6aa3bd53..ac7b0feb 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -159,6 +159,10 @@ def in_prompt_msg( return True + +# TODO: todo support terminal color-chars stripping so we can match +# against call stack frame output from the the 'll' command the like! +# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 def assert_before( child, patts: list[str], @@ -1123,7 +1127,112 @@ def test_pause_from_sync( child.expect(pexpect.EOF) -# TODO! +def test_post_mortem_api( + spawn, + ctlc: bool, +): + ''' + Verify the `tractor.post_mortem()` API works in an exception + handler block. + + ''' + child = spawn('pm_in_subactor') + + # First entry is via manual `.post_mortem()` + child.expect(PROMPT) + assert_before( + child, + [ + _crash_msg, + " async with p.open_context(name_error) as (ctx, first):', + # ] + # ) + + # # step up a frame to ensure the it's the root's nursery + # child.sendline('u') + # child.expect(PROMPT) + # assert_before( + # child, + # [ + # # handler block annotation + # '-> async with tractor.open_nursery(', + # ] + # ) + + child.sendline('c') + child.expect(pexpect.EOF) + + +# TODO: needs ANSI code stripping tho, see `assert_before()` # above! def test_correct_frames_below_hidden(): ''' Ensure that once a `tractor.pause()` enages, when the user @@ -1136,4 +1245,15 @@ def test_correct_frames_below_hidden(): def test_cant_pause_from_paused_task(): + ''' + Pausing from with an already paused task should raise an error. 
+ + Normally this should only happen in practise while debugging the call stack of `tractor.pause()` itself, likely + by a `.pause()` line somewhere inside our runtime. + + ''' + ... + + +def test_shield_pause(): ... -- 2.34.1 From d98f06314dd8307685e0525b02312ea5c3943af4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:09:59 -0400 Subject: [PATCH 143/190] Rename `PldRx.dec_msg()` -> `.decode_pld()` Keep the old alias, but i think it's better form to use longer names for internal public APIs and this name better reflects the functionality: decoding and returning a `PayloadMsg.pld` field. --- tractor/msg/_ops.py | 54 +++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 97cd3f29..86f80395 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -167,7 +167,7 @@ class PldRx(Struct): ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, hide_tb: bool = False, - **dec_msg_kwargs, + **dec_pld_kwargs, ) -> Any|Raw: __tracebackhide__: bool = hide_tb @@ -179,12 +179,12 @@ class PldRx(Struct): # sync-rx msg from underlying IPC feeder (mem-)chan ipc._rx_chan.receive_nowait() ) - return self.dec_msg( + return self.decode_pld( msg, ipc=ipc, expect_msg=expect_msg, hide_tb=hide_tb, - **dec_msg_kwargs, + **dec_pld_kwargs, ) async def recv_pld( @@ -194,7 +194,7 @@ class PldRx(Struct): expect_msg: Type[MsgType]|None = None, hide_tb: bool = True, - **dec_msg_kwargs, + **dec_pld_kwargs, ) -> Any|Raw: ''' @@ -208,17 +208,14 @@ class PldRx(Struct): # async-rx msg from underlying IPC feeder (mem-)chan await ipc._rx_chan.receive() ) - return self.dec_msg( + return self.decode_pld( msg=msg, ipc=ipc, expect_msg=expect_msg, - **dec_msg_kwargs, + **dec_pld_kwargs, ) - # TODO: rename to, - # -[ ] `.decode_pld()`? - # -[ ] `.dec_pld()`? - def dec_msg( + def decode_pld( self, msg: MsgType, ipc: Context|MsgStream, @@ -299,9 +296,6 @@ class PldRx(Struct): if not is_started_send_side else ipc._actor.uid ), - # tb=valerr.__traceback__, - # tb_str=mte._message, - # message=mte._message, ) mte._ipc_msg = err_msg @@ -317,29 +311,6 @@ class PldRx(Struct): # validation error. src_err = valerr - # TODO: should we instead make this explicit and - # use the above masked `is_started_send_decode`, - # expecting the `Context.started()` caller to set - # it? Rn this is kinda, howyousayyy, implicitly - # edge-case-y.. - # TODO: remove this since it's been added to - # `_raise_from_unexpected_msg()`..? - # if ( - # expect_msg is not Started - # and not is_started_send_side - # ): - # # set emulated remote error more-or-less as the - # # runtime would - # ctx: Context = getattr(ipc, 'ctx', ipc) - # ctx._maybe_cancel_and_set_remote_error(mte) - - # XXX some other decoder specific failure? - # except TypeError as src_error: - # from .devx import mk_pdb - # mk_pdb().set_trace() - # raise src_error - # ^-TODO-^ can remove? - # a runtime-internal RPC endpoint response. # always passthrough since (internal) runtime # responses are generally never exposed to consumer @@ -435,6 +406,8 @@ class PldRx(Struct): __tracebackhide__: bool = False raise + dec_msg = decode_pld + async def recv_msg_w_pld( self, ipc: Context|MsgStream, @@ -463,7 +436,7 @@ class PldRx(Struct): # TODO: is there some way we can inject the decoded # payload into an existing output buffer for the original # msg instance? 
- pld: PayloadT = self.dec_msg( + pld: PayloadT = self.decode_pld( msg, ipc=ipc, expect_msg=expect_msg, @@ -610,7 +583,10 @@ async def drain_to_final_msg( # only when we are sure the remote error is # the source cause of this local task's # cancellation. - ctx.maybe_raise() + ctx.maybe_raise( + # TODO: when use this/ + # from_src_exc=taskc, + ) # CASE 1: we DID request the cancel we simply # continue to bubble up as normal. @@ -783,7 +759,7 @@ def validate_payload_msg( try: roundtripped: Started = codec.decode(msg_bytes) ctx: Context = getattr(ipc, 'ctx', ipc) - pld: PayloadT = ctx.pld_rx.dec_msg( + pld: PayloadT = ctx.pld_rx.decode_pld( msg=roundtripped, ipc=ipc, expect_msg=Started, -- 2.34.1 From 15a47dc4f7ec294cd66db15fd74e648817e142e6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 17:52:24 -0400 Subject: [PATCH 144/190] Finally, officially support shielded REPL-ing! It's been a long time prepped and now finally implemented! Offer a `shield: bool` argument from our async `._debug` APIs: - `await tractor.pause(shield=True)`, - `await tractor.post_mortem(shield=True)` ^-These-^ can now be used inside cancelled `trio.CancelScope`s, something very handy when introspecting complex (distributed) system tear/shut-downs particularly under remote error or (inter-peer) cancellation conditions B) Thanks to previous prepping in a prior attempt and various patches from the rigorous rework of `.devx._debug` internals around typed msg specs, there ain't much that was needed! Impl deats - obvi passthrough `shield` from the public API endpoints (was already done from a prior attempt). - put ad-hoc internal `with trio.CancelScope(shield=shield):` around all checkpoints inside `._pause()` for both the root-process and subactor case branches. Add a fairly rigorous example, `examples/debugging/shielded_pause.py` with a wrapping `pexpect` test, `test_debugger.test_shield_pause()` and ensure it covers as many cases as i can think of offhand: - multiple `.pause()` entries in a loop despite parent scope cancellation in a subactor RPC task which itself spawns a sub-task. - a `trio.Nursery.parent_task` which raises, is handled and tries to enter and unshielded `.post_mortem()`, which of course internally raises `Cancelled` in a `._pause()` checkpoint, so we catch the `Cancelled` again and then debug the debugger's internal cancellation with specific checks for the particular raising checkpoint-LOC. - do ^- the latter -^ for both subactor and root cases to ensure we can debug `._pause()` itself when it tries to REPL engage from a cancelled task scope Bo --- examples/debugging/shielded_pause.py | 88 ++++++++++++++++++++++++++++ tests/test_debugger.py | 79 +++++++++++++++++++++++-- tractor/devx/_debug.py | 59 +++++++++++-------- 3 files changed, 199 insertions(+), 27 deletions(-) create mode 100644 examples/debugging/shielded_pause.py diff --git a/examples/debugging/shielded_pause.py b/examples/debugging/shielded_pause.py new file mode 100644 index 00000000..3e34d8fc --- /dev/null +++ b/examples/debugging/shielded_pause.py @@ -0,0 +1,88 @@ +import trio +import tractor + + +async def cancellable_pause_loop( + task_status: trio.TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED +): + with trio.CancelScope() as cs: + task_status.started(cs) + for _ in range(3): + try: + # ON first entry, there is no level triggered + # cancellation yet, so this cp does a parent task + # ctx-switch so that this scope raises for the NEXT + # checkpoint we hit. 
+ await trio.lowlevel.checkpoint() + await tractor.pause() + + cs.cancel() + + # parent should have called `cs.cancel()` by now + await trio.lowlevel.checkpoint() + + except trio.Cancelled: + print('INSIDE SHIELDED PAUSE') + await tractor.pause(shield=True) + else: + # should raise it again, bubbling up to parent + print('BUBBLING trio.Cancelled to parent task-nursery') + await trio.lowlevel.checkpoint() + + +async def pm_on_cancelled(): + async with trio.open_nursery() as tn: + tn.cancel_scope.cancel() + try: + await trio.sleep_forever() + except trio.Cancelled: + # should also raise `Cancelled` since + # we didn't pass `shield=True`. + try: + await tractor.post_mortem(hide_tb=False) + except trio.Cancelled as taskc: + + # should enter just fine, in fact it should + # be debugging the internals of the previous + # sin-shield call above Bo + await tractor.post_mortem( + hide_tb=False, + shield=True, + ) + raise taskc + + else: + raise RuntimeError('Dint cancel as expected!?') + + +async def cancelled_before_pause( +): + ''' + Verify that using a shielded pause works despite surrounding + cancellation called state in the calling task. + + ''' + async with trio.open_nursery() as tn: + cs: trio.CancelScope = await tn.start(cancellable_pause_loop) + await trio.sleep(0.1) + + assert cs.cancelled_caught + + await pm_on_cancelled() + + +async def main(): + async with tractor.open_nursery( + debug_mode=True, + ) as n: + portal: tractor.Portal = await n.run_in_actor( + cancelled_before_pause, + ) + await portal.result() + + # ensure the same works in the root actor! + await pm_on_cancelled() + + +if __name__ == '__main__': + trio.run(main) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index ac7b0feb..88453682 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -1232,6 +1232,81 @@ def test_post_mortem_api( child.expect(pexpect.EOF) +def test_shield_pause( + spawn, +): + ''' + Verify the `tractor.pause()/.post_mortem()` API works inside an + already cancelled `trio.CancelScope` and that you can step to the + next checkpoint wherein the cancelled will get raised. + + ''' + child = spawn('shielded_pause') + + # First entry is via manual `.post_mortem()` + child.expect(PROMPT) + assert_before( + child, + [ + _pause_msg, + "cancellable_pause_loop'", + "('cancelled_before_pause'", # actor name + ] + ) + + # since 3 tries in ex. shield pause loop + for i in range(3): + child.sendline('c') + child.expect(PROMPT) + assert_before( + child, + [ + _pause_msg, + "INSIDE SHIELDED PAUSE", + "('cancelled_before_pause'", # actor name + ] + ) + + # back inside parent task that opened nursery + child.sendline('c') + child.expect(PROMPT) + assert_before( + child, + [ + _crash_msg, + "('cancelled_before_pause'", # actor name + "Failed to engage debugger via `_pause()`", + "trio.Cancelled", + "raise Cancelled._create()", + + # we should be handling a taskc inside + # the first `.port_mortem()` sin-shield! + 'await DebugStatus.req_finished.wait()', + ] + ) + + # same as above but in the root actor's task + child.sendline('c') + child.expect(PROMPT) + assert_before( + child, + [ + _crash_msg, + "('root'", # actor name + "Failed to engage debugger via `_pause()`", + "trio.Cancelled", + "raise Cancelled._create()", + + # handling a taskc inside the first unshielded + # `.port_mortem()`. 
+ # BUT in this case in the root-proc path ;) + 'wait Lock._debug_lock.acquire()', + ] + ) + child.sendline('c') + child.expect(pexpect.EOF) + + # TODO: needs ANSI code stripping tho, see `assert_before()` # above! def test_correct_frames_below_hidden(): ''' @@ -1253,7 +1328,3 @@ def test_cant_pause_from_paused_task(): ''' ... - - -def test_shield_pause(): - ... diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 753c1985..2f0e7e12 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -1600,25 +1600,27 @@ async def _pause( f'REPL: {Lock.repl}\n' # TODO: use `._frame_stack` scanner to find the @api_frame ) - await trio.lowlevel.checkpoint() + with trio.CancelScope(shield=shield): + await trio.lowlevel.checkpoint() return # XXX: since we need to enter pdb synchronously below, # we have to release the lock manually from pdb completion # callbacks. Can't think of a nicer way then this atm. - if Lock._debug_lock.locked(): - log.warning( - 'attempting to shield-acquire active TTY lock owned by\n' - f'{ctx}' - ) + with trio.CancelScope(shield=shield): + if Lock._debug_lock.locked(): + log.warning( + 'attempting to shield-acquire active TTY lock owned by\n' + f'{ctx}' + ) - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - with trio.CancelScope(shield=True): + # must shield here to avoid hitting a ``Cancelled`` and + # a child getting stuck bc we clobbered the tty + # with trio.CancelScope(shield=True): + await Lock._debug_lock.acquire() + else: + # may be cancelled await Lock._debug_lock.acquire() - else: - # may be cancelled - await Lock._debug_lock.acquire() # enter REPL from root, no TTY locking IPC ctx necessary _enter_repl_sync(debug_func) @@ -1659,7 +1661,8 @@ async def _pause( f'{task.name}@{actor.uid} already has TTY lock\n' f'ignoring..' ) - await trio.lowlevel.checkpoint() + with trio.CancelScope(shield=shield): + await trio.lowlevel.checkpoint() return else: @@ -1671,8 +1674,9 @@ async def _pause( f'{task}@{actor.uid} already has TTY lock\n' f'waiting for release..' ) - await DebugStatus.repl_release.wait() - await trio.sleep(0.1) + with trio.CancelScope(shield=shield): + await DebugStatus.repl_release.wait() + await trio.sleep(0.1) elif ( req_task @@ -1683,7 +1687,8 @@ async def _pause( 'Waiting for previous request to complete..\n' ) - await DebugStatus.req_finished.wait() + with trio.CancelScope(shield=shield): + await DebugStatus.req_finished.wait() # this **must** be awaited by the caller and is done using the # root nursery so that the debugger can continue to run without @@ -1721,14 +1726,15 @@ async def _pause( 'Starting request task\n' f'|_{task}\n' ) - req_ctx: Context = await actor._service_n.start( - partial( - request_root_stdio_lock, - actor_uid=actor.uid, - task_uid=(task.name, id(task)), # task uuid (effectively) - shield=shield, + with trio.CancelScope(shield=shield): + req_ctx: Context = await actor._service_n.start( + partial( + request_root_stdio_lock, + actor_uid=actor.uid, + task_uid=(task.name, id(task)), # task uuid (effectively) + shield=shield, + ) ) - ) # XXX sanity, our locker task should be the one which # entered a new IPC ctx with the root actor, NOT the one # that exists around the task calling into `._pause()`. @@ -2147,6 +2153,13 @@ async def post_mortem( **_pause_kwargs, ) -> None: + ''' + `tractor`'s builtin async equivalient of `pdb.post_mortem()` + which can be used inside exception handlers. 
+ + It's also used for the crash handler when `debug_mode == True` ;) + + ''' __tracebackhide__: bool = hide_tb tb: TracebackType = tb or sys.exc_info()[2] -- 2.34.1 From 4bc7569981cebe7f9101081a64da73e842c6698c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 18:33:25 -0400 Subject: [PATCH 145/190] Woops, set `post_mortem=False` by default again! --- tests/test_pldrx_limiting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index 5cb0d357..ddf2a234 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -315,7 +315,7 @@ def test_basic_payload_spec( f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], # only for debug - post_mortem=True, + # post_mortem=True, ), p.open_context( child, -- 2.34.1 From 028bc3256f45f3bcda394bc8929640d90a8d4c8d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 31 May 2024 12:03:18 -0400 Subject: [PATCH 146/190] Drop sub-decoder proto-cruft from `.msg._codec` It ended up getting necessarily implemented as the `PldRx` though at a different layer and won't be needed as part of `MsgCodec` most likely, though this original idea did provide the source of inspiration for how things work now! Also Move the commented TODO proto for a codec hook factory from `.types` to `._codec` where it prolly better fits and update some msg related todo/questions. --- tractor/msg/_codec.py | 196 ++++++++---------------------------------- tractor/msg/types.py | 62 +++++-------- 2 files changed, 58 insertions(+), 200 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e1c59e94..c1301bd2 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -52,10 +52,6 @@ from msgspec import ( msgpack, Raw, ) -# from trio.lowlevel import ( -# RunVar, -# RunVarToken, -# ) # TODO: see notes below from @mikenerone.. # from tricycle import TreeVar @@ -368,160 +364,16 @@ class MsgCodec(Struct): # https://jcristharif.com/msgspec/usage.html#typed-decoding return self._dec.decode(msg) - # TODO: a sub-decoder system as well? - # payload_msg_specs: Union[Type[Struct]] = Any - # see related comments in `.msg.types` - # _payload_decs: ( - # dict[ - # str, - # msgpack.Decoder, - # ] - # |None - # ) = None - # OR - # ) = { - # # pre-seed decoders for std-py-type-set for use when - # # `MsgType.pld == None|Any`. - # None: msgpack.Decoder(Any), - # Any: msgpack.Decoder(Any), - # } - # - # -[ ] do we still want to try and support the sub-decoder with - # `.Raw` technique in the case that the `Generic` approach gives - # future grief? - # - # -[ ] - # -> https://jcristharif.com/msgspec/api.html#raw - # - #def mk_pld_subdec( - # self, - # payload_types: Union[Type[Struct]], - #) -> msgpack.Decoder: - # # TODO: sub-decoder suppor for `.pld: Raw`? - # # => see similar notes inside `.msg.types`.. - # # - # # not sure we'll end up needing this though it might have - # # unforeseen advantages in terms of enabling encrypted - # # appliciation layer (only) payloads? - # # - # # register sub-payload decoders to load `.pld: Raw` - # # decoded `Msg`-packets using a dynamic lookup (table) - # # instead of a pre-defined msg-spec via `Generic` - # # parameterization. 
- # # - # ( - # tags, - # payload_dec, - # ) = mk_tagged_union_dec( - # tagged_structs=list(payload_types.__args__), - # ) - # # register sub-decoders by tag - # subdecs: dict[str, msgpack.Decoder]|None = self._payload_decs - # for name in tags: - # subdecs.setdefault( - # name, - # payload_dec, - # ) - - # return payload_dec - - # sub-decoders for retreiving embedded - # payload data and decoding to a sender - # side defined (struct) type. - # def dec_payload( - # codec: MsgCodec, - # msg: Msg, - - # ) -> Any|Struct: - - # msg: PayloadMsg = codec.dec.decode(msg) - # payload_tag: str = msg.header.payload_tag - # payload_dec: msgpack.Decoder = codec._payload_decs[payload_tag] - # return payload_dec.decode(msg.pld) - - # def enc_payload( - # codec: MsgCodec, - # payload: Any, - # cid: str, - - # ) -> bytes: - - # # tag_field: str|None = None - - # plbytes = codec.enc.encode(payload) - # if b'msg_type' in plbytes: - # assert isinstance(payload, Struct) - - # # tag_field: str = type(payload).__name__ - # payload = msgspec.Raw(plbytes) - - # msg = Msg( - # cid=cid, - # pld=payload, - # # Header( - # # payload_tag=tag_field, - # # # dialog_id, - # # ), - # ) - # return codec.enc.encode(msg) - - - -# TODO: sub-decoded `Raw` fields? -# -[ ] see `MsgCodec._payload_decs` notes +# [x] TODO: a sub-decoder system as well? => No! # -# XXX if we wanted something more complex then field name str-keys -# we might need a header field type to describe the lookup sys? -# class Header(Struct, tag=True): -# ''' -# A msg header which defines payload properties - -# ''' -# payload_tag: str|None = None - - - #def mk_tagged_union_dec( - # tagged_structs: list[Struct], - - #) -> tuple[ - # list[str], - # msgpack.Decoder, - #]: - # ''' - # Create a `msgpack.Decoder` for an input `list[msgspec.Struct]` - # and return a `list[str]` of each struct's `tag_field: str` value - # which can be used to "map to" the initialized dec. - - # ''' - # # See "tagged unions" docs: - # # https://jcristharif.com/msgspec/structs.html#tagged-unions - - # # "The quickest way to enable tagged unions is to set tag=True when - # # defining every struct type in the union. In this case tag_field - # # defaults to "type", and tag defaults to the struct class name - # # (e.g. "Get")." - # first: Struct = tagged_structs[0] - # types_union: Union[Type[Struct]] = Union[ - # first - # ]|Any - # tags: list[str] = [first.__name__] - - # for struct in tagged_structs[1:]: - # types_union |= struct - # tags.append( - # getattr( - # struct, - # struct.__struct_config__.tag_field, - # struct.__name__, - # ) - # ) - - # dec = msgpack.Decoder(types_union) - # return ( - # tags, - # dec, - # ) +# -[x] do we still want to try and support the sub-decoder with +# `.Raw` technique in the case that the `Generic` approach gives +# future grief? +# => NO, since we went with the `PldRx` approach instead B) +# +# IF however you want to see the code that was staged for this +# from wayyy back, see the pure removal commit. def mk_codec( @@ -644,10 +496,6 @@ _def_tractor_codec: MsgCodec = mk_codec( # 3. We similarly set the pending values for the child nurseries # of the *current* task. # - -# TODO: STOP USING THIS, since it's basically a global and won't -# allow sub-IPC-ctxs to limit the msg-spec however desired.. 
-# _ctxvar_MsgCodec: MsgCodec = RunVar( _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( 'msgspec_codec', default=_def_tractor_codec, @@ -782,3 +630,31 @@ def limit_msg_spec( # # import pdbp; pdbp.set_trace() # assert ext_codec.pld_spec == extended_spec # yield ext_codec + + +# TODO: make something similar to this inside `._codec` such that +# user can just pass a type table of some sort? +# -[ ] we would need to decode all msgs to `pretty_struct.Struct` +# and then call `.to_dict()` on them? +# -[x] we're going to need to re-impl all the stuff changed in the +# runtime port such that it can handle dicts or `Msg`s? +# +# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: +# ''' +# Deliver a `enc_hook()`/`dec_hook()` pair which does +# manual convertion from our above native `Msg` set +# to `dict` equivalent (wire msgs) in order to keep legacy compat +# with the original runtime implementation. +# +# Note: this is is/was primarly used while moving the core +# runtime over to using native `Msg`-struct types wherein we +# start with the send side emitting without loading +# a typed-decoder and then later flipping the switch over to +# load to the native struct types once all runtime usage has +# been adjusted appropriately. +# +# ''' +# return ( +# # enc_to_dict, +# dec_from_dict, +# ) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index ad6d6fb8..0fc0ee96 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -26,7 +26,6 @@ from __future__ import annotations import types from typing import ( Any, - # Callable, Generic, Literal, Type, @@ -161,7 +160,6 @@ class SpawnSpec( bind_addrs: list[tuple[str, int]] - # TODO: caps based RPC support in the payload? # # -[ ] integration with our ``enable_modules: list[str]`` caps sys. @@ -314,8 +312,9 @@ class Started( pld: PayloadT|Raw -# TODO: instead of using our existing `Start` -# for this (as we did with the original `{'cmd': ..}` style) +# TODO: cancel request dedicated msg? +# -[ ] instead of using our existing `Start`? +# # class Cancel: # cid: str @@ -477,12 +476,16 @@ def from_dict_msg( ) return msgT(**dict_msg) -# TODO: should be make a msg version of `ContextCancelled?` -# and/or with a scope field or a full `ActorCancelled`? +# TODO: should be make a set of cancel msgs? +# -[ ] a version of `ContextCancelled`? +# |_ and/or with a scope field? +# -[ ] or, a full `ActorCancelled`? +# # class Cancelled(MsgType): # cid: str - -# TODO what about overruns? +# +# -[ ] what about overruns? +# # class Overrun(MsgType): # cid: str @@ -564,10 +567,17 @@ def mk_msg_spec( Create a payload-(data-)type-parameterized IPC message specification. Allows generating IPC msg types from the above builtin set - with a payload (field) restricted data-type via the `Msg.pld: - PayloadT` type var. This allows runtime-task contexts to use - the python type system to limit/filter payload values as - determined by the input `payload_type_union: Union[Type]`. + with a payload (field) restricted data-type, the `Msg.pld: PayloadT`. + + This allows runtime-task contexts to use the python type system + to limit/filter payload values as determined by the input + `payload_type_union: Union[Type]`. + + Notes: originally multiple approaches for constructing the + type-union passed to `msgspec` were attempted as selected via the + `spec_build_method`, but it turns out only the defaul method + 'indexed_generics' seems to work reliably in all use cases. As + such, the others will likely be removed in the near future. 
    '''
    submsg_types: list[MsgType] = Msg.__subclasses__()
@@ -707,31 +717,3 @@ def mk_msg_spec(
         +
         ipc_msg_types,
     )
-
-
-# TODO: make something similar to this inside `._codec` such that
-# user can just pass a type table of some sort?
-# -[ ] we would need to decode all msgs to `pretty_struct.Struct`
-#     and then call `.to_dict()` on them?
-# -[ ] we're going to need to re-impl all the stuff changed in the
-#     runtime port such that it can handle dicts or `Msg`s?
-#
-# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]:
-#     '''
-#     Deliver a `enc_hook()`/`dec_hook()` pair which does
-#     manual convertion from our above native `Msg` set
-#     to `dict` equivalent (wire msgs) in order to keep legacy compat
-#     with the original runtime implementation.
-#
-#     Note: this is is/was primarly used while moving the core
-#     runtime over to using native `Msg`-struct types wherein we
-#     start with the send side emitting without loading
-#     a typed-decoder and then later flipping the switch over to
-#     load to the native struct types once all runtime usage has
-#     been adjusted appropriately.
-#
-#     '''
-#     return (
-#         # enc_to_dict,
-#         dec_from_dict,
-#     )
-- 
2.34.1


From 54386900e0c76f6d8ff1f73ede8449214fd83edd Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 31 May 2024 14:40:55 -0400
Subject: [PATCH 147/190] Use `Context` repr APIs for RPC outcome logs

Delegate to the new `.repr_state: str` and adjust log level based on
error vs. cancel vs. result.
---
 tractor/_rpc.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 57db52f3..8a9b3487 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -738,37 +738,24 @@ async def _invoke(
             cid,
         ))

+        logmeth: Callable = log.runtime
         merr: Exception|None = ctx.maybe_error
-        (
-            res_type_str,
-            res_str,
-        ) = (
-            ('error', f'{type(merr)}',) if merr
-            else (
-                'result',
-                f'`{repr(ctx.outcome)}`',
-            )
-        )
+        descr_str: str = f'with final result `{repr(ctx.outcome)}`'
         message: str = (
-            f'IPC context terminated with a final {res_type_str}\n\n'
-            f'{ctx}'
+            f'IPC context terminated {descr_str}\n\n'
         )
         if merr:
-            from tractor import RemoteActorError
-            if not isinstance(merr, RemoteActorError):
-                fmt_merr: str = (
-                    f'\n{merr!r}\n'
-                    # f'{merr.args[0]!r}\n'
-                )
-            else:
-                fmt_merr = f'\n{merr!r}'
-            log.error(
-                message
-                +
-                fmt_merr
+            descr_str: str = (
+                f'with ctx having {ctx.repr_state!r}\n'
+                f'{ctx.repr_outcome()}\n'
             )
-        else:
-            log.runtime(message)
+            if isinstance(merr, ContextCancelled):
+                logmeth: Callable = log.runtime
+            else:
+                logmeth: Callable = log.error
+                message += f'\n{merr!r}\n'
+
+        logmeth(message)
-- 
2.34.1


From 18b4618b5fe8ce77fe6a44eed0b870802c34d167 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 31 May 2024 17:32:11 -0400
Subject: [PATCH 148/190] Move `Context.open_stream()` impl to `._streaming`

Exactly like how it's organized for `Portal.open_context()`, put the
main streaming API `@acm` with the `MsgStream` code and bind the method
to the new module func.

Other,
- rename `Context.result()` -> `.wait_for_result()` to better match the
  blocking semantics and rebind `.result()` as deprecated.
- add doc-str for `Context.maybe_raise()`.
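The binding trick in a nutshell; an illustrative sketch only, not the
real `Context`/`MsgStream` types:

    from contextlib import asynccontextmanager as acm

    import trio

    @acm
    async def open_stream_from_ctx(ctx):
        # the real impl lives in `._streaming`; the point is that
        # plain class-body assignment turns a module-level func into
        # a normal bound method, `self` arriving as the `ctx` arg.
        yield f'stream-for-{ctx.cid}'

    class Context:
        cid: str = 'doggy'
        open_stream = open_stream_from_ctx

    async def main():
        async with Context().open_stream() as stream:
            assert stream == 'stream-for-doggy'

    trio.run(main)
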
--- tractor/_context.py | 247 +++++++----------------------------------- tractor/_streaming.py | 209 +++++++++++++++++++++++++++++++++++ 2 files changed, 248 insertions(+), 208 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 453e9065..dd4ad846 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -86,7 +86,10 @@ from .msg import ( from ._ipc import ( Channel, ) -from ._streaming import MsgStream +from ._streaming import ( + MsgStream, + open_stream_from_ctx, +) from ._state import ( current_actor, debug_mode, @@ -978,198 +981,6 @@ class Context: assert self._scope self._scope.cancel() - # TODO? should we move this to `._streaming` much like we - # moved `Portal.open_context()`'s def to this mod? - @acm - async def open_stream( - self, - allow_overruns: bool|None = False, - msg_buffer_size: int|None = None, - - ) -> AsyncGenerator[MsgStream, None]: - ''' - Open a ``MsgStream``, a bi-directional stream connected to the - cross-actor (far end) task for this ``Context``. - - This context manager must be entered on both the caller and - callee for the stream to logically be considered "connected". - - A ``MsgStream`` is currently "one-shot" use, meaning if you - close it you can not "re-open" it for streaming and instead you - must re-establish a new surrounding ``Context`` using - ``Portal.open_context()``. In the future this may change but - currently there seems to be no obvious reason to support - "re-opening": - - pausing a stream can be done with a message. - - task errors will normally require a restart of the entire - scope of the inter-actor task context due to the nature of - ``trio``'s cancellation system. - - ''' - actor: Actor = self._actor - - # If the surrounding context has been cancelled by some - # task with a handle to THIS, we error here immediately - # since it likely means the surrounding lexical-scope has - # errored, been `trio.Cancelled` or at the least - # `Context.cancel()` was called by some task. - if self._cancel_called: - - # XXX NOTE: ALWAYS RAISE any remote error here even if - # it's an expected `ContextCancelled` due to a local - # task having called `.cancel()`! - # - # WHY: we expect the error to always bubble up to the - # surrounding `Portal.open_context()` call and be - # absorbed there (silently) and we DO NOT want to - # actually try to stream - a cancel msg was already - # sent to the other side! - self.maybe_raise( - raise_ctxc_from_self_call=True, - ) - # NOTE: this is diff then calling - # `._maybe_raise_remote_err()` specifically - # because we want to raise a ctxc on any task entering this `.open_stream()` - # AFTER cancellation was already been requested, - # we DO NOT want to absorb any ctxc ACK silently! - # if self._remote_error: - # raise self._remote_error - - # XXX NOTE: if no `ContextCancelled` has been responded - # back from the other side (yet), we raise a different - # runtime error indicating that this task's usage of - # `Context.cancel()` and then `.open_stream()` is WRONG! 
- task: str = trio.lowlevel.current_task().name - raise RuntimeError( - 'Stream opened after `Context.cancel()` called..?\n' - f'task: {actor.uid[0]}:{task}\n' - f'{self}' - ) - - if ( - not self._portal - and not self._started_called - ): - raise RuntimeError( - 'Context.started()` must be called before opening a stream' - ) - - # NOTE: in one way streaming this only happens on the - # parent-ctx-task side (on the side that calls - # `Actor.start_remote_task()`) so if you try to send - # a stop from the caller to the callee in the - # single-direction-stream case you'll get a lookup error - # currently. - ctx: Context = actor.get_context( - chan=self.chan, - cid=self.cid, - nsf=self._nsf, - # side=self.side, - - msg_buffer_size=msg_buffer_size, - allow_overruns=allow_overruns, - ) - ctx._allow_overruns: bool = allow_overruns - assert ctx is self - - # XXX: If the underlying channel feeder receive mem chan has - # been closed then likely client code has already exited - # a ``.open_stream()`` block prior or there was some other - # unanticipated error or cancellation from ``trio``. - - if ctx._rx_chan._closed: - raise trio.ClosedResourceError( - 'The underlying channel for this stream was already closed!\n' - ) - - # NOTE: implicitly this will call `MsgStream.aclose()` on - # `.__aexit__()` due to stream's parent `Channel` type! - # - # XXX NOTE XXX: ensures the stream is "one-shot use", - # which specifically means that on exit, - # - signal ``trio.EndOfChannel``/``StopAsyncIteration`` to - # the far end indicating that the caller exited - # the streaming context purposefully by letting - # the exit block exec. - # - this is diff from the cancel/error case where - # a cancel request from this side or an error - # should be sent to the far end indicating the - # stream WAS NOT just closed normally/gracefully. - async with MsgStream( - ctx=self, - rx_chan=ctx._rx_chan, - ) as stream: - - # NOTE: we track all existing streams per portal for - # the purposes of attempting graceful closes on runtime - # cancel requests. - if self._portal: - self._portal._streams.add(stream) - - try: - self._stream_opened: bool = True - self._stream = stream - - # XXX: do we need this? - # ensure we aren't cancelled before yielding the stream - # await trio.lowlevel.checkpoint() - yield stream - - # XXX: (MEGA IMPORTANT) if this is a root opened process we - # wait for any immediate child in debug before popping the - # context from the runtime msg loop otherwise inside - # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in - # the case where that msg is global debugger unlock (via - # a "stop" msg for a stream), this can result in a deadlock - # where the root is waiting on the lock to clear but the - # child has already cleared it and clobbered IPC. - # - # await maybe_wait_for_debugger() - - # XXX TODO: pretty sure this isn't needed (see - # note above this block) AND will result in - # a double `.send_stop()` call. The only reason to - # put it here would be to due with "order" in - # terms of raising any remote error (as per - # directly below) or bc the stream's - # `.__aexit__()` block might not get run - # (doubtful)? Either way if we did put this back - # in we also need a state var to avoid the double - # stop-msg send.. - # - # await stream.aclose() - - # NOTE: absorb and do not raise any - # EoC received from the other side such that - # it is not raised inside the surrounding - # context block's scope! 
- except trio.EndOfChannel as eoc: - if ( - eoc - and - stream.closed - ): - # sanity, can remove? - assert eoc is stream._eoc - - log.warning( - 'Stream was terminated by EoC\n\n' - # NOTE: won't show the error but - # does show txt followed by IPC msg. - f'{str(eoc)}\n' - ) - - finally: - if self._portal: - try: - self._portal._streams.remove(stream) - except KeyError: - log.warning( - f'Stream was already destroyed?\n' - f'actor: {self.chan.uid}\n' - f'ctx id: {self.cid}' - ) - # TODO: replace all the `._maybe_raise_remote_err()` usage # with instances of this!! def maybe_raise( @@ -1178,6 +989,14 @@ class Context: **kwargs, ) -> Exception|None: + ''' + Check for for a remote error delivered by the runtime from + our peer (task); if set immediately raise. + + This is a convenience wrapper for + `._maybe_raise_remote_err(self._remote_error)`. + + ''' __tracebackhide__: bool = hide_tb if re := self._remote_error: return self._maybe_raise_remote_err( @@ -1290,8 +1109,7 @@ class Context: raise remote_error - # TODO: change to `.wait_for_result()`? - async def result( + async def wait_for_result( self, hide_tb: bool = True, @@ -1380,18 +1198,27 @@ class Context: (not self._cancel_called) ) ) + # TODO: eventually make `.outcome: Outcome` and thus return + # `self.outcome.unwrap()` here! return self.outcome # TODO: switch this with above! # -[ ] should be named `.wait_for_outcome()` and instead do # a `.outcome.Outcome.unwrap()` ? # - # @property - # def result(self) -> Any|None: - # if self._final_result_is_set(): - # return self._result - - # raise RuntimeError('No result is available!') + async def result( + self, + *args, + **kwargs, + ) -> Any|Exception: + log.warning( + '`Context.result()` is DEPRECATED!\n' + 'Use `Context.[no]wait_for_result()` instead!\n' + ) + return await self.wait_for_result( + *args, + **kwargs, + ) @property def maybe_error(self) -> BaseException|None: @@ -1447,6 +1274,9 @@ class Context: return self._result is not Unresolved # def get_result_nowait(self) -> Any|None: + # def get_outcome_nowait(self) -> Any|None: + # def recv_result_nowait(self) -> Any|None: + # def receive_outcome_nowait(self) -> Any|None: # TODO: use `outcome.Outcome` here instead? @property def outcome(self) -> ( @@ -1476,7 +1306,6 @@ class Context: def has_outcome(self) -> bool: return bool(self.maybe_error) or self._final_result_is_set() - # @property def repr_outcome( self, show_error_fields: bool = False, @@ -1498,7 +1327,8 @@ class Context: # just deliver the type name. if ( (reprol := getattr(merr, 'reprol', False)) - and show_error_fields + and + show_error_fields ): return reprol() @@ -1515,10 +1345,6 @@ class Context: repr(merr) ) - # just the type name - # else: # but wen? - # return type(merr).__name__ - # for all other errors show their regular output return ( str(merr) @@ -1572,7 +1398,7 @@ class Context: _, # any non-unresolved value None, ) if self._final_result_is_set(): - status = 'returned' + status = 'result-returned' # normal operation but still in a pre-`Return`-result # dialog phase @@ -1940,6 +1766,11 @@ class Context: # ow, indicate unable to deliver by default return False + # NOTE: similar to `Portal.open_context()`, this impl is found in + # the `._streaming`` mod to make reading/groking the details + # simpler code-org-wise. + open_stream = open_stream_from_ctx + # TODO: exception tb masking by using a manual # `.__aexit__()`/.__aenter__()` pair on a type? 
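As an aside, the `.result()` rebind above is the usual
deprecation-shim shape; a minimal standalone version, subbing
`warnings` in for our `log.warning()`:

    import warnings

    import trio

    class Context:
        async def wait_for_result(self, hide_tb: bool = True):
            return 'outcome'  # stand-in for the real drain/raise logic

        async def result(self, *args, **kwargs):
            # complain, then delegate to the new name
            warnings.warn(
                '`Context.result()` is DEPRECATED, '
                'use `Context.wait_for_result()`!',
                DeprecationWarning,
                stacklevel=2,
            )
            return await self.wait_for_result(*args, **kwargs)

    assert trio.run(Context().result) == 'outcome'
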
diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 016577d3..314a93b8 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -26,6 +26,7 @@ import inspect from pprint import pformat from typing import ( Any, + AsyncGenerator, Callable, AsyncIterator, TYPE_CHECKING, @@ -51,6 +52,7 @@ from tractor.msg import ( ) if TYPE_CHECKING: + from ._runtime import Actor from ._context import Context from ._ipc import Channel @@ -550,6 +552,213 @@ class MsgStream(trio.abc.Channel): # ... +@acm +async def open_stream_from_ctx( + ctx: Context, + allow_overruns: bool|None = False, + msg_buffer_size: int|None = None, + +) -> AsyncGenerator[MsgStream, None]: + ''' + Open a `MsgStream`, a bi-directional msg transport dialog + connected to the cross-actor peer task for an IPC `Context`. + + This context manager must be entered in both the "parent" (task + which entered `Portal.open_context()`) and "child" (RPC task + which is decorated by `@context`) tasks for the stream to + logically be considered "open"; if one side begins sending to an + un-opened peer, depending on policy config, msgs will either be + queued until the other side opens and/or a `StreamOverrun` will + (eventually) be raised. + + ------ - ------ + + Runtime semantics design: + + A `MsgStream` session adheres to "one-shot use" semantics, + meaning if you close the scope it **can not** be "re-opened". + + Instead you must re-establish a new surrounding RPC `Context` + (RTC: remote task context?) using `Portal.open_context()`. + + In the future this *design choice* may need to be changed but + currently there seems to be no obvious reason to support such + semantics.. + + - "pausing a stream" can be supported with a message implemented + by the `tractor` application dev. + + - any remote error will normally require a restart of the entire + `trio.Task`'s scope due to the nature of `trio`'s cancellation + (`CancelScope`) system and semantics (level triggered). + + ''' + actor: Actor = ctx._actor + + # If the surrounding context has been cancelled by some + # task with a handle to THIS, we error here immediately + # since it likely means the surrounding lexical-scope has + # errored, been `trio.Cancelled` or at the least + # `Context.cancel()` was called by some task. + if ctx._cancel_called: + + # XXX NOTE: ALWAYS RAISE any remote error here even if + # it's an expected `ContextCancelled` due to a local + # task having called `.cancel()`! + # + # WHY: we expect the error to always bubble up to the + # surrounding `Portal.open_context()` call and be + # absorbed there (silently) and we DO NOT want to + # actually try to stream - a cancel msg was already + # sent to the other side! + ctx.maybe_raise( + raise_ctxc_from_self_call=True, + ) + # NOTE: this is diff then calling + # `._maybe_raise_remote_err()` specifically + # because we want to raise a ctxc on any task entering this `.open_stream()` + # AFTER cancellation was already been requested, + # we DO NOT want to absorb any ctxc ACK silently! + # if ctx._remote_error: + # raise ctx._remote_error + + # XXX NOTE: if no `ContextCancelled` has been responded + # back from the other side (yet), we raise a different + # runtime error indicating that this task's usage of + # `Context.cancel()` and then `.open_stream()` is WRONG! 
+ task: str = trio.lowlevel.current_task().name + raise RuntimeError( + 'Stream opened after `Context.cancel()` called..?\n' + f'task: {actor.uid[0]}:{task}\n' + f'{ctx}' + ) + + if ( + not ctx._portal + and not ctx._started_called + ): + raise RuntimeError( + 'Context.started()` must be called before opening a stream' + ) + + # NOTE: in one way streaming this only happens on the + # parent-ctx-task side (on the side that calls + # `Actor.start_remote_task()`) so if you try to send + # a stop from the caller to the callee in the + # single-direction-stream case you'll get a lookup error + # currently. + ctx: Context = actor.get_context( + chan=ctx.chan, + cid=ctx.cid, + nsf=ctx._nsf, + # side=ctx.side, + + msg_buffer_size=msg_buffer_size, + allow_overruns=allow_overruns, + ) + ctx._allow_overruns: bool = allow_overruns + assert ctx is ctx + + # XXX: If the underlying channel feeder receive mem chan has + # been closed then likely client code has already exited + # a ``.open_stream()`` block prior or there was some other + # unanticipated error or cancellation from ``trio``. + + if ctx._rx_chan._closed: + raise trio.ClosedResourceError( + 'The underlying channel for this stream was already closed!\n' + ) + + # NOTE: implicitly this will call `MsgStream.aclose()` on + # `.__aexit__()` due to stream's parent `Channel` type! + # + # XXX NOTE XXX: ensures the stream is "one-shot use", + # which specifically means that on exit, + # - signal ``trio.EndOfChannel``/``StopAsyncIteration`` to + # the far end indicating that the caller exited + # the streaming context purposefully by letting + # the exit block exec. + # - this is diff from the cancel/error case where + # a cancel request from this side or an error + # should be sent to the far end indicating the + # stream WAS NOT just closed normally/gracefully. + async with MsgStream( + ctx=ctx, + rx_chan=ctx._rx_chan, + ) as stream: + + # NOTE: we track all existing streams per portal for + # the purposes of attempting graceful closes on runtime + # cancel requests. + if ctx._portal: + ctx._portal._streams.add(stream) + + try: + ctx._stream_opened: bool = True + ctx._stream = stream + + # XXX: do we need this? + # ensure we aren't cancelled before yielding the stream + # await trio.lowlevel.checkpoint() + yield stream + + # XXX: (MEGA IMPORTANT) if this is a root opened process we + # wait for any immediate child in debug before popping the + # context from the runtime msg loop otherwise inside + # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in + # the case where that msg is global debugger unlock (via + # a "stop" msg for a stream), this can result in a deadlock + # where the root is waiting on the lock to clear but the + # child has already cleared it and clobbered IPC. + # + # await maybe_wait_for_debugger() + + # XXX TODO: pretty sure this isn't needed (see + # note above this block) AND will result in + # a double `.send_stop()` call. The only reason to + # put it here would be to due with "order" in + # terms of raising any remote error (as per + # directly below) or bc the stream's + # `.__aexit__()` block might not get run + # (doubtful)? Either way if we did put this back + # in we also need a state var to avoid the double + # stop-msg send.. + # + # await stream.aclose() + + # NOTE: absorb and do not raise any + # EoC received from the other side such that + # it is not raised inside the surrounding + # context block's scope! 
+ except trio.EndOfChannel as eoc: + if ( + eoc + and + stream.closed + ): + # sanity, can remove? + assert eoc is stream._eoc + + log.warning( + 'Stream was terminated by EoC\n\n' + # NOTE: won't show the error but + # does show txt followed by IPC msg. + f'{str(eoc)}\n' + ) + + finally: + if ctx._portal: + try: + ctx._portal._streams.remove(stream) + except KeyError: + log.warning( + f'Stream was already destroyed?\n' + f'actor: {ctx.chan.uid}\n' + f'ctx id: {ctx.cid}' + ) + + + def stream(func: Callable) -> Callable: ''' Mark an async function as a streaming routine with ``@stream``. -- 2.34.1 From 2f1a97e73e28ac93223268f021fe10f48c57bed9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 6 Jun 2024 16:14:58 -0400 Subject: [PATCH 149/190] Catch `.pause_from_sync()` in root bg thread bugs! Originally discovered as while using `tractor.pause_from_sync()` from the `i3ipc` client running in a bg-thread that uses `asyncio` inside `modden`. Turns out we definitely aren't correctly handling `.pause_from_sync()` from the root actor when called from a `trio.to_thread.run_sync()` bg thread: - root-actor bg threads which can't `Lock._debug_lock.acquire()` since they aren't in `trio.Task`s. - even if scheduled via `.to_thread.run_sync(_debug._pause)` the acquirer won't be the task/thread which calls `Lock.release()` from `PdbREPL` hooks; this results in a RTE raised by `trio`.. - multiple threads will step on each other's stdio since cpython's GIL seems to ctx switch threads on every input from the user to the REPL loop.. Reproduce via reworking our example and test so that they catch and fail for all edge cases: - rework the `/examples/debugging/sync_bp.py` example to demonstrate the above issues, namely the stdio clobbering in the REPL when multiple threads and/or a subactor try to debug simultaneously. |_ run one thread using a task nursery to ensure it runs conc with the nursery's parent task. |_ ensure the bg threads run conc a subactor usage of `.pause_from_sync()`. |_ gravely detail all the special cases inside a TODO comment. |_ add some control flags to `sync_pause()` helper and don't use `breakpoint()` by default. - extend and adjust `test_debugger.test_pause_from_sync` to match (and thus currently fail) by ensuring exclusive `PdbREPL` attachment when the 2 bg root-actor threads are concurrently interacting alongside the subactor: |_ should only see one of the `_pause_msg` logs at a time for either one of the threads or the subactor. |_ ensure each attaches (in no particular order) before expecting the script to exit. Impl adjustments to `.devx._debug`: - drop `Lock.repl`, no longer used. - add `Lock._owned_by_root: bool` for the `.ctx_in_debug == None` root-actor-task active case. - always `log.exception()` for any `._debug_lock.release()` ownership RTE emitted by `trio`, like we used to.. - add special `Lock.release()` log message for the stale lock but `._owned_by_root == True` case; oh yeah and actually `log.devx(message)`.. - rename `Lock.acquire()` -> `.acquire_for_ctx()` since it's only ever used from subactor IPC usage; well that and for local root-task usage we should prolly add a `.acquire_from_root_task()`? - buncha `._pause()` impl improvements: |_ type `._pause()`'s `debug_func` as a `partial` as well. |_ offer `called_from_sync: bool` and `called_from_bg_thread: bool` for the special case handling when called from `.pause_from_sync()` |_ only set `DebugStatus.repl/repl_task` when `debug_func != None` (OW ensure the `.repl_task` is not the current one). 
|_ handle error logging even when `debug_func is None`.. |_ lotsa detailed commentary around root-actor-bg-thread special cases. - when `._set_trace(hide_tb=False)` do `pdbp.set_trace(frame=currentframe())` so the `._debug` internal frames are always included. - by default always hide tracebacks for `.pause[_from_sync]()` internals. - improve `.pause_from_sync()` to avoid root-bg-thread crashes: |_ pass new `called_from_xxx_` flags and ensure `DebugStatus.repl_task` is actually set to the `threading.current_thread()` when needed. |_ manually call `Lock._debug_lock.acquire_nowait()` for the non-bg thread case. |_ TODO: still need to implement the bg-thread case using a bg `trio.Task`-in-thread with an `trio.Event` set by thread REPL exit. --- examples/debugging/sync_bp.py | 125 +++++++++++++++--- tests/test_debugger.py | 71 +++++++--- tractor/devx/_debug.py | 241 +++++++++++++++++++++++----------- 3 files changed, 323 insertions(+), 114 deletions(-) diff --git a/examples/debugging/sync_bp.py b/examples/debugging/sync_bp.py index efa4e405..e265df44 100644 --- a/examples/debugging/sync_bp.py +++ b/examples/debugging/sync_bp.py @@ -1,15 +1,32 @@ +from functools import partial +import time +from threading import current_thread + import trio import tractor def sync_pause( - use_builtin: bool = True, + use_builtin: bool = False, error: bool = False, + hide_tb: bool = True, + pre_sleep: float|None = None, ): + if pre_sleep: + time.sleep(pre_sleep) + if use_builtin: - breakpoint(hide_tb=False) + print( + f'Entering `breakpoint()` from\n' + f'{current_thread()}\n' + ) + breakpoint(hide_tb=hide_tb) else: + print( + f'Entering `tractor.pause_from_sync()` from\n' + f'{current_thread()}@{tractor.current_actor().uid}\n' + ) tractor.pause_from_sync() if error: @@ -25,44 +42,114 @@ async def start_n_sync_pause( # sync to parent-side task await ctx.started() - print(f'entering SYNC PAUSE in {actor.uid}') + print(f'Entering `sync_pause()` in subactor: {actor.uid}\n') sync_pause() - print(f'back from SYNC PAUSE in {actor.uid}') + print(f'Exited `sync_pause()` in subactor: {actor.uid}\n') async def main() -> None: - async with tractor.open_nursery( - # NOTE: required for pausing from sync funcs - maybe_enable_greenback=True, - debug_mode=True, - ) as an: + async with ( + tractor.open_nursery( + # NOTE: required for pausing from sync funcs + maybe_enable_greenback=True, + debug_mode=True, + # loglevel='cancel', + ) as an, + trio.open_nursery() as tn, + ): + # just from root task + sync_pause() p: tractor.Portal = await an.start_actor( 'subactor', enable_modules=[__name__], # infect_asyncio=True, debug_mode=True, - loglevel='cancel', ) # TODO: 3 sub-actor usage cases: + # -[x] via a `.open_context()` # -[ ] via a `.run_in_actor()` call # -[ ] via a `.run()` - # -[ ] via a `.open_context()` - # + # -[ ] via a `.to_thread.run_sync()` in subactor async with p.open_context( start_n_sync_pause, ) as (ctx, first): assert first is None - await tractor.pause() - sync_pause() + # TODO: handle bg-thread-in-root-actor special cases! + # + # there are a couple very subtle situations possible here + # and they are likely to become more important as cpython + # moves to support no-GIL. + # + # Cases: + # 1. root-actor bg-threads that call `.pause_from_sync()` + # whilst an in-tree subactor also is using ` .pause()`. 
+ # |_ since the root-actor bg thread can not + # `Lock._debug_lock.acquire_nowait()` without running + # a `trio.Task`, AND because the + # `PdbREPL.set_continue()` is called from that + # bg-thread, we can not `._debug_lock.release()` + # either! + # |_ this results in no actor-tree `Lock` being used + # on behalf of the bg-thread and thus the subactor's + # task and the thread trying to to use stdio + # simultaneously which results in the classic TTY + # clobbering! + # + # 2. mutiple sync-bg-threads that call + # `.pause_from_sync()` where one is scheduled via + # `Nursery.start_soon(to_thread.run_sync)` in a bg + # task. + # + # Due to the GIL, the threads never truly try to step + # through the REPL simultaneously, BUT their `logging` + # and traceback outputs are interleaved since the GIL + # (seemingly) on every REPL-input from the user + # switches threads.. + # + # Soo, the context switching semantics of the GIL + # result in a very confusing and messy interaction UX + # since eval and (tb) print output is NOT synced to + # each REPL-cycle (like we normally make it via + # a `.set_continue()` callback triggering the + # `Lock.release()`). Ideally we can solve this + # usability issue NOW because this will of course be + # that much more important when eventually there is no + # GIL! - # TODO: make this work!! - await trio.to_thread.run_sync( - sync_pause, - abandon_on_cancel=False, - ) + # XXX should cause double REPL entry and thus TTY + # clobbering due to case 1. above! + tn.start_soon( + partial( + trio.to_thread.run_sync, + partial( + sync_pause, + use_builtin=False, + # pre_sleep=0.5, + ), + abandon_on_cancel=True, + thread_name='start_soon_root_bg_thread', + ) + ) + + await tractor.pause() + + # XXX should cause double REPL entry and thus TTY + # clobbering due to case 2. above! + await trio.to_thread.run_sync( + partial( + sync_pause, + # NOTE this already works fine since in the new + # thread the `breakpoint()` built-in is never + # overloaded, thus NO locking is used, HOWEVER + # the case 2. from above still exists! + use_builtin=True, + ), + abandon_on_cancel=False, + thread_name='inline_root_bg_thread', + ) await ctx.cancel() diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 88453682..c95c4f95 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -1071,6 +1071,8 @@ def test_pause_from_sync( ''' child = spawn('sync_bp') + + # first `sync_pause()` after nurseries open child.expect(PROMPT) assert_before( child, @@ -1085,43 +1087,70 @@ def test_pause_from_sync( do_ctlc(child) child.sendline('c') + + + # first `await tractor.pause()` inside `p.open_context()` body child.expect(PROMPT) - # XXX shouldn't see gb loaded again + # XXX shouldn't see gb loaded message with PDB loglevel! 
before = str(child.before.decode()) assert not in_prompt_msg( before, ['`greenback` portal opened!'], ) + # should be same root task assert_before( child, - [_pause_msg, "('root'",], + [ + _pause_msg, + " CancelScope|None: if not is_root_process(): @@ -223,6 +219,7 @@ class Lock: ctx_in_debug: Context|None = None req_handler_finished: trio.Event|None = None + _owned_by_root: bool = False _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() _blocked: set[ tuple[str, str] # `Actor.uid` for per actor @@ -231,23 +228,16 @@ class Lock: @classmethod def repr(cls) -> str: - - # both root and subs + lock_stats: trio.LockStatistics = cls._debug_lock.statistics() fields: str = ( - f'repl: {cls.repl}\n' + f'req_handler_finished: {cls.req_handler_finished}\n' + f'_blocked: {cls._blocked}\n\n' + f'_debug_lock: {cls._debug_lock}\n' + f'lock_stats: {lock_stats}\n' + f'ctx_in_debug: {cls.ctx_in_debug}\n' + ) - if is_root_process(): - lock_stats: trio.LockStatistics = cls._debug_lock.statistics() - fields += ( - f'req_handler_finished: {cls.req_handler_finished}\n' - - f'_blocked: {cls._blocked}\n\n' - f'_debug_lock: {cls._debug_lock}\n' - f'lock_stats: {lock_stats}\n' - - ) - body: str = textwrap.indent( fields, prefix=' |_', @@ -256,8 +246,6 @@ class Lock: f'<{cls.__name__}(\n' f'{body}' ')>\n\n' - - f'{cls.ctx_in_debug}\n' ) @classmethod @@ -266,7 +254,10 @@ class Lock: cls, force: bool = False, ): - message: str = 'TTY lock not held by any child\n' + if not cls._owned_by_root: + message: str = 'TTY lock not held by any child\n' + else: + message: str = 'TTY lock held in root-actor task\n' if not (is_trio_main := DebugStatus.is_main_trio_thread()): task: threading.Thread = threading.current_thread() @@ -279,8 +270,20 @@ class Lock: if ( lock.locked() and - owner is task - # ^-NOTE-^ if not will raise a RTE.. + ( + owner is task + # or + # cls._owned_by_root + ) + # ^-NOTE-^ if we do NOT ensure this, `trio` will + # raise a RTE when a non-owner tries to releasee the + # lock. + # + # Further we need to be extra pedantic about the + # correct task, greenback-spawned-task and/or thread + # being set to the `.repl_task` such that the above + # condition matches and we actually release the lock. + # This is particular of note from `.pause_from_sync()`! ): if not is_trio_main: trio.from_thread.run_sync( @@ -290,6 +293,10 @@ class Lock: cls._debug_lock.release() message: str = 'TTY lock released for child\n' + except RuntimeError as rte: + log.exception('Failed to release `Lock`?') + raise rte + finally: # IFF there are no more requesting tasks queued up fire, the # "tty-unlocked" event thereby alerting any monitors of the lock that @@ -305,7 +312,11 @@ class Lock: ): message += '-> No more child ctx tasks hold the TTY lock!\n' - elif req_handler_finished: + elif ( + req_handler_finished + and + lock.locked() + ): req_stats = req_handler_finished.statistics() message += ( f'-> A child ctx task still owns the `Lock` ??\n' @@ -315,9 +326,20 @@ class Lock: cls.ctx_in_debug = None + if ( + cls._owned_by_root + ): + if not lock.locked(): + cls._owned_by_root = False + else: + message += 'Lock still held by root actor task?!?\n' + lock.release() + + log.devx(message) + @classmethod @acm - async def acquire( + async def acquire_for_ctx( cls, ctx: Context, @@ -372,7 +394,7 @@ class Lock: ) # NOTE: critical section: this yield is unshielded! 
- + # # IF we received a cancel during the shielded lock entry of some # next-in-queue requesting task, then the resumption here will # result in that ``trio.Cancelled`` being raised to our caller @@ -384,7 +406,7 @@ class Lock: yield cls._debug_lock finally: - message :str = 'Exiting `Lock.acquire()` on behalf of sub-actor\n' + message :str = 'Exiting `Lock.acquire_for_ctx()` on behalf of sub-actor\n' if we_acquired: message += '-> TTY lock released by child\n' cls.release() @@ -468,11 +490,11 @@ async def lock_tty_for_child( # TODO: use `.msg._ops.maybe_limit_plds()` here instead so we # can merge into a single async with, with the - # `Lock.acquire()` enter below? + # `Lock.acquire_for_ctx()` enter below? # # enable the locking msgspec with apply_debug_pldec(): - async with Lock.acquire(ctx=ctx): + async with Lock.acquire_for_ctx(ctx=ctx): debug_lock_cs.shield = True log.devx( @@ -567,6 +589,11 @@ class DebugStatus: whenever a local task is an active REPL. ''' + # XXX local ref to the `pdbp.Pbp` instance, ONLY set in the + # actor-process that currently has activated a REPL i.e. it + # should be `None` (unset) in any other actor-process that does + # not yet have the `Lock` acquired via a root-actor debugger + # request. repl: PdbREPL|None = None # TODO: yet again this looks like a task outcome where we need @@ -1443,7 +1470,7 @@ class DebugRequestError(RuntimeError): async def _pause( - debug_func: Callable|None, + debug_func: Callable|partial|None, # NOTE: must be passed in the `.pause_from_sync()` case! repl: PdbREPL|None = None, @@ -1457,7 +1484,9 @@ async def _pause( # be no way to override it?.. # shield: bool = False, - hide_tb: bool = False, + hide_tb: bool = True, + called_from_sync: bool = False, + called_from_bg_thread: bool = False, task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, **debug_func_kwargs, @@ -1502,27 +1531,15 @@ async def _pause( # -[ ] factor out better, main reason for it is common logic for # both root and sub repl entry def _enter_repl_sync( - debug_func: Callable, + debug_func: partial[None], ) -> None: __tracebackhide__: bool = hide_tb + debug_func_name: str = ( + debug_func.func.__name__ if debug_func else 'None' + ) try: - # set local actor task to avoid recurrent - # entries/requests from the same local task (to the root - # process). - DebugStatus.repl_task = task - DebugStatus.repl = repl - - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - if debug_func is None: - task_status.started(DebugStatus) - - else: - log.warning( - 'Entering REPL for task fuck you!\n' - f'{task}\n' - ) + if debug_func: # block here one (at the appropriate frame *up*) where # ``breakpoint()`` was awaited and begin handling stdio. log.devx( @@ -1531,6 +1548,12 @@ async def _pause( f' |_{task}\n' ) + # set local actor task to avoid recurrent + # entries/requests from the same local task (to the root + # process). + DebugStatus.repl = repl + DebugStatus.repl_task = task + # invoke the low-level REPL activation routine which itself # should call into a `Pdb.set_trace()` of some sort. debug_func( @@ -1539,10 +1562,27 @@ async def _pause( **debug_func_kwargs, ) + # TODO: maybe invert this logic and instead + # do `assert debug_func is None` when + # `called_from_sync`? 
+ else: + if ( + called_from_sync + # and + # is_root_process() + and + not DebugStatus.is_main_trio_thread() + ): + assert DebugStatus.repl_task is not task + + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + task_status.started(DebugStatus) + except trio.Cancelled: log.exception( - 'Cancelled during invoke of internal `debug_func = ' - f'{debug_func.func.__name__}`\n' + 'Cancelled during invoke of internal\n\n' + f'`debug_func = {debug_func_name}`\n' ) # XXX NOTE: DON'T release lock yet raise @@ -1550,8 +1590,8 @@ async def _pause( except BaseException: __tracebackhide__: bool = False log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' + 'Failed to invoke internal\n\n' + f'`debug_func = {debug_func_name}`\n' ) # NOTE: OW this is ONLY called from the # `.set_continue/next` hooks! @@ -1597,34 +1637,56 @@ async def _pause( f'This root actor task is already within an active REPL session\n' f'Ignoring this re-entered `tractor.pause()`\n' f'task: {task.name}\n' - f'REPL: {Lock.repl}\n' # TODO: use `._frame_stack` scanner to find the @api_frame ) with trio.CancelScope(shield=shield): await trio.lowlevel.checkpoint() return - # XXX: since we need to enter pdb synchronously below, - # we have to release the lock manually from pdb completion - # callbacks. Can't think of a nicer way then this atm. + # must shield here to avoid hitting a `Cancelled` and + # a child getting stuck bc we clobbered the tty with trio.CancelScope(shield=shield): if Lock._debug_lock.locked(): - log.warning( - 'attempting to shield-acquire active TTY lock owned by\n' + + acq_prefix: str = 'shield-' if shield else '' + ctx_line: str = ( + 'lock owned by ctx\n\n' f'{ctx}' + ) if ctx else 'stale lock with no request ctx!?' + log.devx( + f'attempting to {acq_prefix}acquire active TTY ' + f'{ctx_line}' ) - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - # with trio.CancelScope(shield=True): - await Lock._debug_lock.acquire() - else: - # may be cancelled + # XXX: since we need to enter pdb synchronously below, + # and we don't want to block the thread that starts + # stepping through the application thread, we later + # must `Lock._debug_lock.release()` manually from + # some `PdbREPL` completion callback(`.set_[continue/exit]()`). + # + # So, when `._pause()` is called from a (bg/non-trio) + # thread, special provisions are needed and we need + # to do the `.acquire()`/`.release()` calls from + # a common `trio.task` (due to internal impl of + # `FIFOLock`). Thus we do not acquire here and + # instead expect `.pause_from_sync()` to take care of + # this detail depending on the caller's (threading) + # usage. + # + # NOTE that this special case is ONLY required when + # using `.pause_from_sync()` from the root actor + # since OW a subactor will instead make an IPC + # request (in the branch below) to acquire the + # `Lock`-mutex and a common root-actor RPC task will + # take care of `._debug_lock` mgmt! + if not called_from_sync: await Lock._debug_lock.acquire() + Lock._owned_by_root = True # enter REPL from root, no TTY locking IPC ctx necessary + # since we can acquire the `Lock._debug_lock` directly in + # thread. _enter_repl_sync(debug_func) - return # next branch is mutex and for subactors # TODO: need a more robust check for the "root" actor elif ( @@ -1843,6 +1905,11 @@ def _set_trace( # called our API. 
caller_frame: FrameType = api_frame.f_back # type: ignore + # pretend this frame is the caller frame to show + # the entire call-stack all the way down to here. + if not hide_tb: + caller_frame: FrameType = inspect.currentframe() + # engage ze REPL # B~() repl.set_trace(frame=caller_frame) @@ -1850,7 +1917,7 @@ def _set_trace( async def pause( *, - hide_tb: bool = False, + hide_tb: bool = True, api_frame: FrameType|None = None, # TODO: figure out how to still make this work: @@ -1970,13 +2037,12 @@ async def maybe_init_greenback( # runtime aware version which takes care of all . def pause_from_sync( - hide_tb: bool = False, - # proxied to `_pause()` + hide_tb: bool = True, - **_pause_kwargs, - # for eg. + # proxy to `._pause()`, for ex: # shield: bool = False, # api_frame: FrameType|None = None, + **_pause_kwargs, ) -> None: @@ -2020,26 +2086,53 @@ def pause_from_sync( # noop: non-cancelled `.to_thread` # `trio.Cancelled`: cancelled `.to_thread` # + log.warning( + 'Engaging `.pause_from_sync()` from ANOTHER THREAD!' + ) + task: threading.Thread = threading.current_thread() + DebugStatus.repl_task: str = task + + # TODO: make root-actor bg thread usage work! + # if is_root_process(): + # async def _pause_from_sync_thread(): + # ... + # else: + # .. the below .. + trio.from_thread.run( partial( _pause, debug_func=None, repl=mdb, + hide_tb=hide_tb, + + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, + **_pause_kwargs ), ) - task: threading.Thread = threading.current_thread() else: # we are presumably the `trio.run()` + main thread task: trio.Task = current_task() + DebugStatus.repl_task: str = task greenback.await_( _pause( debug_func=None, repl=mdb, + hide_tb=hide_tb, + called_from_sync=True, **_pause_kwargs, ) ) - DebugStatus.repl_task: str = current_task() + + if is_root_process(): + # Manually acquire since otherwise on release we'll + # get a RTE raised by `trio` due to ownership.. 
+        Lock._debug_lock.acquire_nowait()
+        Lock._owned_by_root = True

     # TODO: ensure we aggressively make the user aware about
     # entering the global ``breakpoint()`` built-in from sync
-- 
2.34.1


From ad44d59f3d16ae1d71986db6be59d955cfba5294 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 7 Jun 2024 22:35:59 -0400
Subject: [PATCH 150/190] Drop thread logging to make `log.pdb()` patts match
 in test

---
 examples/debugging/sync_bp.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/examples/debugging/sync_bp.py b/examples/debugging/sync_bp.py
index e265df44..137710fc 100644
--- a/examples/debugging/sync_bp.py
+++ b/examples/debugging/sync_bp.py
@@ -1,6 +1,5 @@
 from functools import partial
 import time
-from threading import current_thread

 import trio
 import tractor
@@ -16,17 +15,9 @@ def sync_pause(
         time.sleep(pre_sleep)

     if use_builtin:
-        print(
-            f'Entering `breakpoint()` from\n'
-            f'{current_thread()}\n'
-        )
         breakpoint(hide_tb=hide_tb)

     else:
-        print(
-            f'Entering `tractor.pause_from_sync()` from\n'
-            f'{current_thread()}@{tractor.current_actor().uid}\n'
-        )
         tractor.pause_from_sync()

     if error:
-- 
2.34.1


From 31cc33c66ceea7afab655ad43a2626df7ec93f39 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 10 Jun 2024 08:54:03 -0400
Subject: [PATCH 151/190] First proto: multi-threaded synced `pdb`-REPLs

Functionally working for multi-threaded (via cpython threads spawned
from `trio.to_thread.run_sync()`) alongside subactors, tested (for
now) only with threads started inside the root actor (which seemed to
have the most issues in terms of the impl and special cases..) using
the new `tractor.pause_from_sync()` API!

Main implementation changes to `.pause_from_sync()`
------ - ------
- from the root actor, we need to ensure the bg thread case is handled
  *specially* since no IPC is used to request the TTY stdio mutex and
  `Lock` (API) usage is conducted entirely from a local task or
  thread; dedicated `Lock` usage for the root-actor already is
  branched inside `._pause()` and needs similar handling from a root
  bg-thread:
 |_for the special case of a root bg thread we need to
   `trio`-main-thread schedule a bg task inside a new
   `_pause_from_bg_root_thread()`. The new task needs to implement
   most of what is handled inside `._pause()` manually, mostly because
   in this root-actor-bg-thread case we have 2 constraints:
   1. to enter `PdbREPL.interaction()` **from the bg thread** directly,
   2. the task that `Lock._debug_lock.acquire()`s has to be the same
      one that calls `.release()` (a `trio.FIFOLock` constraint)
 |_impl deats of this `_pause_from_bg_root_thread()` include:
   - (for now) calling `._pause()` to acquire the `Lock._debug_lock`.
   - setting its own `DebugStatus.repl_release`.
   - calling `.DebugStatus.shield_sigint()` to ensure the root's main
     thread uses the right handler when the bg one is REPL-ing.
   - wait manually on `.repl_release` to be set by the thread's
     dedicated `PdbREPL` exit.
   - manually calling `Lock.release()` from the **same task** that
     acquired it.
- expect calls to `._pause()` to deliver a `tuple[Task, PdbREPL]` such
  that we always get a handle both to any newly created REPL instance
  and (maybe) the scheduled bg task within which it runs.
- add a single `message: str` style to `log.devx()` based on branching
  style for logging.
- ensure both `DebugStatus.repl` and `.repl_task` are set **just
  before** calling `._set_trace()` to ensure the correct `Task|Thread`
  is set when the REPL is finally entered from sync code.
- add a wrapping caller `_sync_pause_from_builtin()` which passes in the
  new `called_from_builtin=True` to indicate `breakpoint()` caller
  usage, obvi pass in `api_frame`.

Changes to `._pause()` in support of ^
------ - ------
- `TaskStatus.started()` and return the `tuple[Task, PdbREPL]` to
  callers / starters.
- only call `DebugStatus.shield_sigint()` when no `repl` is passed bc
  some callers (like bg threads) may need to apply it at some specific
  point themselves.
- tweak some asserts for the `debug_func == None` / non-`trio`-thread
  case.
- add a mod-level `_repl_fail_msg: str` to be used when there's an
  internal `._pause()` failure for testing, easier to pexpect match.
- more comprehensive logging for the root-actor branched case to
  (attempt to) indicate any of the 3 cases:
  - remote ctx from subactor has the `Lock`,
  - already existing root task or thread has it or,
  - some kinda stale `.locked()` situation where the root has the lock
    but we don't know why.
- for root usage, revert to always `await Lock._debug_lock.acquire()`-ing
  despite `called_from_sync` since `.pause_from_sync()` was reworked to
  instead handle the special bg thread case in the new
  `_pause_from_bg_root_thread()` task.
- always do `return _enter_repl_sync(debug_func)`.
- try to report any `repl_task: Task|Thread` set by the caller
  (particularly for the bg thread cases) as being the thread or task
  `._pause()` was called "on behalf of"

Changes to `DebugStatus`/`Lock` in support of ^
------ - ------
- only call `Lock.release()` from `PdbREPL.set_[quit/continue]()` when
  called from the main `trio` thread and always call
  `DebugStatus.release()` **after** to ensure `.repl_release` is set
  **after** `._debug_lock.release()`.
- only call `.repl_release.set()` from the `trio` main thread,
  otherwise use `.from_thread.run_sync()`.
- many more refinements in `Lock.release()` for threading cases:
  - return `bool` to indicate whether the lock was released by the
    caller.
  - mask (in prep to drop) `_pause()` usage of `Lock.release(force=True)`
    since forcing a release can't ever avoid the RTE from `trio`.. the
    same task **must** acquire/release.
  - don't allow usage from non-`trio`-main-threads, ever; there's no
    point given the same-task-must-manage-the-`FIFOLock` constraint.
  - much more detailed logging using `message`-building-style for all
    caller (edge) cases.
   |_ use a `we_released: bool` to determine failed-to-release edge
      cases which can happen if called from bg threads; ensure we
      `log.exception()` on any incorrect usage resulting in release
      failure.
   |_ complain loudly if the release fails and some other task/thread
      still holds the lock.
   |_ be explicit about "who" (which task or thread) the release is "on
      behalf of" by reading `DebugStatus.repl_task` since the caller
      isn't the REPL operator in many sync cases.
  - more or less drop `force` support, as mentioned above.
  - ensure we unset `._owned_by_root` if the caller is a root task.

Other misc
------ - ------
- rename `lock_tty_for_child()` -> `lock_stdio_for_peer()`.
- rejig `Lock.repr()` to show lock and event stats.
- stage `Lock.stats` and `.owner` methods in prep for doing a singleton
  instance and `@property`s.
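
As a purely illustrative sketch (not part of this diff) of the newly
supported root-actor-bg-thread case: a plain thread spawned via
`trio.to_thread.run_sync()` can now REPL-pause with
`tractor.pause_from_sync()`, assuming a `debug_mode=True` actor tree as
in `examples/debugging/sync_bp.py`:

    import trio
    import tractor

    def sync_pause() -> None:
        # enter the REPL from this non-`trio` bg thread; under the
        # hood this schedules `_pause_from_bg_root_thread()` on the
        # `trio`-main-thread which manages the `Lock._debug_lock`
        # acquire/release on the thread's behalf.
        tractor.pause_from_sync()

    async def main():
        async with tractor.open_nursery(debug_mode=True):
            # the root-actor-bg-thread case described above
            await trio.to_thread.run_sync(sync_pause)

    trio.run(main)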
--- tractor/devx/_debug.py | 732 +++++++++++++++++++++++++++++------------ 1 file changed, 525 insertions(+), 207 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 858133fd..3218cffa 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -62,7 +62,6 @@ import trio from trio import CancelScope from trio.lowlevel import ( current_task, - Task, ) from trio import ( TaskStatus, @@ -81,6 +80,8 @@ from tractor._state import ( # ) if TYPE_CHECKING: + from trio.lowlevel import Task + from threading import Thread from tractor._ipc import Channel from tractor._context import Context from tractor._runtime import ( @@ -92,6 +93,11 @@ if TYPE_CHECKING: log = get_logger(__name__) +# TODO: refine the internal impl and APIs in this module! +# +# -[ ] separate `._pause()` branch-cases for calling from a root task +# vs. from subactors + def hide_runtime_frames() -> dict[FunctionType, CodeType]: ''' @@ -203,6 +209,15 @@ class Lock: return None + # TODO: once we convert to singleton-per-actor-style + # @property + # def stats(cls) -> trio.LockStatistics: + # return cls._debug_lock.statistics() + + # @property + # def owner(cls) -> Task: + # return cls._debug_lock.statistics().owner + # ROOT ONLY # ------ - ------- # the root-actor-ONLY singletons for, @@ -229,18 +244,22 @@ class Lock: @classmethod def repr(cls) -> str: lock_stats: trio.LockStatistics = cls._debug_lock.statistics() + req: trio.Event|None = cls.req_handler_finished fields: str = ( - f'req_handler_finished: {cls.req_handler_finished}\n' - f'_blocked: {cls._blocked}\n\n' - f'_debug_lock: {cls._debug_lock}\n' - f'lock_stats: {lock_stats}\n' - f'ctx_in_debug: {cls.ctx_in_debug}\n' + f'|_ ._blocked: {cls._blocked}\n' + f'|_ ._debug_lock: {cls._debug_lock}\n' + f' {lock_stats}\n\n' + f'|_ .ctx_in_debug: {cls.ctx_in_debug}\n' + f'|_ .req_handler_finished: {req}\n' ) + if req: + req_stats: trio.EventStatistics = req.statistics() + fields += f' {req_stats}\n' body: str = textwrap.indent( fields, - prefix=' |_', + prefix=' ', ) return ( f'<{cls.__name__}(\n' @@ -253,28 +272,59 @@ class Lock: def release( cls, force: bool = False, - ): - if not cls._owned_by_root: - message: str = 'TTY lock not held by any child\n' - else: - message: str = 'TTY lock held in root-actor task\n' + raise_on_thread: bool = True, - if not (is_trio_main := DebugStatus.is_main_trio_thread()): - task: threading.Thread = threading.current_thread() + ) -> bool: + ''' + Release the actor-tree global TTY stdio lock (only) from the + `trio.run()`-main-thread. + + ''' + we_released: bool = False + ctx_in_debug: Context|None = cls.ctx_in_debug + repl_task: Task|Thread|None = DebugStatus.repl_task + if not DebugStatus.is_main_trio_thread(): + thread: threading.Thread = threading.current_thread() + message: str = ( + '`Lock.release()` can not be called from a non-main-`trio` thread!\n' + f'{thread}\n' + ) + if raise_on_thread: + raise RuntimeError(message) + + log.devx(message) + return False + + task: Task = current_task() + + # sanity check that if we're the root actor + # the lock is marked as such. + # note the pre-release value may be diff the the + # post-release task. 
+ if repl_task is task: + assert cls._owned_by_root + message: str = ( + 'TTY lock held by root-actor on behalf of local task\n' + f'|_{repl_task}\n' + ) else: - task: trio.Task = current_task() + assert DebugStatus.repl_task is not task + + message: str = ( + 'TTY lock was NOT released on behalf of caller\n' + f'|_{task}\n' + ) try: lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( - lock.locked() - and - ( - owner is task - # or - # cls._owned_by_root - ) + (lock.locked() or force) + # ^-TODO-NOTE-^ should we just remove this, since the + # RTE case above will always happen when you force + # from the wrong task? + + and (owner is task) # ^-NOTE-^ if we do NOT ensure this, `trio` will # raise a RTE when a non-owner tries to releasee the # lock. @@ -284,17 +334,27 @@ class Lock: # being set to the `.repl_task` such that the above # condition matches and we actually release the lock. # This is particular of note from `.pause_from_sync()`! + ): - if not is_trio_main: - trio.from_thread.run_sync( - cls._debug_lock.release + cls._debug_lock.release() + we_released: bool = True + if repl_task: + message: str = ( + 'Lock released on behalf of root-actor-local REPL owner\n' + f'|_{repl_task}\n' ) else: - cls._debug_lock.release() - message: str = 'TTY lock released for child\n' + message: str = ( + 'TTY lock released by us on behalf of remote peer?\n' + f'|_ctx_in_debug: {ctx_in_debug}\n\n' + ) + # mk_pdb().set_trace() + # elif owner: except RuntimeError as rte: - log.exception('Failed to release `Lock`?') + log.exception( + 'Failed to release `Lock._debug_lock: trio.FIFOLock`?\n' + ) raise rte finally: @@ -303,40 +363,59 @@ class Lock: # we are now back in the "tty unlocked" state. This is basically # and edge triggered signal around an empty queue of sub-actor # tasks that may have tried to acquire the lock. - lock_stats = cls._debug_lock.statistics() + lock_stats: trio.LockStatistics = cls._debug_lock.statistics() req_handler_finished: trio.Event|None = Lock.req_handler_finished if ( not lock_stats.owner - or force and req_handler_finished is None ): - message += '-> No more child ctx tasks hold the TTY lock!\n' - - elif ( - req_handler_finished - and - lock.locked() - ): - req_stats = req_handler_finished.statistics() message += ( - f'-> A child ctx task still owns the `Lock` ??\n' - f' |_lock_stats: {lock_stats}\n' - f' |_req_stats: {req_stats}\n' + '-> No new task holds the TTY lock!\n\n' + f'{Lock.repr()}\n' ) - cls.ctx_in_debug = None + elif ( + req_handler_finished # new IPC ctx debug request active + and + lock.locked() # someone has the lock + ): + behalf_of_task = ( + ctx_in_debug + or + repl_task + ) + message += ( + f'\nA non-caller task still owns this lock on behalf of ' + f'{behalf_of_task}\n' + f'|_{lock_stats.owner}\n' + ) if ( - cls._owned_by_root + we_released + and + ctx_in_debug ): - if not lock.locked(): - cls._owned_by_root = False - else: - message += 'Lock still held by root actor task?!?\n' - lock.release() + cls.ctx_in_debug = None # unset + + # post-release value (should be diff then value above!) 
+ repl_task: Task|Thread|None = DebugStatus.repl_task + if ( + cls._owned_by_root + and + we_released + ): + cls._owned_by_root = False + + if task is not repl_task: + message += ( + 'Lock released by root actor on behalf of bg thread\n' + f'|_{repl_task}\n' + ) log.devx(message) + return we_released + @classmethod @acm async def acquire_for_ctx( @@ -380,7 +459,7 @@ class Lock: log.runtime(pre_msg) # NOTE: if the surrounding cancel scope from the - # `lock_tty_for_child()` caller is cancelled, this line should + # `lock_stdio_for_peer()` caller is cancelled, this line should # unblock and NOT leave us in some kind of # a "child-locked-TTY-but-child-is-uncontactable-over-IPC" # condition. @@ -398,7 +477,7 @@ class Lock: # IF we received a cancel during the shielded lock entry of some # next-in-queue requesting task, then the resumption here will # result in that ``trio.Cancelled`` being raised to our caller - # (likely from ``lock_tty_for_child()`` below)! In + # (likely from `lock_stdio_for_peer()` below)! In # this case the ``finally:`` below should trigger and the # surrounding caller side context should cancel normally # relaying back to the caller. @@ -408,8 +487,8 @@ class Lock: finally: message :str = 'Exiting `Lock.acquire_for_ctx()` on behalf of sub-actor\n' if we_acquired: - message += '-> TTY lock released by child\n' cls.release() + message += '-> TTY lock released by child\n' else: message += '-> TTY lock never acquired by child??\n' @@ -421,7 +500,7 @@ class Lock: @tractor.context -async def lock_tty_for_child( +async def lock_stdio_for_peer( ctx: Context, subactor_task_uid: tuple[str, int], @@ -545,25 +624,26 @@ async def lock_tty_for_child( except BaseException as req_err: message: str = ( + f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' 'Forcing `Lock.release()` for req-ctx since likely an ' 'internal error!\n\n' f'{ctx}' ) if isinstance(req_err, trio.Cancelled): message = ( - 'Cancelled during root TTY-lock dialog?\n' + 'Cancelled during root TTY-lock dialog\n' + message ) else: message = ( - 'Errored during root TTY-lock dialog?\n' + 'Errored during root TTY-lock dialog\n' + message ) log.exception(message) - Lock.release(force=True) + Lock.release() #force=True) raise finally: @@ -645,7 +725,7 @@ class DebugStatus: def shield_sigint(cls): ''' Shield out SIGINT handling (which by default triggers - `trio.Task` cancellation) in subactors when a `pdb` REPL + `Task` cancellation) in subactors when a `pdb` REPL is active. Avoids cancellation of the current actor (task) when the user @@ -767,9 +847,17 @@ class DebugStatus: try: # sometimes the task might already be terminated in # which case this call will raise an RTE? - if repl_release is not None: - repl_release.set() - + if ( + repl_release is not None + ): + if cls.is_main_trio_thread(): + repl_release.set() + else: + # XXX NOTE ONLY used for bg root-actor sync + # threads, see `.pause_from_sync()`. + trio.from_thread.run_sync( + repl_release.set + ) finally: # if req_ctx := cls.req_ctx: # req_ctx._scope.cancel() @@ -856,8 +944,6 @@ class PdbREPL(pdbp.Pdb): try: super().set_continue() finally: - DebugStatus.release() - # NOTE: for subactors the stdio lock is released via the # allocated RPC locker task, so for root we have to do it # manually. 
@@ -865,21 +951,32 @@ class PdbREPL(pdbp.Pdb): is_root_process() and Lock._debug_lock.locked() + and + DebugStatus.is_main_trio_thread() ): + # Lock.release(raise_on_thread=False) Lock.release() + # XXX after `Lock.release()` for root local repl usage + DebugStatus.release() + def set_quit(self): try: super().set_quit() finally: - DebugStatus.release() if ( is_root_process() and Lock._debug_lock.locked() + and + DebugStatus.is_main_trio_thread() ): + # Lock.release(raise_on_thread=False) Lock.release() + # XXX after `Lock.release()` for root local repl usage + DebugStatus.release() + # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch # my d$%&. @@ -960,20 +1057,24 @@ async def request_root_stdio_lock( task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED, ): ''' - Connect to the root actor of this process tree and RPC-invoke - a task which acquires a std-streams global `Lock`: a actor tree - global mutex which prevents other subactors from entering - a `PdbREPL` at the same time as any other. + Connect to the root actor for this actor's process tree and + RPC-invoke a task which acquires the std-streams global `Lock`: + a process-tree-global mutex which prevents multiple actors from + entering `PdbREPL.interaction()` at the same time such that the + parent TTY's stdio is never "clobbered" by simultaneous + reads/writes. - The actual `Lock` singleton exists ONLY in the root actor's - memory and does nothing more then set process-tree global state. - The actual `PdbREPL` interaction is completely isolated to each - sub-actor and with the `Lock` merely providing the multi-process - syncing mechanism to avoid any subactor (or the root itself) from - entering the REPL at the same time. + The actual `Lock` singleton instance exists ONLY in the root + actor's memory space and does nothing more then manage + process-tree global state, + namely a `._debug_lock: trio.FIFOLock`. + + The actual `PdbREPL` interaction/operation is completely isolated + to each sub-actor (process) with the root's `Lock` providing the + multi-process mutex-syncing mechanism to avoid parallel REPL + usage within an actor tree. ''' - log.devx( 'Initing stdio-lock request task with root actor' ) @@ -1004,7 +1105,7 @@ async def request_root_stdio_lock( # `.repl_release: # trio.Event`. with trio.CancelScope(shield=shield) as req_cs: # XXX: was orig for debugging cs stack corruption.. - # log.info( + # log.devx( # 'Request cancel-scope is:\n\n' # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' # ) @@ -1014,7 +1115,7 @@ async def request_root_stdio_lock( # TODO: merge into single async with ? 
async with get_root() as portal: async with portal.open_context( - lock_tty_for_child, + lock_stdio_for_peer, subactor_task_uid=task_uid, # NOTE: set it here in the locker request task bc it's # possible for multiple such requests for the lock in any @@ -1468,6 +1569,11 @@ class DebugRequestError(RuntimeError): ''' +_repl_fail_msg: str = ( + 'Failed to REPl via `_pause()` ' +) + + async def _pause( debug_func: Callable|partial|None, @@ -1487,10 +1593,13 @@ async def _pause( hide_tb: bool = True, called_from_sync: bool = False, called_from_bg_thread: bool = False, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, + task_status: TaskStatus[ + tuple[Task, PdbREPL], + trio.Event + ] = trio.TASK_STATUS_IGNORED, **debug_func_kwargs, -) -> None: +) -> tuple[PdbREPL, Task]|None: ''' Inner impl for `pause()` to avoid the `trio.CancelScope.__exit__()` stack frame when not shielded (since apparently i can't figure out @@ -1502,25 +1611,26 @@ async def _pause( __tracebackhide__: bool = hide_tb actor: Actor = current_actor() try: - # TODO: use the `Task` instance instead for `is` checks - # below! - task: Task = trio.lowlevel.current_task() + task: Task = current_task() except RuntimeError as rte: + log.exception('Failed to get current task?') if actor.is_infected_aio(): raise RuntimeError( '`tractor.pause[_from_sync]()` not yet supported ' 'for infected `asyncio` mode!' ) from rte + raise + if debug_func is not None: debug_func = partial(debug_func) - repl: PdbREPL = repl or mk_pdb() - # XXX NOTE XXX set it here to avoid ctl-c from cancelling a debug # request from a subactor BEFORE the REPL is entered by that # process. - DebugStatus.shield_sigint() + if not repl: + DebugStatus.shield_sigint() + repl: PdbREPL = repl or mk_pdb() # TODO: move this into a `open_debug_request()` @acm? # -[ ] prolly makes the most sense to do the request @@ -1538,6 +1648,9 @@ async def _pause( debug_func.func.__name__ if debug_func else 'None' ) + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + task_status.started((task, repl)) try: if debug_func: # block here one (at the appropriate frame *up*) where @@ -1548,11 +1661,11 @@ async def _pause( f' |_{task}\n' ) - # set local actor task to avoid recurrent - # entries/requests from the same local task (to the root - # process). - DebugStatus.repl = repl + # set local task on process-global state to avoid + # recurrent entries/requests from the same + # actor-local task. DebugStatus.repl_task = task + DebugStatus.repl = repl # invoke the low-level REPL activation routine which itself # should call into a `Pdb.set_trace()` of some sort. @@ -1568,16 +1681,13 @@ async def _pause( else: if ( called_from_sync - # and - # is_root_process() and not DebugStatus.is_main_trio_thread() ): + assert called_from_bg_thread assert DebugStatus.repl_task is not task - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - task_status.started(DebugStatus) + return (task, repl) except trio.Cancelled: log.exception( @@ -1607,12 +1717,23 @@ async def _pause( # TODO: this should be created as part of `DebugRequest()` init # which should instead be a one-shot-use singleton much like # the `PdbREPL`. 
+ repl_task: Thread|Task|None = DebugStatus.repl_task if ( not DebugStatus.repl_release or DebugStatus.repl_release.is_set() ): + log.devx( + 'Setting new `DebugStatus.repl_release: trio.Event` for requesting task\n' + f'|_{task}\n' + ) DebugStatus.repl_release = trio.Event() + else: + log.devx( + 'Already an existing actor-local REPL user task\n' + f'|_{repl_task}\n' + ) + # ^-NOTE-^ this must be created BEFORE scheduling any subactor # debug-req task since it needs to wait on it just after # `.started()`-ing back its wrapping `.req_cs: CancelScope`. @@ -1620,73 +1741,110 @@ async def _pause( repl_err: BaseException|None = None try: if is_root_process(): - # we also wait in the root-parent for any child that # may have the tty locked prior - # TODO: wait, what about multiple root tasks acquiring it though? + # TODO: wait, what about multiple root tasks (with bg + # threads) acquiring it though? ctx: Context|None = Lock.ctx_in_debug + repl_task: Task|None = DebugStatus.repl_task if ( ctx is None and - DebugStatus.repl - and - DebugStatus.repl_task is task + repl_task is task + # and + # DebugStatus.repl + # ^-NOTE-^ matches for multi-threaded case as well? ): # re-entrant root process already has it: noop. log.warning( f'This root actor task is already within an active REPL session\n' - f'Ignoring this re-entered `tractor.pause()`\n' - f'task: {task.name}\n' + f'Ignoring this recurrent`tractor.pause()` entry\n\n' + f'|_{task}\n' # TODO: use `._frame_stack` scanner to find the @api_frame ) with trio.CancelScope(shield=shield): await trio.lowlevel.checkpoint() - return + return repl, task + + # elif repl_task: + # log.warning( + # f'This root actor has another task already in REPL\n' + # f'Waitin for the other task to complete..\n\n' + # f'|_{task}\n' + # # TODO: use `._frame_stack` scanner to find the @api_frame + # ) + # with trio.CancelScope(shield=shield): + # await DebugStatus.repl_release.wait() + # await trio.sleep(0.1) # must shield here to avoid hitting a `Cancelled` and # a child getting stuck bc we clobbered the tty with trio.CancelScope(shield=shield): - if Lock._debug_lock.locked(): + ctx_line = '`Lock` in this root actor task' + acq_prefix: str = 'shield-' if shield else '' + if ( + Lock._debug_lock.locked() + ): + if ctx: + ctx_line: str = ( + 'active `Lock` owned by ctx\n\n' + f'{ctx}' + ) + elif Lock._owned_by_root: + ctx_line: str = ( + 'Already owned by root-task `Lock`\n\n' + f'repl_task: {DebugStatus.repl_task}\n' + f'repl: {DebugStatus.repl}\n' + ) + else: + ctx_line: str = ( + '**STALE `Lock`** held by unknown root/remote task ' + 'with no request ctx !?!?' + ) - acq_prefix: str = 'shield-' if shield else '' - ctx_line: str = ( - 'lock owned by ctx\n\n' - f'{ctx}' - ) if ctx else 'stale lock with no request ctx!?' - log.devx( - f'attempting to {acq_prefix}acquire active TTY ' - f'{ctx_line}' - ) + log.devx( + f'attempting to {acq_prefix}acquire ' + f'{ctx_line}' + ) + await Lock._debug_lock.acquire() + Lock._owned_by_root = True + # else: - # XXX: since we need to enter pdb synchronously below, - # and we don't want to block the thread that starts - # stepping through the application thread, we later - # must `Lock._debug_lock.release()` manually from - # some `PdbREPL` completion callback(`.set_[continue/exit]()`). - # - # So, when `._pause()` is called from a (bg/non-trio) - # thread, special provisions are needed and we need - # to do the `.acquire()`/`.release()` calls from - # a common `trio.task` (due to internal impl of - # `FIFOLock`). 
Thus we do not acquire here and - # instead expect `.pause_from_sync()` to take care of - # this detail depending on the caller's (threading) - # usage. - # - # NOTE that this special case is ONLY required when - # using `.pause_from_sync()` from the root actor - # since OW a subactor will instead make an IPC - # request (in the branch below) to acquire the - # `Lock`-mutex and a common root-actor RPC task will - # take care of `._debug_lock` mgmt! - if not called_from_sync: - await Lock._debug_lock.acquire() - Lock._owned_by_root = True + # if ( + # not called_from_bg_thread + # and not called_from_sync + # ): + # log.devx( + # f'attempting to {acq_prefix}acquire ' + # f'{ctx_line}' + # ) + + # XXX: since we need to enter pdb synchronously below, + # and we don't want to block the thread that starts + # stepping through the application thread, we later + # must `Lock._debug_lock.release()` manually from + # some `PdbREPL` completion callback(`.set_[continue/exit]()`). + # + # So, when `._pause()` is called from a (bg/non-trio) + # thread, special provisions are needed and we need + # to do the `.acquire()`/`.release()` calls from + # a common `trio.task` (due to internal impl of + # `FIFOLock`). Thus we do not acquire here and + # instead expect `.pause_from_sync()` to take care of + # this detail depending on the caller's (threading) + # usage. + # + # NOTE that this special case is ONLY required when + # using `.pause_from_sync()` from the root actor + # since OW a subactor will instead make an IPC + # request (in the branch below) to acquire the + # `Lock`-mutex and a common root-actor RPC task will + # take care of `._debug_lock` mgmt! # enter REPL from root, no TTY locking IPC ctx necessary # since we can acquire the `Lock._debug_lock` directly in # thread. - _enter_repl_sync(debug_func) + return _enter_repl_sync(debug_func) # TODO: need a more robust check for the "root" actor elif ( @@ -1809,7 +1967,7 @@ async def _pause( ) # enter REPL - _enter_repl_sync(debug_func) + return _enter_repl_sync(debug_func) # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? @@ -1838,7 +1996,9 @@ async def _pause( else: log.exception( - 'Failed to engage debugger via `_pause()` ??\n' + _repl_fail_msg + + + f'on behalf of {repl_task} ??\n' ) DebugStatus.release(cancel_req_task=True) @@ -1882,11 +2042,11 @@ def _set_trace( # optionally passed in to provide support for # `pause_from_sync()` where actor: tractor.Actor|None = None, - task: trio.Task|None = None, + task: Task|Thread|None = None, ): __tracebackhide__: bool = hide_tb actor: tractor.Actor = actor or current_actor() - task: trio.Task = task or current_task() + task: Task|Thread = task or current_task() # else: # TODO: maybe print the actor supervion tree up to the @@ -2023,7 +2183,7 @@ async def maybe_init_greenback( if mod := maybe_import_greenback(**kwargs): await mod.ensure_portal() - log.info( + log.devx( '`greenback` portal opened!\n' 'Sync debug support activated!\n' ) @@ -2032,12 +2192,116 @@ async def maybe_init_greenback( return None -# TODO: allow pausing from sync code. -# normally by remapping python's builtin breakpoint() hook to this -# runtime aware version which takes care of all . 
-def pause_from_sync(
+async def _pause_from_bg_root_thread(
+    behalf_of_thread: Thread,
+    repl: PdbREPL,
+    hide_tb: bool,
+    task_status: TaskStatus[Task] = trio.TASK_STATUS_IGNORED,
+    **_pause_kwargs,
+):
+    '''
+    Acquire the `Lock._debug_lock` from a bg (only needed for
+    root-actor) non-`trio` thread (started via a call to
+    `.to_thread.run_sync()` in some actor) by scheduling this func in
+    the actor's service (TODO eventually a special debug_mode)
+    nursery. This task acquires the lock then `.started()`s the
+    `DebugStatus.repl_release: trio.Event`, waits for the `PdbREPL` to
+    set it, then terminates very much the same way as
+    `request_root_stdio_lock()` uses an IPC `Context` from a subactor
+    to do the same from a remote process.
+
+    This task is normally only required to be scheduled for the
+    special cases of a bg sync thread running in the root actor; see
+    the only usage inside `.pause_from_sync()`.
+
+    '''
+    global Lock
+    # TODO: unify this copied code with where it was
+    # from in `maybe_wait_for_debugger()`
+    # if (
+    #     Lock.req_handler_finished is not None
+    #     and not Lock.req_handler_finished.is_set()
+    #     and (in_debug := Lock.ctx_in_debug)
+    # ):
+    #     log.devx(
+    #         '\nRoot is waiting on tty lock to release from\n\n'
+    #         # f'{caller_frame_info}\n'
+    #     )
+    #     with trio.CancelScope(shield=True):
+    #         await Lock.req_handler_finished.wait()
+
+    #     log.pdb(
+    #         f'Subactor released debug lock\n'
+    #         f'|_{in_debug}\n'
+    #     )
+    task: Task = current_task()
+
+    # Manually acquire since otherwise on release we'll
+    # get a RTE raised by `trio` due to ownership..
+    log.devx(
+        'Trying to acquire `Lock` on behalf of bg thread\n'
+        f'|_{behalf_of_thread}\n'
+    )
+    # DebugStatus.repl_task = behalf_of_thread
+    out = await _pause(
+        debug_func=None,
+        repl=repl,
+        hide_tb=hide_tb,
+        called_from_sync=True,
+        called_from_bg_thread=True,
+        **_pause_kwargs
+    )
+    lock: trio.FIFOLock = Lock._debug_lock
+    stats: trio.LockStatistics = lock.statistics()
+    assert stats.owner is task
+    assert Lock._owned_by_root
+    assert DebugStatus.repl_release
+
+    # TODO: do we actually need this?
+    # originally i was trying to solve why this was
+    # unblocking too soon in a thread but it was actually
+    # that we weren't setting our own `repl_release` below..
+    while stats.owner is not task:
+        log.devx(
+            f'Trying to acquire `._debug_lock` from {stats.owner} for\n'
+            f'|_{behalf_of_thread}\n'
+        )
+        await lock.acquire()
+        break
+
+    # XXX NOTE XXX super important dawg..
+    # set our own event since the current one might
+    # have already been overridden and then set when the
+    # last REPL mutex holder exits their sesh!
+    # => we do NOT want to override any existing one
+    # and we want to ensure we set our own ONLY AFTER we have
+    # acquired the `._debug_lock`
+    repl_release = DebugStatus.repl_release = trio.Event()
+
+    # unblock caller thread delivering this bg task
+    log.devx(
+        'Unblocking root-bg-thread since we acquired lock via `._pause()`\n'
+        f'|_{behalf_of_thread}\n'
+    )
+    task_status.started(out)
+    DebugStatus.shield_sigint()
+
+    # wait for bg thread to exit REPL sesh.
+ try: + await repl_release.wait() + finally: + log.devx( + 'releasing lock from bg root thread task!\n' + f'|_ {behalf_of_thread}\n' + ) + Lock.release() + + +def pause_from_sync( hide_tb: bool = True, + called_from_builtin: bool = False, + api_frame: FrameType|None = None, # proxy to `._pause()`, for ex: # shield: bool = False, @@ -2045,15 +2309,24 @@ def pause_from_sync( **_pause_kwargs, ) -> None: + ''' + Pause a `tractor` scheduled task or thread from sync (non-async + function) code. + When `greenback` is installed we remap python's builtin + `breakpoint()` hook to this runtime-aware version which takes + care of all bg-thread detection and appropriate synchronization + with the root actor's `Lock` to avoid mult-thread/process REPL + clobbering Bo + + ''' __tracebackhide__: bool = hide_tb try: actor: tractor.Actor = current_actor( err_on_no_runtime=False, ) - log.debug( - f'{actor.uid}: JUST ENTERED `tractor.pause_from_sync()`' - f'|_{actor}\n' + message: str = ( + f'{actor.uid} task called `tractor.pause_from_sync()`\n\n' ) if not actor: raise RuntimeError( @@ -2063,7 +2336,7 @@ def pause_from_sync( '- `async with tractor.open_root_actor()`\n' ) - # NOTE: once supported, remove this AND the one + # TODO: once supported, remove this AND the one # inside `._pause()`! if actor.is_infected_aio(): raise RuntimeError( @@ -2071,78 +2344,111 @@ def pause_from_sync( 'for infected `asyncio` mode!' ) - # raises on not-found by default - greenback: ModuleType = maybe_import_greenback() - mdb: PdbREPL = mk_pdb() + DebugStatus.shield_sigint() + repl: PdbREPL = mk_pdb() - # run async task which will lock out the root proc's TTY. + # message += f'-> created local REPL {repl}\n' + is_root: bool = is_root_process() + + # TODO: we could also check for a non-`.to_thread` context + # using `trio.from_thread.check_cancelled()` (says + # oremanj) wherein we get the following outputs: + # + # `RuntimeError`: non-`.to_thread` spawned thread + # noop: non-cancelled `.to_thread` + # `trio.Cancelled`: cancelled `.to_thread` + + # when called from a (bg) thread, run an async task in a new + # thread which will call `._pause()` manually with special + # handling for root-actor caller usage. if not DebugStatus.is_main_trio_thread(): - - # TODO: we could also check for a non-`.to_thread` context - # using `trio.from_thread.check_cancelled()` (says - # oremanj) wherein we get the following outputs: - # - # `RuntimeError`: non-`.to_thread` spawned thread - # noop: non-cancelled `.to_thread` - # `trio.Cancelled`: cancelled `.to_thread` - # - log.warning( - 'Engaging `.pause_from_sync()` from ANOTHER THREAD!' - ) - task: threading.Thread = threading.current_thread() - DebugStatus.repl_task: str = task + thread: threading.Thread = threading.current_thread() + repl_owner = thread # TODO: make root-actor bg thread usage work! - # if is_root_process(): - # async def _pause_from_sync_thread(): - # ... - # else: - # .. the below .. 
+ if is_root: + message += ( + f'-> called from a root-actor bg {thread}\n' + f'-> scheduling `._pause_from_sync_thread()`..\n' + ) + bg_task, repl = trio.from_thread.run( + afn=partial( + actor._service_n.start, + partial( + _pause_from_bg_root_thread, + behalf_of_thread=thread, + repl=repl, + hide_tb=hide_tb, + **_pause_kwargs, + ), + ) + ) + message += ( + f'-> `._pause_from_sync_thread()` started bg task {bg_task}\n' + ) + else: + message += f'-> called from a bg {thread}\n' + # NOTE: since this is a subactor, `._pause()` will + # internally issue a debug request via + # `request_root_stdio_lock()` and we don't need to + # worry about all the special considerations as with + # the root-actor per above. + bg_task, repl = trio.from_thread.run( + afn=partial( + _pause, + debug_func=None, + repl=repl, + hide_tb=hide_tb, - trio.from_thread.run( - partial( - _pause, - debug_func=None, - repl=mdb, - hide_tb=hide_tb, + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, - # XXX to prevent `._pause()` for setting - # `DebugStatus.repl_task` to the gb task! - called_from_sync=True, - called_from_bg_thread=True, - - **_pause_kwargs - ), - ) + **_pause_kwargs + ), + ) + assert bg_task is not DebugStatus.repl_task else: # we are presumably the `trio.run()` + main thread - task: trio.Task = current_task() - DebugStatus.repl_task: str = task - greenback.await_( + # raises on not-found by default + greenback: ModuleType = maybe_import_greenback() + message += f'-> imported {greenback}\n' + repl_owner: Task = current_task() + message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' + out = greenback.await_( _pause( debug_func=None, - repl=mdb, + repl=repl, hide_tb=hide_tb, called_from_sync=True, **_pause_kwargs, ) ) + if out: + bg_task, repl = out + assert repl is repl + assert bg_task is repl_owner - if is_root_process(): - # Manually acquire since otherwise on release we'll - # get a RTE raised by `trio` due to ownership.. - Lock._debug_lock.acquire_nowait() - Lock._owned_by_root = True + # NOTE: normally set inside `_enter_repl_sync()` + DebugStatus.repl_task: str = repl_owner # TODO: ensure we aggressively make the user aware about - # entering the global ``breakpoint()`` built-in from sync + # entering the global `breakpoint()` built-in from sync # code? + message += ( + f'-> successfully scheduled `._pause()` in `trio` thread on behalf of {bg_task}\n' + f'-> Entering REPL via `tractor._set_trace()` from caller {repl_owner}\n' + ) + log.devx(message) + + DebugStatus.repl = repl _set_trace( - api_frame=inspect.currentframe(), - repl=mdb, + api_frame=api_frame or inspect.currentframe(), + repl=repl, hide_tb=hide_tb, actor=actor, - task=task, + task=repl_owner, ) # LEGACY NOTE on next LOC's frame showing weirdness.. # @@ -2155,6 +2461,26 @@ def pause_from_sync( raise err +def _sync_pause_from_builtin( + *args, + called_from_builtin=True, + **kwargs, +) -> None: + ''' + Proxy call `.pause_from_sync()` but indicate the caller is the + `breakpoint()` built-in. + + Note: this assigned to `os.environ['PYTHONBREAKPOINT']` inside `._root` + + ''' + pause_from_sync( + *args, + called_from_builtin=True, + api_frame=inspect.currentframe(), + **kwargs, + ) + + # NOTE prefer a new "pause" semantic since it better describes # "pausing the actor's runtime" for this particular # paralell task to do debugging in a REPL. 
@@ -2406,7 +2732,6 @@ async def maybe_wait_for_debugger( and not Lock.req_handler_finished.is_set() and in_debug is not None ): - # caller_frame_info: str = pformat_caller_frame() logmeth( msg @@ -2421,7 +2746,7 @@ async def maybe_wait_for_debugger( with trio.CancelScope(shield=True): await Lock.req_handler_finished.wait() - log.pdb( + log.devx( f'Subactor released debug lock\n' f'|_{in_debug}\n' ) @@ -2453,13 +2778,6 @@ async def maybe_wait_for_debugger( await trio.sleep(poll_delay) continue - # fallthrough on failure to acquire.. - # else: - # raise RuntimeError( - # msg - # + - # 'Root actor failed to acquire debug lock?' - # ) return True # else: -- 2.34.1 From 6a5d33b7ed8089efe70d732be4aab0cad72e29e2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Jun 2024 17:46:10 -0400 Subject: [PATCH 152/190] Make big TODO: for `devx._debug` refinements Hopefully would make grok-ing this fairly sophisticated sub-sys possible for any up-and-coming `tractor` hacker XD A lot of internal API and re-org ideas I discovered/realized as part of finishing the `__pld_spec__` and multi-threaded support. Particularly better isolation between root-actor vs subactor task APIs and generally less globally-state-ful stuff like `DebugStatus` and `Lock` method APIs would likely make a lot of the hard to follow edge cases more clear? --- tractor/devx/_debug.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 3218cffa..5578e8a6 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -95,8 +95,38 @@ log = get_logger(__name__) # TODO: refine the internal impl and APIs in this module! # -# -[ ] separate `._pause()` branch-cases for calling from a root task -# vs. from subactors +# -[ ] rework `._pause()` and it's branch-cases for root vs. +# subactor: +# -[ ] `._pause_from_root()` + `_pause_from_subactor()`? +# -[ ] do the de-factor based on bg-thread usage in +# `.pause_from_sync()` & `_pause_from_bg_root_thread()`. +# -[ ] drop `debug_func == None` case which is confusing af.. +# -[ ] factor out `_enter_repl_sync()` into a util func for calling +# the `_set_trace()` / `_post_mortem()` APIs? +# +# -[ ] figure out if we need `acquire_debug_lock()` and/or re-implement +# it as part of the `.pause_from_sync()` rework per above? +# +# -[ ] pair the `._pause_from_subactor()` impl with a "debug nursery" +# that's dynamically allocated inside the `._rpc` task thus +# avoiding the `._service_n.start()` usage for the IPC request? +# -[ ] see the TODO inside `._rpc._errors_relayed_via_ipc()` +# +# -[ ] impl a `open_debug_request()` which encaps all +# `request_root_stdio_lock()` task scheduling deats +# + `DebugStatus` state mgmt; which should prolly be re-branded as +# a `DebugRequest` type anyway AND with suppoort for bg-thread +# (from root actor) usage? +# +# -[ ] handle the `xonsh` case for bg-root-threads in the SIGINT +# handler! +# -[ ] do we need to do the same for subactors? +# -[ ] make the failing tests finally pass XD +# +# -[ ] simplify `maybe_wait_for_debugger()` to be a root-task only +# API? +# -[ ] currently it's implemented as that so might as well make it +# formal? 
def hide_runtime_frames() -> dict[FunctionType, CodeType]: -- 2.34.1 From f83e06d371caeb0a27ec1bb14d891d1f33583d00 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Jun 2024 17:57:43 -0400 Subject: [PATCH 153/190] Use new `._debug._repl_fail_msg` inside `test_pause_from_sync` --- tests/test_debugger.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index c95c4f95..e4f28548 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -12,7 +12,6 @@ TODO: """ from functools import partial import itertools -from typing import Optional import platform import pathlib import time @@ -30,6 +29,7 @@ from tractor._testing import ( from tractor.devx._debug import ( _pause_msg, _crash_msg, + _repl_fail_msg, ) from .conftest import ( _ci_env, @@ -291,7 +291,7 @@ def do_ctlc( child, count: int = 3, delay: float = 0.1, - patt: Optional[str] = None, + patt: str|None = None, # expect repl UX to reprint the prompt after every # ctrl-c send. @@ -1304,7 +1304,7 @@ def test_shield_pause( [ _crash_msg, "('cancelled_before_pause'", # actor name - "Failed to engage debugger via `_pause()`", + _repl_fail_msg, "trio.Cancelled", "raise Cancelled._create()", @@ -1322,7 +1322,7 @@ def test_shield_pause( [ _crash_msg, "('root'", # actor name - "Failed to engage debugger via `_pause()`", + _repl_fail_msg, "trio.Cancelled", "raise Cancelled._create()", -- 2.34.1 From ad28f0c9b3a9336b594da5f9c591cef74d8d7ff6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Jun 2024 19:16:21 -0400 Subject: [PATCH 154/190] Use `_debug._sync_pause_from_builtin()` as `breakpoint()` override --- tractor/_root.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_root.py b/tractor/_root.py index 4ddfde51..bbd9d93d 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -120,7 +120,7 @@ async def open_root_actor( ) ): os.environ['PYTHONBREAKPOINT'] = ( - 'tractor.devx._debug.pause_from_sync' + 'tractor.devx._debug._sync_pause_from_builtin' ) _state._runtime_vars['use_greenback'] = True -- 2.34.1 From c10c34d717b52c5a169254d9b79f1d18e4bfe696 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 14 Jun 2024 15:27:35 -0400 Subject: [PATCH 155/190] Add `@context(pld_spec=)` TODO list Longer run we don't want `tractor` app devs having to call `msg._ops.limit_plds()` from every child endpoint.. so this starts a list of decorator API ideas and obviously ties in with an ideal final API design that will come with py3.13 and typed funcs. Obviously this is directly fueled by, - https://github.com/goodboy/tractor/issues/365 Other, - type with direct `trio.lowlevel.Task` import. - use `log.exception()` to show tbs for all error-terminations in `.open_context()` (for now) and always explicitly mention the `.side`. --- tractor/_context.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index dd4ad846..20ca29f6 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -58,6 +58,7 @@ from typing import ( import warnings # ------ - ------ import trio +from trio.lowlevel import Task # ------ - ------ from ._exceptions import ( ContextCancelled, @@ -121,7 +122,7 @@ class Unresolved: @dataclass class Context: ''' - An inter-actor, SC transitive, `trio.Task` communication context. + An inter-actor, SC transitive, `Task` communication context. 
NB: This class should **never be instatiated directly**, it is allocated by the runtime in 2 ways: @@ -134,7 +135,7 @@ class Context: Allows maintaining task or protocol specific state between 2 cancel-scope-linked, communicating and parallel executing - `trio.Task`s. Contexts are allocated on each side of any task + `Task`s. Contexts are allocated on each side of any task RPC-linked msg dialog, i.e. for every request to a remote actor from a `Portal`. On the "callee" side a context is always allocated inside `._rpc._invoke()`. @@ -214,7 +215,7 @@ class Context: # which is exactly the primitive that allows for # cross-actor-task-supervision and thus SC. _scope: trio.CancelScope|None = None - _task: trio.lowlevel.Task|None = None + _task: Task|None = None # TODO: cs around result waiting so we can cancel any # permanently blocking `._rx_chan.receive()` call in @@ -258,14 +259,14 @@ class Context: # a call to `.cancel()` which triggers `ContextCancelled`. _cancel_msg: str|dict|None = None - # NOTE: this state var used by the runtime to determine if the + # NOTE: this state-var is used by the runtime to determine if the # `pdbp` REPL is allowed to engage on contexts terminated via # a `ContextCancelled` due to a call to `.cancel()` triggering # "graceful closure" on either side: # - `._runtime._invoke()` will check this flag before engaging # the crash handler REPL in such cases where the "callee" # raises the cancellation, - # - `.devx._debug.lock_tty_for_child()` will set it to `False` if + # - `.devx._debug.lock_stdio_for_peer()` will set it to `False` if # the global tty-lock has been configured to filter out some # actors from being able to acquire the debugger lock. _enter_debugger_on_cancel: bool = True @@ -861,7 +862,7 @@ class Context: ) -> None: ''' Cancel this inter-actor IPC context by requestng the - remote side's cancel-scope-linked `trio.Task` by calling + remote side's cancel-scope-linked `Task` by calling `._scope.cancel()` and delivering an `ContextCancelled` ack msg in reponse. @@ -1030,7 +1031,7 @@ class Context: # XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption # for "graceful cancellation" case: # - # Whenever a "side" of a context (a `trio.Task` running in + # Whenever a "side" of a context (a `Task` running in # an actor) **is** the side which requested ctx # cancellation (likekly via ``Context.cancel()``), we # **don't** want to re-raise any eventually received @@ -1089,7 +1090,8 @@ class Context: else: log.warning( 'Local error already set for ctx?\n' - f'{self._local_error}\n' + f'{self._local_error}\n\n' + f'{self}' ) return remote_error @@ -2117,8 +2119,9 @@ async def open_context_from_portal( # the `ContextCancelled` "self cancellation absorbed" case # handled in the block above ^^^ !! # await _debug.pause() - log.cancel( - 'Context terminated due to\n\n' + # log.cancel( + log.exception( + f'{ctx.side}-side of `Context` terminated with ' f'.outcome => {ctx.repr_outcome()}\n' ) @@ -2319,7 +2322,7 @@ async def open_context_from_portal( # type_only=True, ) log.cancel( - f'Context terminated due to local scope error:\n\n' + f'Context terminated due to local {ctx.side!r}-side error:\n\n' f'{ctx.chan.uid} => {outcome_str}\n' ) @@ -2385,15 +2388,25 @@ def mk_context( # TODO: use the new type-parameters to annotate this in 3.13? 
# -[ ] https://peps.python.org/pep-0718/#unknown-types +# -[ ] allow for `pld_spec` input(s) ideally breaking down, +# |_ `start: ParameterSpec`, +# |_ `started: TypeAlias`, +# |_ `yields: TypeAlias`, +# |_ `return: TypeAlias`, +# |_ `invalid_policy: str|Callable` ? +# -[ ] prolly implement the `@acm` wrapper using +# a `contextlib.ContextDecorator`? +# def context( func: Callable, ) -> Callable: ''' - Mark an (async) function as an SC-supervised, inter-`Actor`, - child-`trio.Task`, IPC endpoint otherwise known more - colloquially as a (RPC) "context". + Mark an async function as an SC-supervised, inter-`Actor`, RPC + scheduled child-side `Task`, IPC endpoint otherwise + known more colloquially as a (RPC) "context". - Functions annotated the fundamental IPC endpoint type offered by `tractor`. + Functions annotated the fundamental IPC endpoint type offered by + `tractor`. ''' # TODO: apply whatever solution ``mypy`` ends up picking for this: -- 2.34.1 From 0ffb4f0db1556854fc347b1ebec1d6d51b731b53 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 14 Jun 2024 15:49:30 -0400 Subject: [PATCH 156/190] Log tbs from non-RAE `._invoke()`-RPC-task errors `RemoteActorError`s show this by default in their `.__repr__()`, and we obvi capture and embed the src traceback in an `Error` msg prior to transit, but for logging it's also handy to see the tb of any set `Context._remote_error` on console especially when trying to decipher remote error details at their origin actor. Also improve the log message description using `ctx.repr_state` and show any `ctx.outcome`. --- tractor/_rpc.py | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 8a9b3487..a8cb010a 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -26,6 +26,7 @@ from contextlib import ( from functools import partial import inspect from pprint import pformat +import traceback from typing import ( Any, Callable, @@ -47,6 +48,7 @@ from ._context import ( ) from ._exceptions import ( ContextCancelled, + RemoteActorError, ModuleNotExposed, MsgTypeError, TransportClosed, @@ -197,7 +199,8 @@ async def _invoke_non_context( raise ipc_err else: log.exception( - f'Failed to respond to runtime RPC request for\n\n' + f'Failed to ack runtime RPC request\n\n' + f'{func} x=> {ctx.chan}\n\n' f'{ack}\n' ) @@ -414,7 +417,6 @@ async def _errors_relayed_via_ipc( async def _invoke( - actor: Actor, cid: str, chan: Channel, @@ -690,10 +692,6 @@ async def _invoke( boxed_type=trio.Cancelled, canceller=canceller, ) - # does this matter other then for - # consistentcy/testing? |_ no user code should be - # in this scope at this point.. - # ctx._local_error = ctxc raise ctxc # XXX: do we ever trigger this block any more? @@ -714,6 +712,11 @@ async def _invoke( # always set this (child) side's exception as the # local error on the context ctx._local_error: BaseException = scope_error + # ^-TODO-^ question, + # does this matter other then for + # consistentcy/testing? + # |_ no user code should be in this scope at this point + # AND we already set this in the block below? 
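Aside, the one-string traceback render used in the hunk below is plain
stdlib; a minimal sketch (the single-arg `format_exception()` form
assumes Python 3.10+, which the `X|None` annotations in this codebase
already require):

    import traceback

    def fmt_tb(exc: BaseException) -> str:
        # render the same multi-line tb-string `log.exception()`
        # would show, but as a plain value embeddable in a custom
        # log message.
        return ''.join(traceback.format_exception(exc))
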
# if a remote error was set then likely the # exception group was raised due to that, so @@ -740,22 +743,32 @@ async def _invoke( logmeth: Callable = log.runtime merr: Exception|None = ctx.maybe_error - descr_str: str = 'with final result `{repr(ctx.outcome)}`' - message: str = ( - f'IPC context terminated {descr_str}\n\n' + message: str = 'IPC context terminated ' + descr_str: str = ( + f'after having {ctx.repr_state!r}\n' ) if merr: - descr_str: str = ( - f'with ctx having {ctx.repr_state!r}\n' - f'{ctx.repr_outcome()}\n' - ) + + logmeth: Callable = log.error if isinstance(merr, ContextCancelled): logmeth: Callable = log.runtime - else: - logmeth: Callable = log.error - message += f'\n{merr!r}\n' - logmeth(message) + if not isinstance(merr, RemoteActorError): + tb_str: str = ''.join(traceback.format_exception(merr)) + descr_str += ( + f'\n{merr!r}\n' # needed? + f'{tb_str}\n' + ) + else: + descr_str += f'\n{merr!r}\n' + else: + descr_str += f'\nand final result {ctx.outcome!r}\n' + + logmeth( + message + + + descr_str + ) async def try_ship_error_to_remote( -- 2.34.1 From b8d37060ec33551bd14cf00a166c397c036e318e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 16 Jun 2024 22:50:43 -0400 Subject: [PATCH 157/190] Offer a `@context(pld_spec=)` API Instead of the WIP/prototyped `Portal.open_context()` offering a `pld_spec` input arg, this changes to a proper decorator API for specifying the "payload spec" on `@context` endpoints. The impl change details actually cover 2-birds: - monkey patch decorated functions with a new `._tractor_context_meta: dict[str, Any]` and insert any provided input `@context` kwargs: `_pld_spec`, `enc_hook`, `enc_hook`. - use `inspect.get_annotations()` to scan for a `func` arg type-annotated with `tractor.Context` and use the name of that arg as the RPC task-side injected `Context`, thus injecting the needed arg by type instead of by name (a longstanding TODO); raise a type-error when not found. - pull the `pld_spec` from the `._tractor_context_meta` attr both in the `.open_context()` parent-side and child-side `._invoke()`-cation of the RPC task and use the `msg._ops.maybe_limit_plds()` API to apply it internally in the runtime for each case. --- tractor/_context.py | 89 +++++++++++++++++++++++++++++++++++---------- tractor/_rpc.py | 25 +++++++++++-- 2 files changed, 92 insertions(+), 22 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 20ca29f6..c61af15a 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1792,7 +1792,6 @@ async def open_context_from_portal( portal: Portal, func: Callable, - pld_spec: TypeAlias|None = None, allow_overruns: bool = False, hide_tb: bool = True, @@ -1838,12 +1837,20 @@ async def open_context_from_portal( # NOTE: 2 bc of the wrapping `@acm` __runtimeframe__: int = 2 # noqa - # conduct target func method structural checks - if not inspect.iscoroutinefunction(func) and ( - getattr(func, '_tractor_contex_function', False) + # if NOT an async func but decorated with `@context`, error. + if ( + not inspect.iscoroutinefunction(func) + and getattr(func, '_tractor_context_meta', False) ): raise TypeError( - f'{func} must be an async generator function!') + f'{func!r} must be an async function!' 
+ ) + + ctx_meta: dict[str, Any]|None = getattr( + func, + '_tractor_context_meta', + None, + ) # TODO: i think from here onward should probably # just be factored into an `@acm` inside a new @@ -1890,12 +1897,9 @@ async def open_context_from_portal( trio.open_nursery() as tn, msgops.maybe_limit_plds( ctx=ctx, - spec=pld_spec, - ) as maybe_msgdec, + spec=ctx_meta.get('pld_spec'), + ), ): - if maybe_msgdec: - assert maybe_msgdec.pld_spec == pld_spec - # NOTE: this in an implicit runtime nursery used to, # - start overrun queuing tasks when as well as # for cancellation of the scope opened by the user. @@ -2398,7 +2402,15 @@ def mk_context( # a `contextlib.ContextDecorator`? # def context( - func: Callable, + func: Callable|None = None, + + *, + + # must be named! + pld_spec: Union[Type]|TypeAlias = Any, + dec_hook: Callable|None = None, + enc_hook: Callable|None = None, + ) -> Callable: ''' Mark an async function as an SC-supervised, inter-`Actor`, RPC @@ -2409,15 +2421,54 @@ def context( `tractor`. ''' + # XXX for the `@context(pld_spec=MyMsg|None)` case + if func is None: + return partial( + context, + pld_spec=pld_spec, + dec_hook=dec_hook, + enc_hook=enc_hook, + ) + + # TODO: from this, enforcing a `Start.sig` type + # check when invoking RPC tasks by ensuring the input + # args validate against the endpoint def. + sig: inspect.Signature = inspect.signature(func) + # params: inspect.Parameters = sig.parameters + + # https://docs.python.org/3/library/inspect.html#inspect.get_annotations + annots: dict[str, Type] = inspect.get_annotations( + func, + eval_str=True, + ) + name: str + param: Type + for name, param in annots.items(): + if param is Context: + ctx_var_name: str = name + break + else: + raise TypeError( + 'At least one (normally the first) argument to the `@context` function ' + f'{func.__name__!r} must be typed as `tractor.Context`, for ex,\n\n' + f'`ctx: tractor.Context`\n' + ) + # TODO: apply whatever solution ``mypy`` ends up picking for this: # https://github.com/python/mypy/issues/2087#issuecomment-769266912 - func._tractor_context_function = True # type: ignore + # func._tractor_context_function = True # type: ignore + func._tractor_context_meta: dict[str, Any] = { + 'ctx_var_name': ctx_var_name, + # `msgspec` related settings + 'pld_spec': pld_spec, + 'enc_hook': enc_hook, + 'dec_hook': dec_hook, - sig: inspect.Signature = inspect.signature(func) - params: Mapping = sig.parameters - if 'ctx' not in params: - raise TypeError( - "The first argument to the context function " - f"{func.__name__} must be `ctx: tractor.Context`" - ) + # TODO: eventually we need to "signature-check" with these + # vs. the `Start` msg fields! 
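Aside, to make the new decorator surface concrete, a usage sketch
mirroring the test updated later in this series (the `PldMsg` struct
is that test's fixture, not a public type):

    from msgspec import Struct
    import tractor

    class PldMsg(Struct):
        field: str

    @tractor.context(pld_spec=PldMsg|None)
    async def child(
        ctx: tractor.Context,  # located by annotation; any arg-name works
        started_value: PldMsg|None,
    ) -> PldMsg|None:
        # the runtime applies the pld-spec limit before this body
        # runs, so any `.started()`/`return` payload not matching
        # `PldMsg|None` raises a `MsgTypeError` instead of decoding.
        await ctx.started(started_value)
        return started_value
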
+ # => this would allow for TPC endpoint argument-type-spec + # limiting and we could then error on + # invalid inputs passed to `.open_context(rpc_ep, arg0='blah')` + 'sig': sig, + } return func diff --git a/tractor/_rpc.py b/tractor/_rpc.py index a8cb010a..37fea079 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -68,6 +68,7 @@ from .msg import ( PayloadT, NamespacePath, pretty_struct, + _ops as msgops, ) from tractor.msg.types import ( CancelAck, @@ -499,8 +500,19 @@ async def _invoke( # handle decorated ``@tractor.context`` async function - elif getattr(func, '_tractor_context_function', False): - kwargs['ctx'] = ctx + # - pull out any typed-pld-spec info and apply (below) + # - (TODO) store func-ref meta data for API-frame-info logging + elif ( + ctx_meta := getattr( + func, + '_tractor_context_meta', + False, + ) + ): + # kwargs['ctx'] = ctx + # set the required `tractor.Context` typed input argument to + # the allocated RPC task context. + kwargs[ctx_meta['ctx_var_name']] = ctx context_ep_func = True # errors raised inside this block are propgated back to caller @@ -594,7 +606,14 @@ async def _invoke( # `@context` marked RPC function. # - `._portal` is never set. try: - async with trio.open_nursery() as tn: + async with ( + trio.open_nursery() as tn, + msgops.maybe_limit_plds( + ctx=ctx, + spec=ctx_meta.get('pld_spec'), + dec_hook=ctx_meta.get('dec_hook'), + ), + ): ctx._scope_nursery = tn ctx._scope = tn.cancel_scope task_status.started(ctx) -- 2.34.1 From 097101f8d3bae787534aae4f645f94653fc9edf8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 09:01:13 -0400 Subject: [PATCH 158/190] Port debug request ep to use `@context(pld_spec)` Namely passing the `.__pld_spec__` directly to the `lock_stdio_for_peer()` decorator B) Also, allows dropping `apply_debug_pldec()` (which was a todo) and removing a `lock_stdio_for_peer()` indent level. --- tractor/devx/_debug.py | 150 +++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 96 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 5578e8a6..ccf57d62 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -68,6 +68,7 @@ from trio import ( ) import tractor from tractor.log import get_logger +from tractor._context import Context from tractor._state import ( current_actor, is_root_process, @@ -83,7 +84,6 @@ if TYPE_CHECKING: from trio.lowlevel import Task from threading import Thread from tractor._ipc import Channel - from tractor._context import Context from tractor._runtime import ( Actor, ) @@ -529,7 +529,10 @@ class Lock: ) -@tractor.context +@tractor.context( + # enable the locking msgspec + pld_spec=__pld_spec__, +) async def lock_stdio_for_peer( ctx: Context, subactor_task_uid: tuple[str, int], @@ -597,61 +600,55 @@ async def lock_stdio_for_peer( # scope despite the shielding we apply below. debug_lock_cs: CancelScope = ctx._scope - # TODO: use `.msg._ops.maybe_limit_plds()` here instead so we - # can merge into a single async with, with the - # `Lock.acquire_for_ctx()` enter below? 
- # - # enable the locking msgspec - with apply_debug_pldec(): - async with Lock.acquire_for_ctx(ctx=ctx): - debug_lock_cs.shield = True + async with Lock.acquire_for_ctx(ctx=ctx): + debug_lock_cs.shield = True - log.devx( - 'Subactor acquired debugger request lock!\n' - f'root task: {root_task_name}\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {subactor_task_uid}\n\n' + log.devx( + 'Subactor acquired debugger request lock!\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n\n' - 'Sending `ctx.started(LockStatus)`..\n' + 'Sending `ctx.started(LockStatus)`..\n' - ) - - # indicate to child that we've locked stdio - await ctx.started( - LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=True, - ) - ) - - log.devx( - f'Actor {subactor_uid} acquired `Lock` via debugger request' - ) - - # wait for unlock pdb by child - async with ctx.open_stream() as stream: - release_msg: LockRelease = await stream.receive() - - # TODO: security around only releasing if - # these match? - log.devx( - f'TTY lock released requested\n\n' - f'{release_msg}\n' - ) - assert release_msg.cid == ctx.cid - assert release_msg.subactor_uid == tuple(subactor_uid) - - log.devx( - f'Actor {subactor_uid} released TTY lock' - ) - - return LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=False, ) + # indicate to child that we've locked stdio + await ctx.started( + LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=True, + ) + ) + + log.devx( + f'Actor {subactor_uid} acquired `Lock` via debugger request' + ) + + # wait for unlock pdb by child + async with ctx.open_stream() as stream: + release_msg: LockRelease = await stream.receive() + + # TODO: security around only releasing if + # these match? + log.devx( + f'TTY lock released requested\n\n' + f'{release_msg}\n' + ) + assert release_msg.cid == ctx.cid + assert release_msg.subactor_uid == tuple(subactor_uid) + + log.devx( + f'Actor {subactor_uid} released TTY lock' + ) + + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) + except BaseException as req_err: message: str = ( f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' @@ -1037,48 +1034,6 @@ class PdbREPL(pdbp.Pdb): return None -# TODO: prolly remove this and instead finally get our @context API -# supporting a msg/pld-spec via type annots as per, -# https://github.com/goodboy/tractor/issues/365 -@cm -def apply_debug_pldec() -> _codec.MsgCodec: - ''' - Apply the subactor TTY `Lock`-ing protocol's msgspec temporarily - (only in the current task). 
- - ''' - from tractor.msg import ( - _ops as msgops, - ) - cctx: Context = current_ipc_ctx() - rx: msgops.PldRx = cctx.pld_rx - orig_pldec: msgops.MsgDec = rx.pld_dec - - try: - with msgops.limit_plds( - spec=__pld_spec__, - ) as debug_dec: - assert ( - debug_dec - is - rx.pld_dec - ) - log.runtime( - 'Applied `.devx._debug` pld-spec\n\n' - f'{debug_dec}\n' - ) - yield debug_dec - - finally: - assert ( - rx.pld_dec is orig_pldec - ) - log.runtime( - 'Reverted to previous pld-spec\n\n' - f'{orig_pldec}\n' - ) - - async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], @@ -1147,6 +1102,7 @@ async def request_root_stdio_lock( async with portal.open_context( lock_stdio_for_peer, subactor_task_uid=task_uid, + # NOTE: set it here in the locker request task bc it's # possible for multiple such requests for the lock in any # single sub-actor AND there will be a race between when the @@ -1159,7 +1115,7 @@ async def request_root_stdio_lock( # this IPC-ctx request task, NOT any other task(s) # including the one that actually enters the REPL. This # is oc desired bc ow the debugged task will msg-type-error. - pld_spec=__pld_spec__, + # pld_spec=__pld_spec__, ) as (req_ctx, status): @@ -2856,7 +2812,9 @@ def open_crash_handler( @cm -def maybe_open_crash_handler(pdb: bool = False): +def maybe_open_crash_handler( + pdb: bool = False, +): ''' Same as `open_crash_handler()` but with bool input flag to allow conditional handling. -- 2.34.1 From 535fd06f73c5abc6d6ca3e3848ff99645868133e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 09:23:31 -0400 Subject: [PATCH 159/190] Proxy through `dec_hook` in `.limit_plds()` APIs --- tractor/msg/_ops.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 86f80395..80633e7e 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -27,6 +27,7 @@ from contextlib import ( ) from typing import ( Any, + Callable, Type, TYPE_CHECKING, Union, @@ -138,6 +139,7 @@ class PldRx(Struct): def limit_plds( self, spec: Union[Type[Struct]], + **dec_kwargs, ) -> MsgDec: ''' @@ -147,7 +149,10 @@ class PldRx(Struct): ''' orig_dec: MsgDec = self._pld_dec - limit_dec: MsgDec = mk_dec(spec=spec) + limit_dec: MsgDec = mk_dec( + spec=spec, + **dec_kwargs, + ) try: self._pld_dec = limit_dec yield limit_dec @@ -449,7 +454,7 @@ class PldRx(Struct): @cm def limit_plds( spec: Union[Type[Struct]], - **kwargs, + **dec_kwargs, ) -> MsgDec: ''' @@ -467,7 +472,7 @@ def limit_plds( with rx.limit_plds( spec=spec, - **kwargs, + **dec_kwargs, ) as pldec: log.runtime( 'Applying payload-decoder\n\n' @@ -487,7 +492,9 @@ def limit_plds( async def maybe_limit_plds( ctx: Context, spec: Union[Type[Struct]]|None = None, + dec_hook: Callable|None = None, **kwargs, + ) -> MsgDec|None: ''' Async compat maybe-payload type limiter. @@ -497,7 +504,11 @@ async def maybe_limit_plds( used. 
''' - if spec is None: + if ( + spec is None + and + dec_hook is None + ): yield None return @@ -505,7 +516,11 @@ async def maybe_limit_plds( curr_ctx: Context = current_ipc_ctx() assert ctx is curr_ctx - with ctx._pld_rx.limit_plds(spec=spec) as msgdec: + with ctx._pld_rx.limit_plds( + spec=spec, + dec_hook=dec_hook, + **kwargs, + ) as msgdec: yield msgdec curr_ctx: Context = current_ipc_ctx() -- 2.34.1 From 89c2137fc9444a45651d398fa87edd5e095dbf01 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 09:24:03 -0400 Subject: [PATCH 160/190] Update pld-rx limiting test(s) to use deco input The tests only use one input spec (conveniently) so there's not much to change in the logic, - only pass the `maybe_msg_spec` to the child-side decorator and obvi drop the surrounding `msgops.limit_plds()` block in the child. - tweak a few `MsgDec` asserts, mostly dropping the `msg._ops._def_any_spec` state checks since the child-side won't have any pre pld-spec state given the runtime now applies the `pld_spec` before running the task's func body. - also allowed dropping the `finally:` which did a similar check outside the `.limit_plds()` block. --- tests/test_pldrx_limiting.py | 203 +++++++++++++++++------------------ 1 file changed, 99 insertions(+), 104 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index ddf2a234..e5ce691a 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -7,9 +7,6 @@ related settings around IPC contexts. from contextlib import ( asynccontextmanager as acm, ) -from contextvars import ( - Context, -) from msgspec import ( Struct, @@ -19,6 +16,7 @@ import trio import tractor from tractor import ( + Context, MsgTypeError, current_ipc_ctx, Portal, @@ -35,7 +33,17 @@ from tractor.msg.types import ( ) -class PldMsg(Struct): +class PldMsg( + Struct, + + # TODO: with multiple structs in-spec we need to tag them! + # -[ ] offer a built-in `PldMsg` type to inherit from which takes + # case of these details? 
+ # + # https://jcristharif.com/msgspec/structs.html#tagged-unions + # tag=True, + # tag_field='msg_type', +): field: str @@ -96,12 +104,14 @@ async def maybe_expect_raises( ) -@tractor.context +@tractor.context( + pld_spec=maybe_msg_spec, +) async def child( ctx: Context, started_value: int|PldMsg|None, return_value: str|None, - validate_pld_spec: bool, + validate_pld_spec: bool, raise_on_started_mte: bool = True, ) -> None: @@ -116,113 +126,99 @@ async def child( assert ctx is curr_ctx rx: msgops.PldRx = ctx._pld_rx - orig_pldec: _codec.MsgDec = rx.pld_dec - # senity that default pld-spec should be set - assert ( - rx.pld_dec - is - msgops._def_any_pldec + curr_pldec: _codec.MsgDec = rx.pld_dec + + ctx_meta: dict = getattr( + child, + '_tractor_context_meta', + None, ) + if ctx_meta: + assert ( + ctx_meta['pld_spec'] + is curr_pldec.spec + is curr_pldec.pld_spec + ) + # 2 cases: hdndle send-side and recv-only validation + # - when `raise_on_started_mte == True`, send validate + # - else, parent-recv-side only validation + mte: MsgTypeError|None = None try: - with msgops.limit_plds( - spec=maybe_msg_spec, - ) as pldec: - # sanity on `MsgDec` state - assert rx.pld_dec is pldec - assert pldec.spec is maybe_msg_spec + await ctx.started( + value=started_value, + validate_pld_spec=validate_pld_spec, + ) - # 2 cases: hdndle send-side and recv-only validation - # - when `raise_on_started_mte == True`, send validate - # - else, parent-recv-side only validation - mte: MsgTypeError|None = None - try: - await ctx.started( - value=started_value, - validate_pld_spec=validate_pld_spec, - ) - - except MsgTypeError as _mte: - mte = _mte - log.exception('started()` raised an MTE!\n') - if not expect_started_mte: - raise RuntimeError( - 'Child-ctx-task SHOULD NOT HAVE raised an MTE for\n\n' - f'{started_value!r}\n' - ) - - boxed_div: str = '------ - ------' - assert boxed_div not in mte._message - assert boxed_div not in mte.tb_str - assert boxed_div not in repr(mte) - assert boxed_div not in str(mte) - mte_repr: str = repr(mte) - for line in mte.message.splitlines(): - assert line in mte_repr - - # since this is a *local error* there should be no - # boxed traceback content! - assert not mte.tb_str - - # propagate to parent? - if raise_on_started_mte: - raise - - # no-send-side-error fallthrough - if ( - validate_pld_spec - and - expect_started_mte - ): - raise RuntimeError( - 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' - f'{started_value!r}\n' - ) - - assert ( - not expect_started_mte - or - not validate_pld_spec + except MsgTypeError as _mte: + mte = _mte + log.exception('started()` raised an MTE!\n') + if not expect_started_mte: + raise RuntimeError( + 'Child-ctx-task SHOULD NOT HAVE raised an MTE for\n\n' + f'{started_value!r}\n' ) - # if wait_for_parent_to_cancel: - # ... - # - # ^-TODO-^ logic for diff validation policies on each side: - # - # -[ ] ensure that if we don't validate on the send - # side, that we are eventually error-cancelled by our - # parent due to the bad `Started` payload! - # -[ ] the boxed error should be srced from the parent's - # runtime NOT ours! - # -[ ] we should still error on bad `return_value`s - # despite the parent not yet error-cancelling us? - # |_ how do we want the parent side to look in that - # case? - # -[ ] maybe the equiv of "during handling of the - # above error another occurred" for the case where - # the parent sends a MTE to this child and while - # waiting for the child to terminate it gets back - # the MTE for this case? 
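Aside, the "tagged unions" feature referenced in the `PldMsg` comment
above, sketched standalone (hypothetical structs, following the
pattern from the linked `msgspec` docs):

    from msgspec import Struct

    class Add(Struct, tag=True):
        x: int
        y: int

    class Sub(Struct, tag=True):
        x: int
        y: int

    # with `tag=True` each struct embeds its class name as a tag
    # field, so both can share one decodable (pld-)spec:
    OpMsg = Add|Sub
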
- # + boxed_div: str = '------ - ------' + assert boxed_div not in mte._message + assert boxed_div not in mte.tb_str + assert boxed_div not in repr(mte) + assert boxed_div not in str(mte) + mte_repr: str = repr(mte) + for line in mte.message.splitlines(): + assert line in mte_repr - # XXX should always fail on recv side since we can't - # really do much else beside terminate and relay the - # msg-type-error from this RPC task ;) - return return_value + # since this is a *local error* there should be no + # boxed traceback content! + assert not mte.tb_str - finally: - # sanity on `limit_plds()` reversion - assert ( - rx.pld_dec - is - msgops._def_any_pldec - ) - log.runtime( - 'Reverted to previous pld-spec\n\n' - f'{orig_pldec}\n' + # propagate to parent? + if raise_on_started_mte: + raise + + # no-send-side-error fallthrough + if ( + validate_pld_spec + and + expect_started_mte + ): + raise RuntimeError( + 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' + f'{started_value!r}\n' ) + assert ( + not expect_started_mte + or + not validate_pld_spec + ) + + # if wait_for_parent_to_cancel: + # ... + # + # ^-TODO-^ logic for diff validation policies on each side: + # + # -[ ] ensure that if we don't validate on the send + # side, that we are eventually error-cancelled by our + # parent due to the bad `Started` payload! + # -[ ] the boxed error should be srced from the parent's + # runtime NOT ours! + # -[ ] we should still error on bad `return_value`s + # despite the parent not yet error-cancelling us? + # |_ how do we want the parent side to look in that + # case? + # -[ ] maybe the equiv of "during handling of the + # above error another occurred" for the case where + # the parent sends a MTE to this child and while + # waiting for the child to terminate it gets back + # the MTE for this case? + # + + # XXX should always fail on recv side since we can't + # really do much else beside terminate and relay the + # msg-type-error from this RPC task ;) + return return_value + @pytest.mark.parametrize( 'return_value', @@ -321,7 +317,6 @@ def test_basic_payload_spec( child, return_value=return_value, started_value=started_value, - pld_spec=maybe_msg_spec, validate_pld_spec=pld_check_started_value, ) as (ctx, first), ): -- 2.34.1 From 00dbf55fd396729f00a6170afe51c4b3ba490596 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 10:32:38 -0400 Subject: [PATCH 161/190] Add note about using `@acm` as decorator in 3.10 --- tractor/_context.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tractor/_context.py b/tractor/_context.py index c61af15a..686a648b 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -2399,7 +2399,11 @@ def mk_context( # |_ `return: TypeAlias`, # |_ `invalid_policy: str|Callable` ? # -[ ] prolly implement the `@acm` wrapper using -# a `contextlib.ContextDecorator`? +# a `contextlib.ContextDecorator`, i guess not if +# we don't need an `__aexit__` block right? 
+# |_ de hecho, @acm can already be used as a decorator as of 3.10 +# but i dunno how that's gonna play with `trio.Nursery.start[_soon]()` +# |_ https://docs.python.org/3/library/contextlib.html#using-a-context-manager-as-a-function-decorator # def context( func: Callable|None = None, -- 2.34.1 From 3613c37a6fc5272339247b458fa844853409a0fb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 10:32:50 -0400 Subject: [PATCH 162/190] Don't pass `ipc_msg` for send side MTEs Just pass `_bad_msg` such that it get's injected to `.msgdata` since with a send-side `MsgTypeError` we don't have a remote `._ipc_msg: Error` per say to include. --- tractor/_exceptions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 92c3fafb..8ed46ebc 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -441,7 +441,8 @@ class RemoteActorError(Exception): for key in fields: if ( - key == 'relay_uid' and not self.is_inception() + key == 'relay_uid' + and not self.is_inception() ): continue @@ -1291,8 +1292,7 @@ def _mk_msg_type_err( msgtyperr = MsgTypeError( message=message, - ipc_msg=msg, - bad_msg=msg, + _bad_msg=msg, ) # ya, might be `None` msgtyperr.__cause__ = src_type_error -- 2.34.1 From 3246b3a3bcd6a4388d598ac82ce9d5c66565451a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 13:12:16 -0400 Subject: [PATCH 163/190] Break `_mk_msg_type_err()` into recv/send side funcs Name them `_mk_send_mte()`/`_mk_recv_mte()` and change the runtime to call each appropriately depending on location/usage. Also add some dynamic call-frame "unhide" blocks such that when we expect raised MTE from the aboves calls but we get a different unexpected error from the runtime, we ensure the call stack downward is shown in tbs/pdb. |_ ideally in the longer run we come up with a fancier dynamic sys for this, prolly something in `.devx._frame_stack`? --- tractor/_context.py | 30 ++--- tractor/_exceptions.py | 258 ++++++++++++++++++++++------------------- tractor/_ipc.py | 12 +- tractor/msg/_ops.py | 36 +++--- 4 files changed, 174 insertions(+), 162 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 686a648b..be492e4e 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -49,7 +49,6 @@ from typing import ( Any, AsyncGenerator, Callable, - Mapping, Type, TypeAlias, TYPE_CHECKING, @@ -1484,13 +1483,21 @@ class Context: # __tracebackhide__: bool = hide_tb if validate_pld_spec: - msgops.validate_payload_msg( - pld_msg=started_msg, - pld_value=value, - ipc=self, - strict_pld_parity=strict_pld_parity, - hide_tb=hide_tb, - ) + # TODO: prolly wrap this as a `show_frame_when_not()` + try: + msgops.validate_payload_msg( + pld_msg=started_msg, + pld_value=value, + ipc=self, + strict_pld_parity=strict_pld_parity, + hide_tb=hide_tb, + ) + except BaseException as err: + if not isinstance(err, MsgTypeError): + __tracebackhide__: bool = False + + raise + # TODO: maybe a flag to by-pass encode op if already done # here in caller? @@ -2185,11 +2192,6 @@ async def open_context_from_portal( try: result_or_err: Exception|Any = await ctx.result() except BaseException as berr: - # cancelled before (or maybe during?) 
final result capture - # if isinstance(trio.Cancelled, berr): - # from .devx import mk_pdb - # mk_pdb.set_trace() - # on normal teardown, if we get some error # raised in `Context.result()` we still want to # save that error on the ctx's state to @@ -2201,7 +2203,7 @@ async def open_context_from_portal( ctx._local_error: BaseException = scope_err raise - # yes! this worx Bp + # yes this worx! # from .devx import _debug # await _debug.pause() diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 8ed46ebc..7164d6ab 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -1232,14 +1232,13 @@ def _raise_from_unexpected_msg( _raise_from_no_key_in_msg = _raise_from_unexpected_msg -def _mk_msg_type_err( +def _mk_send_mte( msg: Any|bytes|MsgType, codec: MsgCodec|MsgDec, message: str|None = None, verb_header: str = '', - src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, @@ -1247,131 +1246,148 @@ def _mk_msg_type_err( ) -> MsgTypeError: ''' - Compose a `MsgTypeError` from an input runtime context. + Compose a `MsgTypeError` from a `Channel.send()`-side error, + normally raised witih a runtime IPC `Context`. ''' - # `Channel.send()` case - if src_validation_error is None: + if isinstance(codec, MsgDec): + raise RuntimeError( + '`codec` must be a `MsgCodec` for send-side errors?' + ) - if isinstance(codec, MsgDec): - raise RuntimeError( - '`codec` must be a `MsgCodec` for send-side errors?' + from tractor.devx import ( + pformat_caller_frame, + ) + # no src error from `msgspec.msgpack.Decoder.decode()` so + # prolly a manual type-check on our part. + if message is None: + tb_fmt: str = pformat_caller_frame(stack_limit=3) + message: str = ( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + f'{codec.msg_spec_str}\n', + ) + elif src_type_error: + src_message: str = str(src_type_error) + patt: str = 'type ' + type_idx: int = src_message.find('type ') + invalid_type: str = src_message[type_idx + len(patt):].split()[0] + + enc_hook: Callable|None = codec.enc.enc_hook + if enc_hook is None: + message += ( + '\n\n' + + f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" + f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' + + f'Check the `msgspec` docs for ad-hoc type extending:\n' + '|_ https://jcristharif.com/msgspec/extending.html\n' + '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' ) - from tractor.devx import ( - pformat_caller_frame, + msgtyperr = MsgTypeError( + message=message, + _bad_msg=msg, + ) + # ya, might be `None` + msgtyperr.__cause__ = src_type_error + return msgtyperr + + +def _mk_recv_mte( + msg: Any|bytes|MsgType, + codec: MsgCodec|MsgDec, + + message: str|None = None, + verb_header: str = '', + + src_validation_error: ValidationError|None = None, + is_invalid_payload: bool = False, + + **mte_kwargs, + +) -> MsgTypeError: + ''' + Compose a `MsgTypeError` from a + `Channel|Context|MsgStream.receive()`-side error, + normally raised witih a runtime IPC ctx or streaming + block. 
+ + ''' + msg_dict: dict|None = None + bad_msg: PayloadMsg|None = None + + if is_invalid_payload: + msg_type: str = type(msg) + any_pld: Any = msgpack.decode(msg.pld) + message: str = ( + f'invalid `{msg_type.__qualname__}` msg payload\n\n' + f'value: `{any_pld!r}` does not match type-spec: ' + f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' ) - # no src error from `msgspec.msgpack.Decoder.decode()` so - # prolly a manual type-check on our part. - if message is None: - tb_fmt: str = pformat_caller_frame(stack_limit=3) - message: str = ( - f'invalid msg -> {msg}: {type(msg)}\n\n' - f'{tb_fmt}\n' - f'Valid IPC msgs are:\n\n' - f'{codec.msg_spec_str}\n', - ) - elif src_type_error: - src_message: str = str(src_type_error) - patt: str = 'type ' - type_idx: int = src_message.find('type ') - invalid_type: str = src_message[type_idx + len(patt):].split()[0] + bad_msg = msg - enc_hook: Callable|None = codec.enc.enc_hook - if enc_hook is None: - message += ( - '\n\n' - - f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" - f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' - - f'Check the `msgspec` docs for ad-hoc type extending:\n' - '|_ https://jcristharif.com/msgspec/extending.html\n' - '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' - ) - - msgtyperr = MsgTypeError( - message=message, - _bad_msg=msg, - ) - # ya, might be `None` - msgtyperr.__cause__ = src_type_error - return msgtyperr - - # `Channel.recv()` case else: - msg_dict: dict|None = None - bad_msg: PayloadMsg|None = None - - if is_invalid_payload: - msg_type: str = type(msg) - any_pld: Any = msgpack.decode(msg.pld) - message: str = ( - f'invalid `{msg_type.__qualname__}` msg payload\n\n' - f'value: `{any_pld!r}` does not match type-spec: ' - f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' - ) - bad_msg = msg - - else: - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any `msgspec.Struct` - # handling) so that we can determine what - # `.msg.types.PayloadMsg` is the culprit by reporting the - # received value. - msg: bytes - msg_dict: dict = msgpack.decode(msg) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(msgtypes, msg_type_name) - message: str = ( - f'invalid `{msg_type_name}` IPC msg\n\n' - ) - # XXX be "fancy" and see if we can determine the exact - # invalid field such that we can comprehensively report - # the specific field's type problem. - msgspec_msg: str = src_validation_error.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - obj = object() - if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_name_expr: str = ( - f' |_{maybe_field}: {codec.pld_spec_str} = ' - ) - fmt_val_lines: list[str] = pformat(field_val).splitlines() - fmt_val: str = ( - f'{fmt_val_lines[0]}\n' - + - textwrap.indent( - '\n'.join(fmt_val_lines[1:]), - prefix=' '*len(field_name_expr), - ) - ) - message += ( - f'{msg.rstrip("`")}\n\n' - f'<{msg_type.__qualname__}(\n' - # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' - f'{field_name_expr}{fmt_val}\n' - f')>' - ) - - if verb_header: - message = f'{verb_header} ' + message - - msgtyperr = MsgTypeError.from_decode( - message=message, - bad_msg=bad_msg, - bad_msg_as_dict=msg_dict, - boxed_type=type(src_validation_error), - - # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: - # - for the send-side `.started()` pld-validate - # case we actually raise inline so we don't need to - # set the it at all. 
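Aside, the `enc_hook()` extension suggested by the send-side error
message above, sketched per the linked `msgspec` extending docs
(`Decimal` is just an example unsupported type):

    from decimal import Decimal
    import msgspec

    def enc_hook(obj: object) -> object:
        # teach the codec to encode types msgpack lacks natively
        if isinstance(obj, Decimal):
            return str(obj)
        raise NotImplementedError(
            f'Objects of type {type(obj)} are not supported'
        )

    enc = msgspec.msgpack.Encoder(enc_hook=enc_hook)
    enc.encode(Decimal('1.5'))  # encodes instead of raising `TypeError`
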
- # - for recv side we set it inside `PldRx.decode_pld()` - # after a manual call to `pack_error()` since we - # actually want to emulate the `Error` from the mte we - # build here. So by default in that case, this is left - # as `None` here. - # ipc_msg=src_err_msg, + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any `msgspec.Struct` + # handling) so that we can determine what + # `.msg.types.PayloadMsg` is the culprit by reporting the + # received value. + msg: bytes + msg_dict: dict = msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + message: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' ) - msgtyperr.__cause__ = src_validation_error - return msgtyperr + # XXX be "fancy" and see if we can determine the exact + # invalid field such that we can comprehensively report + # the specific field's type problem. + msgspec_msg: str = src_validation_error.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) + message += ( + f'{msg.rstrip("`")}\n\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' + ) + + if verb_header: + message = f'{verb_header} ' + message + + msgtyperr = MsgTypeError.from_decode( + message=message, + bad_msg=bad_msg, + bad_msg_as_dict=msg_dict, + boxed_type=type(src_validation_error), + + # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: + # - for the send-side `.started()` pld-validate + # case we actually raise inline so we don't need to + # set the it at all. + # - for recv side we set it inside `PldRx.decode_pld()` + # after a manual call to `pack_error()` since we + # actually want to emulate the `Error` from the mte we + # build here. So by default in that case, this is left + # as `None` here. + # ipc_msg=src_err_msg, + ) + msgtyperr.__cause__ = src_validation_error + return msgtyperr diff --git a/tractor/_ipc.py b/tractor/_ipc.py index ec7d348a..e5e3d10f 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -49,7 +49,8 @@ from tractor._exceptions import ( MsgTypeError, pack_from_raise, TransportClosed, - _mk_msg_type_err, + _mk_send_mte, + _mk_recv_mte, ) from tractor.msg import ( _ctxvar_MsgCodec, @@ -256,7 +257,7 @@ class MsgpackTCPStream(MsgTransport): # and always raise such that spec violations # are never allowed to be caught silently! 
except msgspec.ValidationError as verr: - msgtyperr: MsgTypeError = _mk_msg_type_err( + msgtyperr: MsgTypeError = _mk_recv_mte( msg=msg_bytes, codec=codec, src_validation_error=verr, @@ -321,7 +322,7 @@ class MsgpackTCPStream(MsgTransport): if type(msg) not in msgtypes.__msg_types__: if strict_types: - raise _mk_msg_type_err( + raise _mk_send_mte( msg, codec=codec, ) @@ -333,8 +334,9 @@ class MsgpackTCPStream(MsgTransport): try: bytes_data: bytes = codec.encode(msg) - except TypeError as typerr: - msgtyperr: MsgTypeError = _mk_msg_type_err( + except TypeError as _err: + typerr = _err + msgtyperr: MsgTypeError = _mk_send_mte( msg, codec=codec, message=( diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 80633e7e..91c0ddea 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -47,7 +47,7 @@ from tractor._exceptions import ( InternalError, _raise_from_unexpected_msg, MsgTypeError, - _mk_msg_type_err, + _mk_recv_mte, pack_error, ) from tractor._state import current_ipc_ctx @@ -264,7 +264,7 @@ class PldRx(Struct): # pack mgterr into error-msg for # reraise below; ensure remote-actor-err # info is displayed nicely? - mte: MsgTypeError = _mk_msg_type_err( + mte: MsgTypeError = _mk_recv_mte( msg=msg, codec=self.pld_dec, src_validation_error=valerr, @@ -277,19 +277,6 @@ class PldRx(Struct): if is_started_send_side: raise mte - # XXX TODO: remove this right? - # => any bad stated/return values should - # always be treated a remote errors right? - # - # if ( - # expect_msg is Return - # or expect_msg is Started - # ): - # # set emulated remote error more-or-less as the - # # runtime would - # ctx: Context = getattr(ipc, 'ctx', ipc) - # ctx._maybe_cancel_and_set_remote_error(mte) - # NOTE: the `.message` is automatically # transferred into the message as long as we # define it as a `Error.message` field. @@ -799,13 +786,18 @@ def validate_payload_msg( # raise any msg type error NO MATTER WHAT! except ValidationError as verr: - mte: MsgTypeError = _mk_msg_type_err( - msg=roundtripped, - codec=codec, - src_validation_error=verr, - verb_header='Trying to send ', - is_invalid_payload=True, - ) + try: + mte: MsgTypeError = _mk_recv_mte( + msg=roundtripped, + codec=codec, + src_validation_error=verr, + verb_header='Trying to send ', + is_invalid_payload=True, + ) + except BaseException: + __tracebackhide__: bool = False + raise + if not raise_mte: return mte -- 2.34.1 From 3b056fd761c152e3194f35352fc2087d851a0208 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 18 Jun 2024 09:57:10 -0400 Subject: [PATCH 164/190] Expand `PayloadMsg` doc-str --- tractor/msg/types.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 0fc0ee96..0904411f 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -73,11 +73,22 @@ class PayloadMsg( # as_array=True, ): ''' - The "god" boxing msg type. + An abstract payload boxing/shuttling IPC msg type. - Boxes user data-msgs in a `.pld` and uses `msgspec`'s tagged - unions support to enable a spec from a common msg inheritance - tree. + Boxes data-values passed to/from user code + + (i.e. 
any values passed by `tractor` application code using any of + + |_ `._streaming.MsgStream.send/receive()` + |_ `._context.Context.started/result()` + |_ `._ipc.Channel.send/recv()` + + aka our "IPC primitive APIs") + + as message "payloads" set to the `.pld` field and uses + `msgspec`'s "tagged unions" feature to support a subset of our + "SC-transitive shuttle protocol" specification with + a `msgspec.Struct` inheritance tree. ''' cid: str # call/context-id -- 2.34.1 From b0f0971ad4f4ce762a91e1466c4dad4c10394576 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 18 Jun 2024 14:40:26 -0400 Subject: [PATCH 165/190] Change `_Cache` reuse emit to `.runtime()` --- tractor/trionics/_mngrs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tractor/trionics/_mngrs.py b/tractor/trionics/_mngrs.py index f57be0a7..08e70ad2 100644 --- a/tractor/trionics/_mngrs.py +++ b/tractor/trionics/_mngrs.py @@ -271,8 +271,11 @@ async def maybe_open_context( yield False, yielded else: - log.info(f'Reusing _Cached resource for {ctx_key}') _Cache.users += 1 + log.runtime( + f'Reusing resource for `_Cache` user {_Cache.users}\n\n' + f'{ctx_key!r} -> {yielded!r}\n' + ) lock.release() yield True, yielded -- 2.34.1 From f64447148e297a4b13abc2bee562707a654d8739 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 18 Jun 2024 14:42:25 -0400 Subject: [PATCH 166/190] Avoid actor-nursery-exit warns on registrees Since a local-actor-nursery-parented subactor might also use the root as its registry, we need to avoid warning when short lived IPC `Channel` connections establish and then disconnect (quickly, bc the apparently the subactor isn't re-using an already cached parente-peer<->child conn as you'd expect efficiency..) since such cases currently considered normal operation of our super shoddy/naive "discovery sys" XD As such, (un)guard the whole local-actor-nursery OR channel-draining waiting blocks with the additional `or Actor._cancel_called` branch since really we should also be waiting on the parent nurse to exit (at least, for sure and always) when the local `Actor` indeed has been "globally" cancelled-called. Further add separate timeout warnings for channel-draining vs. local-actor-nursery-exit waiting since they are technically orthogonal cases (at least, afaik). Also, - adjust the `Actor._stream_handler()` connection status log-emit to `.runtime()`, especially to reduce noise around the aforementioned ephemeral registree connection-requests. - if we do wait on a local actor-nurse to exit, report its `._children` table (which should help figure out going forward how useful the warning is, if at all). --- tractor/_runtime.py | 136 +++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 41 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index fe2ecc00..dfa76cd8 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -446,7 +446,7 @@ class Actor: # inside ``open_root_actor()`` where there is a check for # a bound listener on the "arbiter" addr. the reset will be # because the handshake was never meant took place. - log.warning( + log.runtime( con_status + ' -> But failed to handshake? Ignoring..\n' @@ -522,24 +522,50 @@ class Actor: # the peer was cancelled we try to wait for them # to tear down their side of the connection before # moving on with closing our own side. 
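Aside, the guarded-wait idiom this hunk builds on, shown standalone (a
generic `trio` sketch, not the runtime code itself):

    import trio

    async def soft_wait(
        event: trio.Event,
        timeout: float = 0.5,
    ) -> bool:
        # bound the wait AND shield it so surrounding cancellation
        # can't interrupt the drain attempt (the "2-generals" bail
        # mentioned below).
        with trio.move_on_after(timeout) as cs:
            cs.shield = True
            await event.wait()
        # `True` -> we timed out, mirroring the `.cancelled_caught`
        # checks added in this patch.
        return cs.cancelled_caught
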
- if local_nursery: - if chan._cancel_called: - log.cancel( - 'Waiting on cancel request to peer\n' - f'`Portal.cancel_actor()` => {chan.uid}\n' - ) + if ( + local_nursery + and ( + self._cancel_called + or + chan._cancel_called + ) + # + # ^-TODO-^ along with this is there another condition + # that we should filter with to avoid entering this + # waiting block needlessly? + # -[ ] maybe `and local_nursery.cancelled` and/or + # only if the `._children` table is empty or has + # only `Portal`s with .chan._cancel_called == + # True` as per what we had below; the MAIN DIFF + # BEING that just bc one `Portal.cancel_actor()` + # was called, doesn't mean the whole actor-nurse + # is gonna exit any time soon right!? + # + # or + # all(chan._cancel_called for chan in chans) + + ): + log.cancel( + 'Waiting on cancel request to peer\n' + f'`Portal.cancel_actor()` => {chan.uid}\n' + ) # XXX: this is a soft wait on the channel (and its # underlying transport protocol) to close from the # remote peer side since we presume that any channel - # which is mapped to a sub-actor (i.e. it's managed by - # one of our local nurseries) has a message is sent to - # the peer likely by this actor (which is now in - # a cancelled condition) when the local runtime here is - # now cancelled while (presumably) in the middle of msg - # loop processing. - with trio.move_on_after(0.5) as cs: - cs.shield = True + # which is mapped to a sub-actor (i.e. it's managed + # by local actor-nursery) has a message that is sent + # to the peer likely by this actor (which may be in + # a shutdown sequence due to cancellation) when the + # local runtime here is now cancelled while + # (presumably) in the middle of msg loop processing. + chan_info: str = ( + f'{chan.uid}\n' + f'|_{chan}\n' + f' |_{chan.transport}\n\n' + ) + with trio.move_on_after(0.5) as drain_cs: + drain_cs.shield = True # attempt to wait for the far end to close the # channel and bail after timeout (a 2-generals @@ -556,10 +582,7 @@ class Actor: # TODO: factor this into a helper? log.warning( 'Draining msg from disconnected peer\n' - f'{chan.uid}\n' - f'|_{chan}\n' - f' |_{chan.transport}\n\n' - + f'{chan_info}' f'{pformat(msg)}\n' ) # cid: str|None = msg.get('cid') @@ -571,31 +594,62 @@ class Actor: cid, msg, ) - - # NOTE: when no call to `open_root_actor()` was - # made, we implicitly make that call inside - # the first `.open_nursery()`, in this case we - # can assume that we are the root actor and do - # not have to wait for the nursery-enterer to - # exit before shutting down the actor runtime. - # - # see matching note inside `._supervise.open_nursery()` - if not local_nursery._implicit_runtime_started: - log.runtime( - 'Waiting on local actor nursery to exit..\n' - f'|_{local_nursery}\n' - ) - await local_nursery.exited.wait() - - if ( - cs.cancelled_caught - and not local_nursery._implicit_runtime_started - ): + if drain_cs.cancelled_caught: log.warning( - 'Failed to exit local actor nursery?\n' + 'Timed out waiting on IPC transport channel to drain?\n' + f'{chan_info}' + ) + + # XXX NOTE XXX when no explicit call to + # `open_root_actor()` was made by the application + # (normally we implicitly make that call inside + # the first `.open_nursery()` in root-actor + # user/app code), we can assume that either we + # are NOT the root actor or are root but the + # runtime was started manually. 
and thus DO have + # to wait for the nursery-enterer to exit before + # shutting down the local runtime to avoid + # clobbering any ongoing subactor + # teardown/debugging/graceful-cancel. + # + # see matching note inside `._supervise.open_nursery()` + # + # TODO: should we have a separate cs + timeout + # block here? + if ( + # XXX SO either, + # - not root OR, + # - is root but `open_root_actor()` was + # entered manually (in which case we do + # the equiv wait there using the + # `devx._debug` sub-sys APIs). + not local_nursery._implicit_runtime_started + ): + log.runtime( + 'Waiting on local actor nursery to exit..\n' f'|_{local_nursery}\n' ) - # await _debug.pause() + with trio.move_on_after(0.5) as an_exit_cs: + an_exit_cs.shield = True + await local_nursery.exited.wait() + + # TODO: currently this is always triggering for every + # sub-daemon spawned from the `piker.services._mngr`? + # -[ ] how do we ensure that the IPC is supposed to + # be long lived and isn't just a register? + # |_ in the register case how can we signal that the + # ephemeral msg loop was intentional? + if ( + # not local_nursery._implicit_runtime_started + # and + an_exit_cs.cancelled_caught + ): + log.warning( + 'Timed out waiting on local actor-nursery to exit?\n' + f'{local_nursery}\n' + f' |_{pformat(local_nursery._children)}\n' + ) + # await _debug.pause() if disconnected: # if the transport died and this actor is still -- 2.34.1 From 548fbe725bc44f826ffbf765d6dd79bd3dfce40f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 18 Jun 2024 18:14:58 -0400 Subject: [PATCH 167/190] Flip `infected_asyncio` status msg to `.runtime()` --- tractor/to_asyncio.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 585b0b00..d1451b4c 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -577,14 +577,18 @@ def run_as_asyncio_guest( log.runtime(f"trio_main finished: {main_outcome!r}") # start the infection: run trio on the asyncio loop in "guest mode" - log.info(f"Infecting asyncio process with {trio_main}") + log.runtime( + 'Infecting `asyncio`-process with a `trio` guest-run of\n\n' + f'{trio_main!r}\n\n' + f'{trio_done_callback}\n' + ) trio.lowlevel.start_guest_run( trio_main, run_sync_soon_threadsafe=loop.call_soon_threadsafe, done_callback=trio_done_callback, ) - # ``.unwrap()`` will raise here on error + # NOTE `.unwrap()` will raise on error return (await trio_done_fut).unwrap() # might as well if it's installed. -- 2.34.1 From 6b1558b6758a6a059a504daa99eab65a416cc252 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Jun 2024 16:00:14 -0400 Subject: [PATCH 168/190] Add a `Context.portal`, more cancel tooing Might as well add a public maybe-getter for use on the "parent" side since it can be handy to check out-of-band cancellation conditions (like from `Portal.cancel_actor()`). Buncha bitty tweaks for more easily debugging cancel conditions: - add a `@.cancel_called.setter` for hooking into `.cancel_called = True` being set in hard to decipher "who cancelled us" scenarios. - use a new `self_ctxc: bool` var in `.cancel()` to capture the output state from `._is_self_cancelled(remote_error)` at call time so it can be compared against the measured value at crash-time (when REPL-ing it can often have already changed due to runtime teardown sequencing vs. the crash handler hook entry). - proxy `hide_tb` to `.drain_to_final_msg()` from `.wait_for_result()`. 
- use `remote_error.sender` attr directly instead of through `RAE.msgdata: dict` lookup. - change var name `our_uid` -> `peer_uid`; it's not "ours".. Other various docs/comment updates: - extend the main class doc to include some other name ideas. - change over all remaining `.result()` refs to `.wait_for_result()`. - doc more details on how we want `.outcome` to eventually signature. --- tractor/_context.py | 126 +++++++++++++++++++++++++++++++------------- 1 file changed, 88 insertions(+), 38 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index be492e4e..33d8597b 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -121,10 +121,19 @@ class Unresolved: @dataclass class Context: ''' - An inter-actor, SC transitive, `Task` communication context. + An inter-actor, SC transitive, `trio.Task` (pair) + communication context. - NB: This class should **never be instatiated directly**, it is allocated - by the runtime in 2 ways: + (We've also considered other names and ideas: + - "communicating tasks scope": cts + - "distributed task scope": dts + - "communicating tasks context": ctc + + **Got a better idea for naming? Make an issue dawg!** + ) + + NB: This class should **never be instatiated directly**, it is + allocated by the runtime in 2 ways: - by entering `Portal.open_context()` which is the primary public API for any "parent" task or, - by the RPC machinery's `._rpc._invoke()` as a `ctx` arg @@ -210,6 +219,16 @@ class Context: # more the the `Context` is needed? _portal: Portal | None = None + @property + def portal(self) -> Portal|None: + ''' + Return any wrapping memory-`Portal` if this is + a 'parent'-side task which called `Portal.open_context()`, + otherwise `None`. + + ''' + return self._portal + # NOTE: each side of the context has its own cancel scope # which is exactly the primitive that allows for # cross-actor-task-supervision and thus SC. @@ -299,6 +318,8 @@ class Context: # boxed exception. NOW, it's used for spawning overrun queuing # tasks when `.allow_overruns == True` !!! _scope_nursery: trio.Nursery|None = None + # ^-TODO-^ change name? + # -> `._scope_tn` "scope task nursery" # streaming overrun state tracking _in_overrun: bool = False @@ -408,10 +429,23 @@ class Context: ''' return self._cancel_called + @cancel_called.setter + def cancel_called(self, val: bool) -> None: + ''' + Set the self-cancelled request `bool` value. + + ''' + # to debug who frickin sets it.. + # if val: + # from .devx import pause_from_sync + # pause_from_sync() + + self._cancel_called = val + @property def canceller(self) -> tuple[str, str]|None: ''' - ``Actor.uid: tuple[str, str]`` of the (remote) + `Actor.uid: tuple[str, str]` of the (remote) actor-process who's task was cancelled thus causing this (side of the) context to also be cancelled. @@ -515,7 +549,7 @@ class Context: # the local scope was never cancelled # and instead likely we received a remote side - # # cancellation that was raised inside `.result()` + # # cancellation that was raised inside `.wait_for_result()` # or ( # (se := self._local_error) # and se is re @@ -585,6 +619,10 @@ class Context: self, error: BaseException, + # TODO: manual toggle for cases where we wouldn't normally + # mark ourselves cancelled but want to? 
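Aside, parent-side usage of the new property (a fragment-sketch;
`portal`/`ep` stand in for any live portal and `@context` endpoint):

    async with portal.open_context(ep) as (ctx, first):
        # only the 'parent' side (the `.open_context()` caller) gets
        # a non-`None` wrapping portal back; child-side tasks see
        # `ctx.portal is None`.
        assert ctx.portal is portal
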
+ # set_cancel_called: bool = False, + ) -> None: ''' (Maybe) cancel this local scope due to a received remote @@ -603,7 +641,7 @@ class Context: - `Portal.open_context()` - `Portal.result()` - `Context.open_stream()` - - `Context.result()` + - `Context.wait_for_result()` when called/closed by actor local task(s). @@ -729,7 +767,7 @@ class Context: # Cancel the local `._scope`, catch that # `._scope.cancelled_caught` and re-raise any remote error - # once exiting (or manually calling `.result()`) the + # once exiting (or manually calling `.wait_for_result()`) the # `.open_context()` block. cs: trio.CancelScope = self._scope if ( @@ -764,8 +802,9 @@ class Context: # `trio.Cancelled` subtype here ;) # https://github.com/goodboy/tractor/issues/368 message: str = 'Cancelling `Context._scope` !\n\n' + # from .devx import pause_from_sync + # pause_from_sync() self._scope.cancel() - else: message: str = 'NOT cancelling `Context._scope` !\n\n' # from .devx import mk_pdb @@ -845,15 +884,15 @@ class Context: @property def repr_api(self) -> str: + return 'Portal.open_context()' + + # TODO: use `.dev._frame_stack` scanning to find caller! # ci: CallerInfo|None = self._caller_info # if ci: # return ( # f'{ci.api_nsp}()\n' # ) - # TODO: use `.dev._frame_stack` scanning to find caller! - return 'Portal.open_context()' - async def cancel( self, timeout: float = 0.616, @@ -889,7 +928,8 @@ class Context: ''' side: str = self.side - self._cancel_called: bool = True + # XXX for debug via the `@.setter` + self.cancel_called = True header: str = ( f'Cancelling ctx with peer from {side.upper()} side\n\n' @@ -912,7 +952,7 @@ class Context: # `._scope.cancel()` since we expect the eventual # `ContextCancelled` from the other side to trigger this # when the runtime finally receives it during teardown - # (normally in `.result()` called from + # (normally in `.wait_for_result()` called from # `Portal.open_context().__aexit__()`) if side == 'parent': if not self._portal: @@ -1025,10 +1065,10 @@ class Context: ''' __tracebackhide__: bool = hide_tb - our_uid: tuple = self.chan.uid + peer_uid: tuple = self.chan.uid # XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption - # for "graceful cancellation" case: + # for "graceful cancellation" case(s): # # Whenever a "side" of a context (a `Task` running in # an actor) **is** the side which requested ctx @@ -1045,9 +1085,11 @@ class Context: # set to the `Actor.uid` of THIS task (i.e. the # cancellation requesting task's actor is the actor # checking whether it should absorb the ctxc). + self_ctxc: bool = self._is_self_cancelled(remote_error) if ( + self_ctxc + and not raise_ctxc_from_self_call - and self._is_self_cancelled(remote_error) # TODO: ?potentially it is useful to emit certain # warning/cancel logs for the cases where the @@ -1077,8 +1119,8 @@ class Context: and isinstance(remote_error, RemoteActorError) and remote_error.boxed_type is StreamOverrun - # and tuple(remote_error.msgdata['sender']) == our_uid - and tuple(remote_error.sender) == our_uid + # and tuple(remote_error.msgdata['sender']) == peer_uid + and tuple(remote_error.sender) == peer_uid ): # NOTE: we set the local scope error to any "self # cancellation" error-response thus "absorbing" @@ -1140,9 +1182,9 @@ class Context: of the remote cancellation. ''' - __tracebackhide__ = hide_tb + __tracebackhide__: bool = hide_tb assert self._portal, ( - "Context.result() can not be called from callee side!" + '`Context.wait_for_result()` can not be called from callee side!' 
) if self._final_result_is_set(): return self._result @@ -1197,10 +1239,11 @@ class Context: # raising something we know might happen # during cancellation ;) (not self._cancel_called) - ) + ), + hide_tb=hide_tb, ) # TODO: eventually make `.outcome: Outcome` and thus return - # `self.outcome.unwrap()` here! + # `self.outcome.unwrap()` here? return self.outcome # TODO: switch this with above! @@ -1284,17 +1327,24 @@ class Context: Any| RemoteActorError| ContextCancelled + # TODO: make this a `outcome.Outcome`! ): ''' - The final "outcome" from an IPC context which can either be - some Value returned from the target `@context`-decorated - remote task-as-func, or an `Error` wrapping an exception - raised from an RPC task fault or cancellation. + Return the "final outcome" (state) of the far end peer task + non-blocking. If the remote task has not completed then this + field always resolves to the module defined `Unresolved` + handle. - Note that if the remote task has not terminated then this - field always resolves to the module defined `Unresolved` handle. + ------ - ------ + TODO->( this is doc-driven-dev content not yet actual ;P ) - TODO: implement this using `outcome.Outcome` types? + The final "outcome" from an IPC context which can be any of: + - some `outcome.Value` which boxes the returned output from the peer task's + `@context`-decorated remote task-as-func, or + - an `outcome.Error` wrapping an exception raised that same RPC task + after a fault or cancellation, or + - an unresolved `outcome.Outcome` when the peer task is still + executing and has not yet completed. ''' return ( @@ -1583,7 +1633,7 @@ class Context: - NEVER `return` early before delivering the msg! bc if the error is a ctxc and there is a task waiting on - `.result()` we need the msg to be + `.wait_for_result()` we need the msg to be `send_chan.send_nowait()`-ed over the `._rx_chan` so that the error is relayed to that waiter task and thus raised in user code! @@ -1828,7 +1878,7 @@ async def open_context_from_portal( When the "callee" (side that is "called"/started by a call to *this* method) returns, the caller side (this) unblocks and any final value delivered from the other end can be - retrieved using the `Contex.result()` api. + retrieved using the `Contex.wait_for_result()` api. The yielded ``Context`` instance further allows for opening bidirectional streams, explicit cancellation and @@ -1965,14 +2015,14 @@ async def open_context_from_portal( yield ctx, first # ??TODO??: do we still want to consider this or is - # the `else:` block handling via a `.result()` + # the `else:` block handling via a `.wait_for_result()` # call below enough?? # - # -[ ] pretty sure `.result()` internals do the + # -[ ] pretty sure `.wait_for_result()` internals do the # same as our ctxc handler below so it ended up # being same (repeated?) behaviour, but ideally we # wouldn't have that duplication either by somehow - # factoring the `.result()` handler impl in a way + # factoring the `.wait_for_result()` handler impl in a way # that we can re-use it around the `yield` ^ here # or vice versa? # @@ -2110,7 +2160,7 @@ async def open_context_from_portal( # AND a group-exc is only raised if there was > 1 # tasks started *here* in the "caller" / opener # block. If any one of those tasks calls - # `.result()` or `MsgStream.receive()` + # `.wait_for_result()` or `MsgStream.receive()` # `._maybe_raise_remote_err()` will be transitively # called and the remote error raised causing all # tasks to be cancelled. 
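# (editor's illustration of the "doc-driven-dev" `.outcome` idea from
#  the property docstring above: a minimal sketch assuming the
#  `outcome` lib (the one used by `trio`) and this module's
#  `Unresolved` sentinel; NOT the current implementation.)
import outcome

def as_outcome(ctx: Context) -> outcome.Outcome|type[Unresolved]:
    if re := ctx._remote_error:
        # peer task faulted or was cancelled -> boxed `Error`
        return outcome.Error(re)
    if ctx._final_result_is_set():
        # the peer task's `return` value was delivered -> `Value`
        return outcome.Value(ctx._result)
    # peer task is still executing
    return Unresolved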
@@ -2180,7 +2230,7 @@ async def open_context_from_portal( f'|_{ctx._task}\n' ) # XXX NOTE XXX: the below call to - # `Context.result()` will ALWAYS raise + # `Context.wait_for_result()` will ALWAYS raise # a `ContextCancelled` (via an embedded call to # `Context._maybe_raise_remote_err()`) IFF # a `Context._remote_error` was set by the runtime @@ -2190,10 +2240,10 @@ async def open_context_from_portal( # ALWAYS SET any time "callee" side fails and causes "caller # side" cancellation via a `ContextCancelled` here. try: - result_or_err: Exception|Any = await ctx.result() + result_or_err: Exception|Any = await ctx.wait_for_result() except BaseException as berr: # on normal teardown, if we get some error - # raised in `Context.result()` we still want to + # raised in `Context.wait_for_result()` we still want to # save that error on the ctx's state to # determine things like `.cancelled_caught` for # cases where there was remote cancellation but -- 2.34.1 From 59fa9dc4522fe1c1321cd41cf786b4e1f5964368 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 27 Jun 2024 16:25:46 -0400 Subject: [PATCH 169/190] Prep for legacy RPC API factor-n-remove This change is adding commentary about the upcoming API removal and simplification of nursery + portal internals; no actual code changes are included. The plan to (re)move the old RPC methods: - `ActorNursery.run_in_actor()` - `Portal.run()` - `Portal.run_from_ns()` and any related impl internals out of each conc-primitive and instead into something like a `.hilevel.rpc` set of APIs which then are all implemented using the newer and more lowlevel `Context`/`MsgStream` primitives instead Bo Further, - formally deprecate the `Portal.result()` meth for `.wait_for_result()`. - only `log.info()` about runtime shutdown in the implicit root case. --- tractor/_portal.py | 42 +++++++++++++++++++++++++++++++++++++---- tractor/_spawn.py | 4 ++-- tractor/_supervise.py | 44 +++++++++++++++++++++++++++++++------------ 3 files changed, 72 insertions(+), 18 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 2c676e12..0f698836 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -121,7 +121,8 @@ class Portal: ) return self.chan - # TODO: factor this out into an `ActorNursery` wrapper + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. async def _submit_for_result( self, ns: str, @@ -141,13 +142,22 @@ class Portal: portal=self, ) + # TODO: we should deprecate this API right? since if we remove + # `.run_in_actor()` (and instead move it to a `.highlevel` + # wrapper api (around a single `.open_context()` call) we don't + # really have any notion of a "main" remote task any more? + # # @api_frame - async def result(self) -> Any: + async def wait_for_result( + self, + hide_tb: bool = True, + ) -> Any: ''' - Return the result(s) from the remote actor's "main" task. + Return the final result delivered by a `Return`-msg from the + remote peer actor's "main" task's `return` statement. ''' - __tracebackhide__ = True + __tracebackhide__: bool = hide_tb # Check for non-rpc errors slapped on the # channel for which we always raise exc = self.channel._exc @@ -182,6 +192,23 @@ class Portal: return self._final_result_pld + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. 
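    # (editor's guess at the shape of that wrapper; hypothetical
    #  `.highlevel` code built on a single ctx roundtrip:
    #
    #    async def run_fn(portal, fn, **kwargs):
    #        async with portal.open_context(fn, **kwargs) as (ctx, _):
    #            return await ctx.wait_for_result()
    #  )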
+ async def result( + self, + *args, + **kwargs, + ) -> Any|Exception: + typname: str = type(self).__name__ + log.warning( + f'`{typname}.result()` is DEPRECATED!\n' + f'Use `{typname}.wait_for_result()` instead!\n' + ) + return await self.wait_for_result( + *args, + **kwargs, + ) + async def _cancel_streams(self): # terminate all locally running async generator # IPC calls @@ -240,6 +267,7 @@ class Portal: f'{reminfo}' ) + # XXX the one spot we set it? self.channel._cancel_called: bool = True try: # send cancel cmd - might not get response @@ -279,6 +307,8 @@ class Portal: ) return False + # TODO: do we still need this for low level `Actor`-runtime + # method calls or can we also remove it? async def run_from_ns( self, namespace_path: str, @@ -316,6 +346,8 @@ class Portal: expect_msg=Return, ) + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. async def run( self, func: str, @@ -370,6 +402,8 @@ class Portal: expect_msg=Return, ) + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. @acm async def open_stream_from( self, diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 481e2981..986c2e29 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -149,7 +149,7 @@ async def exhaust_portal( # XXX: streams should never be reaped here since they should # always be established and shutdown using a context manager api - final: Any = await portal.result() + final: Any = await portal.wait_for_result() except ( Exception, @@ -223,8 +223,8 @@ async def cancel_on_completion( async def hard_kill( proc: trio.Process, - terminate_after: int = 1.6, + terminate_after: int = 1.6, # NOTE: for mucking with `.pause()`-ing inside the runtime # whilst also hacking on it XD # terminate_after: int = 99999, diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 8f3574bb..fb737c12 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -80,6 +80,7 @@ class ActorNursery: ''' def __init__( self, + # TODO: maybe def these as fields of a struct looking type? actor: Actor, ria_nursery: trio.Nursery, da_nursery: trio.Nursery, @@ -88,8 +89,10 @@ class ActorNursery: ) -> None: # self.supervisor = supervisor # TODO self._actor: Actor = actor - self._ria_nursery = ria_nursery + + # TODO: rename to `._tn` for our conventional "task-nursery" self._da_nursery = da_nursery + self._children: dict[ tuple[str, str], tuple[ @@ -98,15 +101,13 @@ class ActorNursery: Portal | None, ] ] = {} - # portals spawned with ``run_in_actor()`` are - # cancelled when their "main" result arrives - self._cancel_after_result_on_exit: set = set() + self.cancelled: bool = False self._join_procs = trio.Event() self._at_least_one_child_in_debug: bool = False self.errors = errors - self.exited = trio.Event() self._scope_error: BaseException|None = None + self.exited = trio.Event() # NOTE: when no explicit call is made to # `.open_root_actor()` by application code, @@ -116,6 +117,13 @@ class ActorNursery: # and syncing purposes to any actor opened nurseries. self._implicit_runtime_started: bool = False + # TODO: remove the `.run_in_actor()` API and thus this 2ndary + # nursery when that API get's moved outside this primitive! 
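        # (editor's aside on the planned migration, a guess only:
        #
        #    # legacy 2-step API slated for removal,
        #    portal = await an.run_in_actor(some_fn)
        #    final = await portal.wait_for_result()
        #
        #  would collapse into a single `Portal.open_context()`
        #  roundtrip in the eventual `.hilevel` layer.)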
+ self._ria_nursery = ria_nursery + # portals spawned with ``run_in_actor()`` are + # cancelled when their "main" result arrives + self._cancel_after_result_on_exit: set = set() + async def start_actor( self, name: str, @@ -126,10 +134,14 @@ class ActorNursery: rpc_module_paths: list[str]|None = None, enable_modules: list[str]|None = None, loglevel: str|None = None, # set log level per subactor - nursery: trio.Nursery|None = None, debug_mode: bool|None = None, infect_asyncio: bool = False, + # TODO: ideally we can rm this once we no longer have + # a `._ria_nursery` since the dependent APIs have been + # removed! + nursery: trio.Nursery|None = None, + ) -> Portal: ''' Start a (daemon) actor: an process that has no designated @@ -200,6 +212,7 @@ class ActorNursery: # |_ dynamic @context decoration on child side # |_ implicit `Portal.open_context() as (ctx, first):` # and `return first` on parent side. + # |_ mention how it's similar to `trio-parallel` API? # -[ ] use @api_frame on the wrapper async def run_in_actor( self, @@ -269,11 +282,14 @@ class ActorNursery: ) -> None: ''' - Cancel this nursery by instructing each subactor to cancel - itself and wait for all subactors to terminate. + Cancel this actor-nursery by instructing each subactor's + runtime to cancel and wait for all underlying sub-processes + to terminate. - If ``hard_killl`` is set to ``True`` then kill the processes - directly without any far end graceful ``trio`` cancellation. + If `hard_kill` is set then kill the processes directly using + the spawning-backend's API/OS-machinery without any attempt + at (graceful) `trio`-style cancellation using our + `Actor.cancel()`. ''' __runtimeframe__: int = 1 # noqa @@ -629,8 +645,12 @@ async def open_nursery( f'|_{an}\n' ) - # shutdown runtime if it was started if implicit_runtime: + # shutdown runtime if it was started and report noisly + # that we're did so. msg += '=> Shutting down actor runtime <=\n' + log.info(msg) - log.info(msg) + else: + # keep noise low during std operation. + log.runtime(msg) -- 2.34.1 From 7db5bbffc50206489305815016d8c7fce625c084 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 12:59:31 -0400 Subject: [PATCH 170/190] Always reset `._state._ctxvar_Context` to prior Not sure how I forgot this but, obviously it's correct context-var semantics to revert the current IPC `Context` (set in the latest `.open_context()` block) such that any prior instance is reset.. This ensures the sanity `assert`s pass inside `.msg._ops.maybe_limit_plds()` and just in general ensures for any task that the last opened `Context` is the one returned from `current_ipc_ctx()`. --- tractor/_context.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tractor/_context.py b/tractor/_context.py index 33d8597b..9b4cb6e6 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -38,6 +38,7 @@ from collections import deque from contextlib import ( asynccontextmanager as acm, ) +from contextvars import Token from dataclasses import ( dataclass, field, @@ -1943,7 +1944,7 @@ async def open_context_from_portal( ) assert ctx._remote_func_type == 'context' assert ctx._caller_info - _ctxvar_Context.set(ctx) + prior_ctx_tok: Token = _ctxvar_Context.set(ctx) # placeholder for any exception raised in the runtime # or by user tasks which cause this context's closure. 
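# (editor's standalone illustration of the set/reset-`Token` pattern
#  this patch adopts; pure stdlib `contextvars`, not tractor code:)
from contextvars import ContextVar, Token

_cvar: ContextVar[str|None] = ContextVar('demo', default=None)

def run_with(val: str) -> str:
    tok: Token = _cvar.set(val)
    try:
        return _cvar.get()
    finally:
        # ALWAYS revert to the prior value (even on error) so any
        # outer scope's setting is restored, mirroring the
        # `_ctxvar_Context.reset(prior_ctx_tok)` call above.
        _cvar.reset(tok)

assert run_with('inner') == 'inner'
assert _cvar.get() is None  # prior (default) value was restored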
@@ -2394,6 +2395,9 @@ async def open_context_from_portal( None, ) + # XXX revert to prior IPC-task-ctx scope + _ctxvar_Context.reset(prior_ctx_tok) + def mk_context( chan: Channel, -- 2.34.1 From 8ff682440d54b6fb63fa3553148359f1a9e6dee0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 14:25:53 -0400 Subject: [PATCH 171/190] Further formalize `greenback` integration Since we more or less require it for `tractor.pause_from_sync()` this refines enable toggles and their relay down the actor tree as well as more explicit logging around init and activation. Tweaks summary: - `.info()` report the module if discovered during root boot. - use a `._state._runtime_vars['use_greenback']: bool` activation flag inside `Actor._from_parent()` to determine if the sub should try to use it and set to `False` if mod-loading fails / not installed. - expose `maybe_init_greenback()` from `.devx` sugpkg. - comment out RTE in `._pause()` for now since we already have it in `.pause_from_sync()`. - always `.exception()` on `maybe_init_greenback()` import errors to clarify the underlying failure deats. - always explicitly report if `._state._runtime_vars['use_greenback']` was NOT set when `.pause_from_sync()` is called. Other `._runtime.async_main()` adjustments: - combine the "internal error call ur parents" message and the failed registry contact status into one new `err_report: str`. - drop the final exception handler's call to `Actor.lifetime_stack.close()` since we're already doing it in the `finally:` block and the earlier call has no currently known benefit. - only report on the `.lifetime_stack()` callbacks if any are detected as registered. --- tractor/_root.py | 27 +++++--- tractor/_runtime.py | 137 +++++++++++++++++++++++++++------------ tractor/_state.py | 2 +- tractor/devx/__init__.py | 1 + tractor/devx/_debug.py | 87 ++++++++++++++++--------- 5 files changed, 173 insertions(+), 81 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index bbd9d93d..882285a5 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -21,6 +21,7 @@ Root actor runtime ignition(s). from contextlib import asynccontextmanager as acm from functools import partial import importlib +import inspect import logging import os import signal @@ -115,10 +116,16 @@ async def open_root_actor( if ( debug_mode and maybe_enable_greenback - and await _debug.maybe_init_greenback( - raise_not_found=False, + and ( + maybe_mod := await _debug.maybe_init_greenback( + raise_not_found=False, + ) ) ): + logger.info( + f'Found `greenback` installed @ {maybe_mod}\n' + 'Enabling `tractor.pause_from_sync()` support!\n' + ) os.environ['PYTHONBREAKPOINT'] = ( 'tractor.devx._debug._sync_pause_from_builtin' ) @@ -264,7 +271,9 @@ async def open_root_actor( except OSError: # TODO: make this a "discovery" log level? 
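                # (editor's aside continuing the TODO: such a level
                #  could slot into the `CUSTOM_LEVELS` table from
                #  `tractor.log` (patched further below), e.g. a
                #  guessed entry like,
                #
                #    'DISCOVERY': 16,
                #  )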
- logger.warning(f'No actor registry found @ {addr}') + logger.info( + f'No actor registry found @ {addr}\n' + ) async with trio.open_nursery() as tn: for addr in registry_addrs: @@ -278,7 +287,6 @@ async def open_root_actor( # Create a new local root-actor instance which IS NOT THE # REGISTRAR if ponged_addrs: - if ensure_registry: raise RuntimeError( f'Failed to open `{name}`@{ponged_addrs}: ' @@ -365,24 +373,25 @@ async def open_root_actor( ) try: yield actor - except ( Exception, BaseExceptionGroup, ) as err: - - import inspect + # XXX NOTE XXX see equiv note inside + # `._runtime.Actor._stream_handler()` where in the + # non-root or root-that-opened-this-mahually case we + # wait for the local actor-nursery to exit before + # exiting the transport channel handler. entered: bool = await _debug._maybe_enter_pm( err, api_frame=inspect.currentframe(), ) - if ( not entered and not is_multi_cancelled(err) ): - logger.exception('Root actor crashed:\n') + logger.exception('Root actor crashed\n') # ALWAYS re-raise any error bubbled up from the # runtime! diff --git a/tractor/_runtime.py b/tractor/_runtime.py index dfa76cd8..efc9be9c 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -1048,6 +1048,10 @@ class Actor: # TODO: another `Struct` for rtvs.. rvs: dict[str, Any] = spawnspec._runtime_vars if rvs['_debug_mode']: + from .devx import ( + enable_stack_on_sig, + maybe_init_greenback, + ) try: # TODO: maybe return some status msgs upward # to that we can emit them in `con_status` @@ -1055,13 +1059,27 @@ class Actor: log.devx( 'Enabling `stackscope` traces on SIGUSR1' ) - from .devx import enable_stack_on_sig enable_stack_on_sig() + except ImportError: log.warning( '`stackscope` not installed for use in debug mode!' ) + if rvs.get('use_greenback', False): + maybe_mod: ModuleType|None = await maybe_init_greenback() + if maybe_mod: + log.devx( + 'Activated `greenback` ' + 'for `tractor.pause_from_sync()` support!' + ) + else: + rvs['use_greenback'] = False + log.warning( + '`greenback` not installed for use in debug mode!\n' + '`tractor.pause_from_sync()` not available!' + ) + rvs['_is_root'] = False _state._runtime_vars.update(rvs) @@ -1720,8 +1738,8 @@ async def async_main( # Register with the arbiter if we're told its addr log.runtime( - f'Registering `{actor.name}` ->\n' - f'{pformat(accept_addrs)}' + f'Registering `{actor.name}` => {pformat(accept_addrs)}\n' + # ^-TODO-^ we should instead show the maddr here^^ ) # TODO: ideally we don't fan out to all registrars @@ -1779,57 +1797,90 @@ async def async_main( # Blocks here as expected until the root nursery is # killed (i.e. this actor is cancelled or signalled by the parent) - except Exception as err: - log.runtime("Closing all actor lifetime contexts") - actor.lifetime_stack.close() - + except Exception as internal_err: if not is_registered: + err_report: str = ( + '\n' + "Actor runtime (internally) failed BEFORE contacting the registry?\n" + f'registrars -> {actor.reg_addrs} ?!?!\n\n' + + '^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! 
^^^\n\n' + '\t>> CALMLY CANCEL YOUR CHILDREN AND CALL YOUR PARENTS <<\n\n' + + '\tIf this is a sub-actor hopefully its parent will keep running ' + 'and cancel/reap this sub-process..\n' + '(well, presuming this error was propagated upward)\n\n' + + '\t---------------------------------------------\n' + '\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT @ ' # oneline + 'https://github.com/goodboy/tractor/issues\n' + '\t---------------------------------------------\n' + ) + # TODO: I guess we could try to connect back # to the parent through a channel and engage a debugger # once we have that all working with std streams locking? - log.exception( - f"Actor errored and failed to register with arbiter " - f"@ {actor.reg_addrs[0]}?") - log.error( - "\n\n\t^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n" - "\t>> CALMLY CALL THE AUTHORITIES AND HIDE YOUR CHILDREN <<\n\n" - "\tIf this is a sub-actor hopefully its parent will keep running " - "correctly presuming this error was safely ignored..\n\n" - "\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT: " - "https://github.com/goodboy/tractor/issues\n" - ) + log.exception(err_report) if actor._parent_chan: await try_ship_error_to_remote( actor._parent_chan, - err, + internal_err, ) # always! - match err: + match internal_err: case ContextCancelled(): log.cancel( f'Actor: {actor.uid} was task-context-cancelled with,\n' - f'str(err)' + f'str(internal_err)' ) case _: - log.exception("Actor errored:") - raise + log.exception( + 'Main actor-runtime task errored\n' + f' Closing all actor lifetime contexts..' + teardown_report: str = ( + 'Main actor-runtime task completed\n' ) - # tear down all lifetime contexts if not in guest mode - # XXX: should this just be in the entrypoint? - actor.lifetime_stack.close() - # TODO: we can't actually do this bc the debugger - # uses the _service_n to spawn the lock task, BUT, - # in theory if we had the root nursery surround this finally - # block it might be actually possible to debug THIS - # machinery in the same way as user task code? + # ?TODO? should this be in `._entry`/`._root` mods instead? + # + # teardown any actor-lifetime-bound contexts + ls: ExitStack = actor.lifetime_stack + # only report if there are any registered + cbs: list[Callable] = [ + repr(tup[1].__wrapped__) + for tup in ls._exit_callbacks + ] + if cbs: + cbs_str: str = '\n'.join(cbs) + teardown_report += ( + '-> Closing actor-lifetime-bound callbacks\n\n' + f'}}>\n' + f' |_{ls}\n' + f' |_{cbs_str}\n' + ) + # XXX NOTE XXX this will cause an error which + # prevents any `infected_aio` actor from continuing + # and any callbacks in the `ls` here WILL NOT be + # called!! + # await _debug.pause(shield=True) + + ls.close() + + # XXX TODO but hard XXX + # we can't actually do this bc the debugger uses the + # _service_n to spawn the lock task, BUT, in theory if we had + # the root nursery surround this finally block it might be + # actually possible to debug THIS machinery in the same way + # as user task code? 
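    # (editor's note on the `ls._exit_callbacks` read above: it's
    #  private stdlib API holding `(is_sync, cb)` tuples, so a more
    #  defensive spelling might be - sketch only:
    #
    #    cbs: list[str] = [
    #        repr(getattr(cb, '__wrapped__', cb))
    #        for _, cb in getattr(ls, '_exit_callbacks', ())
    #    ]
    #  )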
+ # # if actor.name == 'brokerd.ib': # with CancelScope(shield=True): # await _debug.breakpoint() @@ -1859,9 +1910,9 @@ async def async_main( failed = True if failed: - log.warning( - f'Failed to unregister {actor.name} from ' - f'registar @ {addr}' + teardown_report += ( + f'-> Failed to unregister {actor.name} from ' + f'registar @ {addr}\n' ) # Ensure all peers (actors connected to us as clients) are finished @@ -1869,13 +1920,17 @@ async def async_main( if any( chan.connected() for chan in chain(*actor._peers.values()) ): - log.runtime( - f"Waiting for remaining peers {actor._peers} to clear") + teardown_report += ( + f'-> Waiting for remaining peers {actor._peers} to clear..\n' + ) + log.runtime(teardown_report) with CancelScope(shield=True): await actor._no_more_peers.wait() - log.runtime("All peer channels are complete") - log.runtime("Runtime completed") + teardown_report += ('-> All peer channels are complete\n') + + teardown_report += ('Actor runtime exited') + log.info(teardown_report) # TODO: rename to `Registry` and move to `._discovery`! diff --git a/tractor/_state.py b/tractor/_state.py index 8c5cca14..9f896005 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -44,7 +44,7 @@ _runtime_vars: dict[str, Any] = { '_root_mailbox': (None, None), '_registry_addrs': [], - # for `breakpoint()` support + # for `tractor.pause_from_sync()` & `breakpoint()` support 'use_greenback': False, } diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index bb72d1f9..cfcff931 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -29,6 +29,7 @@ from ._debug import ( shield_sigint_handler as shield_sigint_handler, open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, + maybe_init_greenback as maybe_init_greenback, post_mortem as post_mortem, mk_pdb as mk_pdb, ) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index ccf57d62..1135932c 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -69,6 +69,7 @@ from trio import ( import tractor from tractor.log import get_logger from tractor._context import Context +from tractor import _state from tractor._state import ( current_actor, is_root_process, @@ -87,9 +88,6 @@ if TYPE_CHECKING: from tractor._runtime import ( Actor, ) - from tractor.msg import ( - _codec, - ) log = get_logger(__name__) @@ -1599,12 +1597,16 @@ async def _pause( try: task: Task = current_task() except RuntimeError as rte: - log.exception('Failed to get current task?') - if actor.is_infected_aio(): - raise RuntimeError( - '`tractor.pause[_from_sync]()` not yet supported ' - 'for infected `asyncio` mode!' - ) from rte + __tracebackhide__: bool = False + log.exception( + 'Failed to get current `trio`-task?' + ) + # if actor.is_infected_aio(): + # mk_pdb().set_trace() + # raise RuntimeError( + # '`tractor.pause[_from_sync]()` not yet supported ' + # 'directly (infected) `asyncio` tasks!' 
+ # ) from rte raise @@ -2163,22 +2165,22 @@ def maybe_import_greenback( return False -async def maybe_init_greenback( - **kwargs, -) -> None|ModuleType: - - if mod := maybe_import_greenback(**kwargs): - await mod.ensure_portal() - log.devx( - '`greenback` portal opened!\n' - 'Sync debug support activated!\n' - ) - return mod +async def maybe_init_greenback(**kwargs) -> None|ModuleType: + try: + if mod := maybe_import_greenback(**kwargs): + await mod.ensure_portal() + log.devx( + '`greenback` portal opened!\n' + 'Sync debug support activated!\n' + ) + return mod + except BaseException: + log.exception('Failed to init `greenback`..') + raise return None - async def _pause_from_bg_root_thread( behalf_of_thread: Thread, repl: PdbREPL, @@ -2324,6 +2326,12 @@ def pause_from_sync( # TODO: once supported, remove this AND the one # inside `._pause()`! + # outstanding impl fixes: + # -[ ] need to make `.shield_sigint()` below work here! + # -[ ] how to handle `asyncio`'s new SIGINT-handler + # injection? + # -[ ] should `breakpoint()` work and what does it normally + # do in `asyncio` ctxs? if actor.is_infected_aio(): raise RuntimeError( '`tractor.pause[_from_sync]()` not yet supported ' @@ -2399,18 +2407,37 @@ def pause_from_sync( else: # we are presumably the `trio.run()` + main thread # raises on not-found by default greenback: ModuleType = maybe_import_greenback() + + # TODO: how to ensure this is either dynamically (if + # needed) called here (in some bg tn??) or that the + # subactor always already called it? + # greenback: ModuleType = await maybe_init_greenback() + message += f'-> imported {greenback}\n' repl_owner: Task = current_task() message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' - out = greenback.await_( - _pause( - debug_func=None, - repl=repl, - hide_tb=hide_tb, - called_from_sync=True, - **_pause_kwargs, + try: + out = greenback.await_( + _pause( + debug_func=None, + repl=repl, + hide_tb=hide_tb, + called_from_sync=True, + **_pause_kwargs, + ) ) - ) + except RuntimeError as rte: + if not _state._runtime_vars.get( + 'use_greenback', + False, + ): + raise RuntimeError( + '`greenback` was never initialized in this actor!?\n\n' + f'{_state._runtime_vars}\n' + ) from rte + + raise + if out: bg_task, repl = out assert repl is repl @@ -2801,10 +2828,10 @@ def open_crash_handler( `trio.run()`. ''' + err: BaseException try: yield except tuple(catch) as err: - if type(err) not in ignore: pdbp.xpm() -- 2.34.1 From c7f153c266ffd69eb954a4aab441c4383edf583f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 14:46:29 -0400 Subject: [PATCH 172/190] Update `MsgTypeError` content matching to latest --- tests/test_pldrx_limiting.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index e5ce691a..bb9a3ef7 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -285,14 +285,14 @@ def test_basic_payload_spec( if invalid_started: msg_type_str: str = 'Started' - bad_value_str: str = '10' + bad_value: int = 10 elif invalid_return: msg_type_str: str = 'Return' - bad_value_str: str = "'yo'" + bad_value: str = 'yo' else: # XXX but should never be used below then.. 
msg_type_str: str = '' - bad_value_str: str = '' + bad_value: str = '' maybe_mte: MsgTypeError|None = None should_raise: Exception|None = ( @@ -307,8 +307,10 @@ def test_basic_payload_spec( raises=should_raise, ensure_in_message=[ f"invalid `{msg_type_str}` msg payload", - f"value: `{bad_value_str}` does not " - f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", + f'{bad_value}', + f'has type {type(bad_value)!r}', + 'not match type-spec', + f'`{msg_type_str}.pld: PldMsg|NoneType`', ], # only for debug # post_mortem=True, -- 2.34.1 From 96960982ffd04392f0fd984085b8942f73c91c39 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 18:45:52 -0400 Subject: [PATCH 173/190] Update `._entry` actor status log Log-report the different types of actor exit conditions including cancel via KBI, error or normal return with varying levels depending on case. Also, start proto-ing out this weird ascii-syntax idea for describing conc system states and implement the first bit in a `nest_from_op()` log-message fmter that joins and indents an obj `repr()` with a tree-like `'>)\n|_'` header. --- tractor/_entry.py | 160 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 141 insertions(+), 19 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index e22a4f1f..60050ea4 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -20,7 +20,8 @@ Sub-process entry points. """ from __future__ import annotations from functools import partial -# import textwrap +import os +import textwrap from typing import ( Any, TYPE_CHECKING, @@ -58,7 +59,7 @@ def _mp_main( ) -> None: ''' - The routine called *after fork* which invokes a fresh ``trio.run`` + The routine called *after fork* which invokes a fresh `trio.run()` ''' actor._forkserver_info = forkserver_info @@ -96,6 +97,107 @@ def _mp_main( log.info(f"Subactor {actor.uid} terminated") +# TODO: move this func to some kinda `.devx._conc_lang.py` eventually +# as we work out our multi-domain state-flow-syntax! +def nest_from_op( + input_op: str, + # + # ?TODO? an idea for a syntax to the state of concurrent systems + # as a "3-domain" (execution, scope, storage) model and using + # a minimal ascii/utf-8 operator-set. + # + # try not to take any of this seriously yet XD + # + # > is a "play operator" indicating (CPU bound) + # exec/work/ops required at the "lowest level computing" + # + # execution primititves (tasks, threads, actors..) denote their + # lifetime with '(' and ')' since parentheses normally are used + # in many langs to denote function calls. + # + # starting = ( + # >( opening/starting; beginning of the thread-of-exec (toe?) + # (> opened/started, (finished spawning toe) + # |_ repr of toe, in py these look like + # + # >) closing/exiting/stopping, + # )> closed/exited/stopped, + # |_ + # [OR <), )< ?? ] + # + # ending = ) + # >c) cancelling to close/exit + # c)> cancelled (caused close), OR? + # |_ + # OR maybe "x) erroring to eventuall exit + # x)> errored and terminated + # |_ + # + # scopes: supers/nurseries, IPC-ctxs, sessions, perms, etc. + # >{ opening + # {> opened + # }> closed + # >} closing + # + # storage: like queues, shm-buffers, files, etc.. + # >[ opening + # [> opened + # |_ + # + # >] closing + # ]> closed + + # IPC ops: channels, transports, msging + # => req msg + # <= resp msg + # <=> 2-way streaming (of msgs) + # <- recv 1 msg + # -> send 1 msg + # + # TODO: still not sure on R/L-HS approach..? + # =>( send-req to exec start (task, actor, thread..) 
+ # (<= recv-req to ^ + # + # (<= recv-req ^ + # <=( recv-resp opened remote exec primitive + # <=) recv-resp closed + # + # )<=c req to stop due to cancel + # c=>) req to stop due to cancel + # + # =>{ recv-req to open + # <={ send-status that it closed + + tree_str: str, + + # NOTE: so move back-from-the-left of the `input_op` by + # this amount. + back_from_op: int = 0, +) -> str: + ''' + Depth-increment the input (presumably hierarchy/supervision) + input "tree string" below the provided `input_op` execution + operator, so injecting a `"\n|_{input_op}\n"`and indenting the + `tree_str` to nest content aligned with the ops last char. + + ''' + return ( + f'{input_op}\n' + + + textwrap.indent( + tree_str, + prefix=( + len(input_op) + - + (back_from_op + 1) + ) * ' ', + ) + ) + + def _trio_main( actor: Actor, *, @@ -107,7 +209,6 @@ def _trio_main( Entry point for a `trio_run_in_process` subactor. ''' - # __tracebackhide__: bool = True _debug.hide_runtime_frames() _state._current_actor = actor @@ -119,7 +220,6 @@ def _trio_main( if actor.loglevel is not None: get_console_log(actor.loglevel) - import os actor_info: str = ( f'|_{actor}\n' f' uid: {actor.uid}\n' @@ -128,13 +228,23 @@ def _trio_main( f' loglevel: {actor.loglevel}\n' ) log.info( - 'Started new trio subactor:\n' + 'Starting new `trio` subactor:\n' + - '>\n' # like a "started/play"-icon from super perspective - + - actor_info, + nest_from_op( + input_op='>(', # see syntax ideas above + tree_str=actor_info, + back_from_op=1, + ) ) - + logmeth = log.info + exit_status: str = ( + 'Subactor exited\n' + + + nest_from_op( + input_op=')>', # like a "closed-to-play"-icon from super perspective + tree_str=actor_info, + ) + ) try: if infect_asyncio: actor._infected_aio = True @@ -143,16 +253,28 @@ def _trio_main( trio.run(trio_main) except KeyboardInterrupt: - log.cancel( - 'Actor received KBI\n' + logmeth = log.cancel + exit_status: str = ( + 'Actor received KBI (aka an OS-cancel)\n' + - actor_info + nest_from_op( + input_op='c)>', # closed due to cancel (see above) + tree_str=actor_info, + ) ) + except BaseException as err: + logmeth = log.error + exit_status: str = ( + 'Main actor task crashed during exit?\n' + + + nest_from_op( + input_op='x)>', # closed by error + tree_str=actor_info, + ) + ) + # NOTE since we raise a tb will already be shown on the + # console, thus we do NOT use `.exception()` above. + raise err + finally: - log.info( - 'Subactor terminated\n' - + - 'x\n' # like a "crossed-out/killed" from super perspective - + - actor_info - ) + logmeth(exit_status) -- 2.34.1 From 0263599cef7186b37dfc153d47f448d4fd056da0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:06:17 -0400 Subject: [PATCH 174/190] Don't use pretty struct stuff in `._invoke` It's too fragile to put in side core RPC machinery since `msgspec.Struct` defs can fail if a field type can't be looked up at creation time (like can easily happen if you conditionally import using `if TYPE_CHECKING:`) Also, - rename `cs` to `rpc_ctx_cs: CancelScope` since it's literally the wrapping RPC `Context._scope`. - report self cancellation via `explain: str` and add tail case for "unknown cause". - put a ?TODO? around what to do about KBIs if a context is opened from an `infected_aio`-actor task. - similar to our nursery and portal add TODO list for moving all `_invoke_non_context()` content out the RPC core and instead implement them as `.hilevel` endpoint helpers (maybe as decorators?)which under neath define `@context`-funcs. 
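(Editor's sketch of the decorator idea mentioned above; hypothetical
`.hilevel` code which only illustrates the "implement on top of
`@context`" plan, not an actual API:

    import tractor

    def rpc_func(fn):
        # expose a plain async func as a single req/resp endpoint
        @tractor.context
        async def _ep(ctx: tractor.Context, **kwargs):
            await ctx.started()
            return await fn(**kwargs)
        return _ep
)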
--- tractor/_rpc.py | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 37fea079..09d203b7 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -67,7 +67,7 @@ from .msg import ( MsgCodec, PayloadT, NamespacePath, - pretty_struct, + # pretty_struct, _ops as msgops, ) from tractor.msg.types import ( @@ -88,6 +88,16 @@ if TYPE_CHECKING: log = get_logger('tractor') +# ?TODO? move to a `tractor.lowlevel._rpc` with the below +# func-type-cases implemented "on top of" `@context` defs: +# -[ ] std async func helper decorated with `@rpc_func`? +# -[ ] `Portal.open_stream_from()` with async-gens? +# |_ possibly a duplex form of this with a +# `sent_from_peer = yield send_to_peer` form, which would require +# syncing the send/recv side with possibly `.receive_nowait()` +# on each `yield`? +# -[ ] some kinda `@rpc_acm` maybe that does a fixture style with +# user only defining a single-`yield` generator-func? async def _invoke_non_context( actor: Actor, cancel_scope: CancelScope, @@ -107,8 +117,9 @@ async def _invoke_non_context( ] = trio.TASK_STATUS_IGNORED, ): __tracebackhide__: bool = True + cs: CancelScope|None = None # ref when activated - # TODO: can we unify this with the `context=True` impl below? + # ?TODO? can we unify this with the `context=True` impl below? if inspect.isasyncgen(coro): await chan.send( StartAck( @@ -159,10 +170,6 @@ async def _invoke_non_context( functype='asyncgen', ) ) - # XXX: the async-func may spawn further tasks which push - # back values like an async-generator would but must - # manualy construct the response dict-packet-responses as - # above with cancel_scope as cs: ctx._scope = cs task_status.started(ctx) @@ -174,15 +181,13 @@ async def _invoke_non_context( await chan.send( Stop(cid=cid) ) + + # simplest function/method request-response pattern + # XXX: in the most minimally used case, just a scheduled internal runtime + # call to `Actor._cancel_task()` from the ctx-peer task since we + # don't (yet) have a dedicated IPC msg. + # ------ - ------ else: - # regular async function/method - # XXX: possibly just a scheduled `Actor._cancel_task()` - # from a remote request to cancel some `Context`. - # ------ - ------ - # TODO: ideally we unify this with the above `context=True` - # block such that for any remote invocation ftype, we - # always invoke the far end RPC task scheduling the same - # way: using the linked IPC context machinery. failed_resp: bool = False try: ack = StartAck( @@ -353,8 +358,15 @@ async def _errors_relayed_via_ipc( # channel. task_status.started(err) - # always reraise KBIs so they propagate at the sys-process level. - if isinstance(err, KeyboardInterrupt): + # always propagate KBIs at the sys-process level. + if ( + isinstance(err, KeyboardInterrupt) + + # ?TODO? except when running in asyncio mode? + # |_ wut if you want to open a `@context` FROM an + # infected_aio task? + # and not actor.is_infected_aio() + ): raise # RPC task bookeeping. @@ -457,7 +469,6 @@ async def _invoke( # tb: TracebackType = None cancel_scope = CancelScope() - cs: CancelScope|None = None # ref when activated ctx = actor.get_context( chan=chan, cid=cid, @@ -606,6 +617,8 @@ async def _invoke( # `@context` marked RPC function. # - `._portal` is never set. 
try: + tn: trio.Nursery + rpc_ctx_cs: CancelScope async with ( trio.open_nursery() as tn, msgops.maybe_limit_plds( @@ -615,7 +628,7 @@ async def _invoke( ), ): ctx._scope_nursery = tn - ctx._scope = tn.cancel_scope + rpc_ctx_cs = ctx._scope = tn.cancel_scope task_status.started(ctx) # TODO: better `trionics` tooling: @@ -641,7 +654,7 @@ async def _invoke( # itself calls `ctx._maybe_cancel_and_set_remote_error()` # which cancels the scope presuming the input error # is not a `.cancel_acked` pleaser. - if ctx._scope.cancelled_caught: + if rpc_ctx_cs.cancelled_caught: our_uid: tuple = actor.uid # first check for and raise any remote error @@ -651,9 +664,7 @@ async def _invoke( if re := ctx._remote_error: ctx._maybe_raise_remote_err(re) - cs: CancelScope = ctx._scope - - if cs.cancel_called: + if rpc_ctx_cs.cancel_called: canceller: tuple = ctx.canceller explain: str = f'{ctx.side!r}-side task was cancelled by ' @@ -679,9 +690,15 @@ async def _invoke( elif canceller == ctx.chan.uid: explain += f'its {ctx.peer_side!r}-side peer' - else: + elif canceller == our_uid: + explain += 'itself' + + elif canceller: explain += 'a remote peer' + else: + explain += 'an unknown cause?' + explain += ( add_div(message=explain) + @@ -910,7 +927,10 @@ async def process_messages( f'IPC msg from peer\n' f'<= {chan.uid}\n\n' - # TODO: avoid fmting depending on loglevel for perf? + # TODO: use of the pprinting of structs is + # FRAGILE and should prolly not be + # + # avoid fmting depending on loglevel for perf? # -[ ] specifically `pretty_struct.pformat()` sub-call..? # - how to only log-level-aware actually call this? # -[ ] use `.msg.pretty_struct` here now instead! @@ -1237,7 +1257,7 @@ async def process_messages( 'Exiting IPC msg loop with final msg\n\n' f'<= peer: {chan.uid}\n' f' |_{chan}\n\n' - f'{pretty_struct.pformat(msg)}' + # f'{pretty_struct.pformat(msg)}' ) log.runtime(message) -- 2.34.1 From 90bd757b481974e7c8863342974876136fc9c676 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:17:05 -0400 Subject: [PATCH 175/190] Use `msgspec.Struct.__repr__()` failover impl In case the struct doesn't import a field type (which will cause the `.pformat()` to raise) just report the issue and try to fall back to the original `repr()` version. --- tractor/msg/pretty_struct.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index f27fb89c..15e469e2 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -34,6 +34,9 @@ from pprint import ( saferepr, ) +from tractor.log import get_logger + +log = get_logger() # TODO: auto-gen type sig for input func both for # type-msgs and logging of RPC tasks? # taken and modified from: @@ -143,7 +146,13 @@ def pformat( else: # the `pprint` recursion-safe format: # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - val_str: str = saferepr(v) + try: + val_str: str = saferepr(v) + except Exception: + log.exception( + 'Failed to `saferepr({type(struct)})` !?\n' + ) + return _Struct.__repr__(struct) # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') @@ -194,12 +203,20 @@ class Struct( return sin_props pformat = pformat + # __repr__ = pformat # __str__ = __repr__ = pformat # TODO: use a pprint.PrettyPrinter instance around ONLY rendering # inside a known tty? # def __repr__(self) -> str: # ... 
- __repr__ = pformat + def __repr__(self) -> str: + try: + return pformat(self) + except Exception: + log.exception( + f'Failed to `pformat({type(self)})` !?\n' + ) + return _Struct.__repr__(self) def copy( self, -- 2.34.1 From 0332604044393efc0ef1e7bdff8db9c784d72f12 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:24:03 -0400 Subject: [PATCH 176/190] (Re)type annot some tests - For the (still not finished) `test_caps_based_msging`, switch to using the new `PayloadMsg`. - add `testdir` fixture type. --- tests/test_caps_based_msging.py | 45 ++++++++++++--------------------- tests/test_docs_examples.py | 2 +- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 9a73ba8d..6064c2cf 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -11,9 +11,6 @@ from typing import ( Type, Union, ) -from contextvars import ( - Context, -) from msgspec import ( structs, @@ -27,6 +24,7 @@ import tractor from tractor import ( _state, MsgTypeError, + Context, ) from tractor.msg import ( _codec, @@ -41,7 +39,7 @@ from tractor.msg import ( from tractor.msg.types import ( _payload_msgs, log, - Msg, + PayloadMsg, Started, mk_msg_spec, ) @@ -61,7 +59,7 @@ def mk_custom_codec( uid: tuple[str, str] = tractor.current_actor().uid # XXX NOTE XXX: despite defining `NamespacePath` as a type - # field on our `Msg.pld`, we still need a enc/dec_hook() pair + # field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair # to cast to/from that type on the wire. See the docs: # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types @@ -321,12 +319,12 @@ def dec_type_union( import importlib types: list[Type] = [] for type_name in type_names: - for ns in [ + for mod in [ typing, importlib.import_module(__name__), ]: if type_ref := getattr( - ns, + mod, type_name, False, ): @@ -744,7 +742,7 @@ def chk_pld_type( # 'Error', .pld: ErrorData codec: MsgCodec = mk_codec( - # NOTE: this ONLY accepts `Msg.pld` fields of a specified + # NOTE: this ONLY accepts `PayloadMsg.pld` fields of a specified # type union. ipc_pld_spec=payload_spec, ) @@ -752,7 +750,7 @@ def chk_pld_type( # make a one-off dec to compare with our `MsgCodec` instance # which does the below `mk_msg_spec()` call internally ipc_msg_spec: Union[Type[Struct]] - msg_types: list[Msg[payload_spec]] + msg_types: list[PayloadMsg[payload_spec]] ( ipc_msg_spec, msg_types, @@ -761,7 +759,7 @@ def chk_pld_type( ) _enc = msgpack.Encoder() _dec = msgpack.Decoder( - type=ipc_msg_spec or Any, # like `Msg[Any]` + type=ipc_msg_spec or Any, # like `PayloadMsg[Any]` ) assert ( @@ -806,7 +804,7 @@ def chk_pld_type( 'cid': '666', 'pld': pld, } - enc_msg: Msg = typedef(**kwargs) + enc_msg: PayloadMsg = typedef(**kwargs) _wire_bytes: bytes = _enc.encode(enc_msg) wire_bytes: bytes = codec.enc.encode(enc_msg) @@ -883,25 +881,16 @@ def test_limit_msgspec(): debug_mode=True ): - # ensure we can round-trip a boxing `Msg` + # ensure we can round-trip a boxing `PayloadMsg` assert chk_pld_type( - # Msg, - Any, - None, + payload_spec=Any, + pld=None, expect_roundtrip=True, ) - # TODO: don't need this any more right since - # `msgspec>=0.15` has the nice generics stuff yah?? - # - # manually override the type annot of the payload - # field and ensure it propagates to all msg-subtypes. 
- # Msg.__annotations__['pld'] = Any - # verify that a mis-typed payload value won't decode assert not chk_pld_type( - # Msg, - int, + payload_spec=int, pld='doggy', ) @@ -913,18 +902,16 @@ def test_limit_msgspec(): value: Any assert not chk_pld_type( - # Msg, - CustomPayload, + payload_spec=CustomPayload, pld='doggy', ) assert chk_pld_type( - # Msg, - CustomPayload, + payload_spec=CustomPayload, pld=CustomPayload(name='doggy', value='urmom') ) - # uhh bc we can `.pause_from_sync()` now! :surfer: + # yah, we can `.pause_from_sync()` now! # breakpoint() trio.run(main) diff --git a/tests/test_docs_examples.py b/tests/test_docs_examples.py index 63ad07a2..fdf54bca 100644 --- a/tests/test_docs_examples.py +++ b/tests/test_docs_examples.py @@ -19,7 +19,7 @@ from tractor._testing import ( @pytest.fixture def run_example_in_subproc( loglevel: str, - testdir, + testdir: pytest.Testdir, reg_addr: tuple[str, int], ): -- 2.34.1 From a796fb7103ddebed44ad9e293c5224b2e38fdaa2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:27:13 -0400 Subject: [PATCH 177/190] Woops, set `.cancel()` level in custom levels table.. --- tractor/log.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tractor/log.py b/tractor/log.py index edb058e3..47f1f259 100644 --- a/tractor/log.py +++ b/tractor/log.py @@ -54,11 +54,12 @@ LOG_FORMAT = ( DATE_FORMAT = '%b %d %H:%M:%S' # FYI, ERROR is 40 +# TODO: use a `bidict` to avoid the :155 check? CUSTOM_LEVELS: dict[str, int] = { 'TRANSPORT': 5, 'RUNTIME': 15, 'DEVX': 17, - 'CANCEL': 18, + 'CANCEL': 22, 'PDB': 500, } STD_PALETTE = { @@ -147,6 +148,8 @@ class StackLevelAdapter(LoggerAdapter): Delegate a log call to the underlying logger, after adding contextual information from this adapter instance. + NOTE: all custom level methods (above) delegate to this! + ''' if self.isEnabledFor(level): stacklevel: int = 3 -- 2.34.1 From 6cd19c408e37024240f5449f4c8ddde957562934 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:28:12 -0400 Subject: [PATCH 178/190] Refine some `.trionics` docs and logging - allow passing and report the lib name (`trio` or `tractor`) from `maybe_open_nursery()`. - use `.runtime()` level when reporting `_Cache`-hits in `maybe_open_context()`. - tidy up some doc strings. --- tractor/trionics/_broadcast.py | 9 +++--- tractor/trionics/_mngrs.py | 53 ++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/tractor/trionics/_broadcast.py b/tractor/trionics/_broadcast.py index a5d31871..977b6828 100644 --- a/tractor/trionics/_broadcast.py +++ b/tractor/trionics/_broadcast.py @@ -156,11 +156,12 @@ class BroadcastState(Struct): class BroadcastReceiver(ReceiveChannel): ''' - A memory receive channel broadcaster which is non-lossy for the - fastest consumer. + A memory receive channel broadcaster which is non-lossy for + the fastest consumer. - Additional consumer tasks can receive all produced values by registering - with ``.subscribe()`` and receiving from the new instance it delivers. + Additional consumer tasks can receive all produced values by + registering with ``.subscribe()`` and receiving from the new + instance it delivers. 
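    (Editor's usage sketch; assumes this module's
     `broadcast_receiver()` factory wrapping a `trio` mem-chan
     receiver:

        tx, rx = trio.open_memory_channel(1)
        bcast = broadcast_receiver(rx, max_buffer_size=1)
        async with bcast.subscribe() as sub:
            await tx.send('hi')
            assert await sub.receive() == 'hi'
    )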
''' def __init__( diff --git a/tractor/trionics/_mngrs.py b/tractor/trionics/_mngrs.py index 08e70ad2..fd224d65 100644 --- a/tractor/trionics/_mngrs.py +++ b/tractor/trionics/_mngrs.py @@ -18,8 +18,12 @@ Async context manager primitives with hard ``trio``-aware semantics ''' -from contextlib import asynccontextmanager as acm +from __future__ import annotations +from contextlib import ( + asynccontextmanager as acm, +) import inspect +from types import ModuleType from typing import ( Any, AsyncContextManager, @@ -30,13 +34,16 @@ from typing import ( Optional, Sequence, TypeVar, + TYPE_CHECKING, ) import trio - from tractor._state import current_actor from tractor.log import get_logger +if TYPE_CHECKING: + from tractor import ActorNursery + log = get_logger(__name__) @@ -46,8 +53,10 @@ T = TypeVar("T") @acm async def maybe_open_nursery( - nursery: trio.Nursery | None = None, + nursery: trio.Nursery|ActorNursery|None = None, shield: bool = False, + lib: ModuleType = trio, + ) -> AsyncGenerator[trio.Nursery, Any]: ''' Create a new nursery if None provided. @@ -58,13 +67,12 @@ async def maybe_open_nursery( if nursery is not None: yield nursery else: - async with trio.open_nursery() as nursery: + async with lib.open_nursery() as nursery: nursery.cancel_scope.shield = shield yield nursery async def _enter_and_wait( - mngr: AsyncContextManager[T], unwrapped: dict[int, T], all_entered: trio.Event, @@ -91,7 +99,6 @@ async def _enter_and_wait( @acm async def gather_contexts( - mngrs: Sequence[AsyncContextManager[T]], ) -> AsyncGenerator[ @@ -102,15 +109,17 @@ async def gather_contexts( None, ]: ''' - Concurrently enter a sequence of async context managers, each in - a separate ``trio`` task and deliver the unwrapped values in the - same order once all managers have entered. On exit all contexts are - subsequently and concurrently exited. + Concurrently enter a sequence of async context managers (acms), + each from a separate `trio` task and deliver the unwrapped + `yield`-ed values in the same order once all managers have entered. - This function is somewhat similar to common usage of - ``contextlib.AsyncExitStack.enter_async_context()`` (in a loop) in - combo with ``asyncio.gather()`` except the managers are concurrently - entered and exited, and cancellation just works. + On exit, all acms are subsequently and concurrently exited. + + This function is somewhat similar to a batch of non-blocking + calls to `contextlib.AsyncExitStack.enter_async_context()` + (inside a loop) *in combo with* a `asyncio.gather()` to get the + `.__aenter__()`-ed values, except the managers are both + concurrently entered and exited and *cancellation just works*(R). ''' seed: int = id(mngrs) @@ -210,9 +219,10 @@ async def maybe_open_context( ) -> AsyncIterator[tuple[bool, T]]: ''' - Maybe open a context manager if there is not already a _Cached - version for the provided ``key`` for *this* actor. Return the - _Cached instance on a _Cache hit. + Maybe open an async-context-manager (acm) if there is not already + a `_Cached` version for the provided (input) `key` for *this* actor. + + Return the `_Cached` instance on a _Cache hit. 
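    (Editor's usage sketch, assuming only the signature shown here
     plus a `kwargs: dict` input:

        from contextlib import asynccontextmanager as acm

        @acm
        async def connect(host: str):
            yield f'conn-to-{host}'  # stand-in for a real resource

        async def user_task():
            async with maybe_open_context(
                acm_func=connect,
                kwargs={'host': 'localhost'},
            ) as (cache_hit, conn):
                ...  # concurrent callers after the 1st get a hit
    )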
''' fid = id(acm_func) @@ -273,8 +283,13 @@ async def maybe_open_context( else: _Cache.users += 1 log.runtime( - f'Reusing resource for `_Cache` user {_Cache.users}\n\n' - f'{ctx_key!r} -> {yielded!r}\n' + f'Re-using cached resource for user {_Cache.users}\n\n' + f'{ctx_key!r} -> {type(yielded)}\n' + + # TODO: make this work with values but without + # `msgspec.Struct` causing frickin crashes on field-type + # lookups.. + # f'{ctx_key!r} -> {yielded!r}\n' ) lock.release() yield True, yielded -- 2.34.1 From 71e8d466ae66a8ae40ebfc73616703bcaf6b15e9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Jul 2024 12:21:26 -0400 Subject: [PATCH 179/190] More formal `TransportClosed` reporting/raising Since it was all ad-hoc defined inside `._ipc.MsgpackTCPStream._iter_pkts()` more or less, this starts formalizing a way for particular transport backends to indicate whether a disconnect condition should be re-raised in the RPC msg loop and if not what log level to report it at (if any). Based on our lone transport currently we try to suppress any logging noise from ephemeral connections expected during normal actor interaction and discovery subsys ops: - any short lived discovery related TCP connects are only logged as `.transport()` level. - both `.error()` and raise on any underlying `trio.ClosedResource` cause since that normally means some task touched transport layer internals that it shouldn't have. - do a `.warning()` on anything else unexpected. Impl deats: - extend the `._exceptions.TransportClosed` to accept an input log level, raise-on-report toggle and custom reporting & raising via a new `.report_n_maybe_raise()` method. - construct the TCs with inputs per case in (the newly named) `._iter_pkts(). - call ^ this method from the `TransportClosed` handler block inside the RPC msg loop thus delegating reporting levels and/or raising to the backend's per-case TC instantiating. Related `._ipc` changes: - mask out all the `MsgpackTCPStream._codec` debug helper stuff and drop any lingering cruft from the initial proto-ing of msg-codecs. - rename some attrs/methods: |_`MsgpackTCPStream._iter_packets()` -> `._iter_pkts()` and `._agen` -> `_aiter_pkts`. |_`Channel._aiter_recv()` -> `._aiter_msgs()` and `._agen` -> `_aiter_msgs`. - add `hide_tb: bool` support to `Channel.send()` and only show the frame on non-MTEs. --- tractor/__init__.py | 1 + tractor/_exceptions.py | 55 +++++++- tractor/_ipc.py | 284 +++++++++++++++++++++++++++-------------- tractor/_rpc.py | 20 ++- 4 files changed, 259 insertions(+), 101 deletions(-) diff --git a/tractor/__init__.py b/tractor/__init__.py index c15a391b..4f9e592d 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -50,6 +50,7 @@ from ._exceptions import ( ModuleNotExposed as ModuleNotExposed, MsgTypeError as MsgTypeError, RemoteActorError as RemoteActorError, + TransportClosed as TransportClosed, ) from .devx import ( breakpoint as breakpoint, diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 7164d6ab..108134ca 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -906,8 +906,59 @@ class StreamOverrun( ''' -class TransportClosed(trio.ClosedResourceError): - "Underlying channel transport was closed prior to use" +class TransportClosed(trio.BrokenResourceError): + ''' + IPC transport (protocol) connection was closed or broke and + indicates that the wrapping communication `Channel` can no longer + be used to send/receive msgs from the remote peer. 
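    (Editor's handling sketch using only APIs from this patch,
     `.report_n_maybe_raise()`, plus an assumed `Channel.recv()`:

        try:
            msg = await chan.recv()
        except TransportClosed as tc:
            # let the transport backend decide the log level and
            # whether this disconnect should be re-raised
            tc.report_n_maybe_raise()
    )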
+ + ''' + def __init__( + self, + message: str, + loglevel: str = 'transport', + cause: BaseException|None = None, + raise_on_report: bool = False, + + ) -> None: + self.message: str = message + self._loglevel = loglevel + super().__init__(message) + + if cause is not None: + self.__cause__ = cause + + # flag to toggle whether the msg loop should raise + # the exc in its `TransportClosed` handler block. + self._raise_on_report = raise_on_report + + def report_n_maybe_raise( + self, + message: str|None = None, + + ) -> None: + ''' + Using the init-specified log level emit a logging report + for this error. + + ''' + message: str = message or self.message + # when a cause is set, slap it onto the log emission. + if cause := self.__cause__: + cause_tb_str: str = ''.join( + traceback.format_tb(cause.__traceback__) + ) + message += ( + f'{cause_tb_str}\n' # tb + f' {cause}\n' # exc repr + ) + + getattr(log, self._loglevel)(message) + + # some errors we want to blow up from + # inside the RPC msg loop + if self._raise_on_report: + raise self from cause class NoResult(RuntimeError): diff --git a/tractor/_ipc.py b/tractor/_ipc.py index e5e3d10f..a1cb0359 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -54,7 +54,7 @@ from tractor._exceptions import ( ) from tractor.msg import ( _ctxvar_MsgCodec, - _codec, + # _codec, XXX see `self._codec` sanity/debug checks MsgCodec, types as msgtypes, pretty_struct, @@ -65,8 +65,18 @@ log = get_logger(__name__) _is_windows = platform.system() == 'Windows' -def get_stream_addrs(stream: trio.SocketStream) -> tuple: - # should both be IP sockets +def get_stream_addrs( + stream: trio.SocketStream +) -> tuple[ + tuple[str, int], # local + tuple[str, int], # remote +]: + ''' + Return the `trio` streaming transport prot's socket-addrs for + both the local and remote sides as a pair. + + ''' + # rn, should both be IP sockets lsockname = stream.socket.getsockname() rsockname = stream.socket.getpeername() return ( @@ -75,17 +85,22 @@ def get_stream_addrs(stream: trio.SocketStream) -> tuple: ) -# TODO: this should be our `Union[*msgtypes.__spec__]` now right? -MsgType = TypeVar("MsgType") - -# TODO: consider using a generic def and indexing with our eventual -# msg definition/types? -# - https://docs.python.org/3/library/typing.html#typing.Protocol -# - https://jcristharif.com/msgspec/usage.html#structs +# from tractor.msg.types import MsgType +# ?TODO? this should be our `Union[*msgtypes.__spec__]` alias now right..? +# => BLEH, except can't bc prots must inherit typevar or param-spec +# vars.. +MsgType = TypeVar('MsgType') +# TODO: break up this mod into a subpkg so we can start adding new +# backends and move this type stuff into a dedicated file.. Bo +# @runtime_checkable class MsgTransport(Protocol[MsgType]): +# +# ^-TODO-^ consider using a generic def and indexing with our +# eventual msg definition/types? +# - https://docs.python.org/3/library/typing.html#typing.Protocol stream: trio.SocketStream drained: list[MsgType] @@ -120,9 +135,9 @@ class MsgTransport(Protocol[MsgType]): ... -# TODO: not sure why we have to inherit here, but it seems to be an -# issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; -# probably should make a `mypy` issue? +# TODO: typing oddity.. not sure why we have to inherit here, but it +# seems to be an issue with `get_msg_transport()` returning +# a `Type[Protocol]`; probably should make a `mypy` issue? 
class MsgpackTCPStream(MsgTransport): ''' A ``trio.SocketStream`` delivering ``msgpack`` formatted data @@ -145,7 +160,7 @@ class MsgpackTCPStream(MsgTransport): # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types # # TODO: define this as a `Codec` struct which can be - # overriden dynamically by the application/runtime. + # overriden dynamically by the application/runtime? codec: tuple[ Callable[[Any], Any]|None, # coder Callable[[type, Any], Any]|None, # decoder @@ -160,7 +175,7 @@ class MsgpackTCPStream(MsgTransport): self._laddr, self._raddr = get_stream_addrs(stream) # create read loop instance - self._agen = self._iter_packets() + self._aiter_pkts = self._iter_packets() self._send_lock = trio.StrictFIFOLock() # public i guess? @@ -174,15 +189,12 @@ class MsgpackTCPStream(MsgTransport): # allow for custom IPC msg interchange format # dynamic override Bo self._task = trio.lowlevel.current_task() - self._codec: MsgCodec = ( - codec - or - _codec._ctxvar_MsgCodec.get() - ) - # TODO: mask out before release? - # log.runtime( - # f'New {self} created with codec\n' - # f'codec: {self._codec}\n' + + # XXX for ctxvar debug only! + # self._codec: MsgCodec = ( + # codec + # or + # _codec._ctxvar_MsgCodec.get() # ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: @@ -190,6 +202,11 @@ class MsgpackTCPStream(MsgTransport): Yield `bytes`-blob decoded packets from the underlying TCP stream using the current task's `MsgCodec`. + This is a streaming routine implemented as an async generator + func (which was the original design, but could be changed?) + and is allocated by a `.__call__()` inside `.__init__()` where + it is assigned to the `._aiter_pkts` attr. + ''' decodes_failed: int = 0 @@ -204,16 +221,82 @@ class MsgpackTCPStream(MsgTransport): # seem to be getting racy failures here on # arbiter/registry name subs.. trio.BrokenResourceError, - ): - raise TransportClosed( - f'transport {self} was already closed prior ro read' - ) + ) as trans_err: + + loglevel = 'transport' + match trans_err: + # case ( + # ConnectionResetError() + # ): + # loglevel = 'transport' + + # peer actor (graceful??) TCP EOF but `tricycle` + # seems to raise a 0-bytes-read? + case ValueError() if ( + 'unclean EOF' in trans_err.args[0] + ): + pass + + # peer actor (task) prolly shutdown quickly due + # to cancellation + case trio.BrokenResourceError() if ( + 'Connection reset by peer' in trans_err.args[0] + ): + pass + + # unless the disconnect condition falls under "a + # normal operation breakage" we usualy console warn + # about it. + case _: + loglevel: str = 'warning' + + + raise TransportClosed( + message=( + f'IPC transport already closed by peer\n' + f'x)> {type(trans_err)}\n' + f' |_{self}\n' + ), + loglevel=loglevel, + ) from trans_err + + # XXX definitely can happen if transport is closed + # manually by another `trio.lowlevel.Task` in the + # same actor; we use this in some simulated fault + # testing for ex, but generally should never happen + # under normal operation! + # + # NOTE: as such we always re-raise this error from the + # RPC msg loop! 
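+            #
+            # (the `raise_on_report` input constructed in the handler
+            #  below is the flag which the RPC msg loop's
+            #  `TransportClosed` handler acts on via
+            #  `.report_n_maybe_raise()`; when set the closure is
+            #  re-raised instead of only logged.)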
+ except trio.ClosedResourceError as closure_err: + raise TransportClosed( + message=( + f'IPC transport already manually closed locally?\n' + f'x)> {type(closure_err)} \n' + f' |_{self}\n' + ), + loglevel='error', + raise_on_report=( + closure_err.args[0] == 'another task closed this fd' + or + closure_err.args[0] in ['another task closed this fd'] + ), + ) from closure_err + + # graceful TCP EOF disconnect if header == b'': raise TransportClosed( - f'transport {self} was already closed prior ro read' + message=( + f'IPC transport already gracefully closed\n' + f')>\n' + f'|_{self}\n' + ), + loglevel='transport', + # cause=??? # handy or no? ) + size: int size, = struct.unpack(" None: ''' Send a msgpack encoded py-object-blob-as-msg over TCP. @@ -304,21 +375,24 @@ class MsgpackTCPStream(MsgTransport): invalid msg type ''' - # __tracebackhide__: bool = hide_tb + __tracebackhide__: bool = hide_tb + + # XXX see `trio._sync.AsyncContextManagerMixin` for details + # on the `.acquire()`/`.release()` sequencing.. async with self._send_lock: # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. codec: MsgCodec = _ctxvar_MsgCodec.get() - # TODO: mask out before release? - if self._codec.pld_spec != codec.pld_spec: - self._codec = codec - log.runtime( - f'Using new codec in {self}.send()\n' - f'codec: {self._codec}\n\n' - f'msg: {msg}\n' - ) + # XXX for ctxvar debug only! + # if self._codec.pld_spec != codec.pld_spec: + # self._codec = codec + # log.runtime( + # f'Using new codec in {self}.send()\n' + # f'codec: {self._codec}\n\n' + # f'msg: {msg}\n' + # ) if type(msg) not in msgtypes.__msg_types__: if strict_types: @@ -352,6 +426,16 @@ class MsgpackTCPStream(MsgTransport): size: bytes = struct.pack(" + # except BaseException as _err: + # err = _err + # if not isinstance(err, MsgTypeError): + # __tracebackhide__: bool = False + # raise + @property def laddr(self) -> tuple[str, int]: return self._laddr @@ -361,7 +445,7 @@ class MsgpackTCPStream(MsgTransport): return self._raddr async def recv(self) -> Any: - return await self._agen.asend(None) + return await self._aiter_pkts.asend(None) async def drain(self) -> AsyncIterator[dict]: ''' @@ -378,7 +462,7 @@ class MsgpackTCPStream(MsgTransport): yield msg def __aiter__(self): - return self._agen + return self._aiter_pkts def connected(self) -> bool: return self.stream.socket.fileno() != -1 @@ -433,7 +517,7 @@ class Channel: # set after handshake - always uid of far end self.uid: tuple[str, str]|None = None - self._agen = self._aiter_recv() + self._aiter_msgs = self._iter_msgs() self._exc: Exception|None = None # set if far end actor errors self._closed: bool = False @@ -497,8 +581,6 @@ class Channel: ) return self._transport - # TODO: something simliar at the IPC-`Context` - # level so as to support @cm def apply_codec( self, @@ -517,6 +599,7 @@ class Channel: finally: self._transport.codec = orig + # TODO: do a .src/.dst: str for maddrs? def __repr__(self) -> str: if not self._transport: return '' @@ -560,27 +643,43 @@ class Channel: ) return transport + # TODO: something like, + # `pdbp.hideframe_on(errors=[MsgTypeError])` + # instead of the `try/except` hack we have rn.. + # seems like a pretty useful thing to have in general + # along with being able to filter certain stack frame(s / sets) + # possibly based on the current log-level? async def send( self, payload: Any, - # hide_tb: bool = False, + hide_tb: bool = False, ) -> None: ''' Send a coded msg-blob over the transport. 
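+
+        If `hide_tb` is set this frame is (mostly) elided from any
+        raised traceback, tho for any non-`MsgTypeError` cause we
+        always unhide it (see the `except` block impl below).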

         '''
-        # __tracebackhide__: bool = hide_tb
-        log.transport(
-            '=> send IPC msg:\n\n'
-            f'{pformat(payload)}\n'
-        )  # type: ignore
-        assert self._transport
-        await self._transport.send(
-            payload,
-            # hide_tb=hide_tb,
-        )
+        __tracebackhide__: bool = hide_tb
+        try:
+            log.transport(
+                '=> send IPC msg:\n\n'
+                f'{pformat(payload)}\n'
+            )
+            # assert self._transport # but why typing?
+            await self._transport.send(
+                payload,
+                hide_tb=hide_tb,
+            )
+        except BaseException as _err:
+            err = _err  # bind for introspection
+            if not isinstance(_err, MsgTypeError):
+                # assert err
+                __tracebackhide__: bool = False
+            else:
+                assert err.cid
+
+            raise

     async def recv(self) -> Any:
         assert self._transport
@@ -617,8 +716,11 @@ class Channel:
         await self.aclose(*args)

     def __aiter__(self):
-        return self._agen
+        return self._aiter_msgs

+    # ?TODO? run any reconnection sequence?
+    # -[ ] prolly should be impl-ed as deco-API?
+    #
     # async def _reconnect(self) -> None:
    #     """Handle connection failures by polling until a reconnect can be
    #     established.
@@ -636,7 +738,6 @@ class Channel:
    #         else:
    #             log.transport("Stream connection re-established!")

-    #             # TODO: run any reconnection sequence
    #             # on_recon = self._recon_seq
    #             # if on_recon:
    #             #     await on_recon(self)
@@ -650,11 +751,17 @@ class Channel:
    #                 " for re-establishment")
    #             await trio.sleep(1)

-    async def _aiter_recv(
+    async def _iter_msgs(
         self
     ) -> AsyncGenerator[Any, None]:
         '''
-        Async iterate items from underlying stream.
+        Yield `MsgType` IPC msgs decoded and delivered from
+        an underlying `MsgTransport` protocol.
+
+        This is a streaming routine also implemented as an async-gen
+        func (same as `MsgTransport._iter_pkts()`) which gets allocated
+        by a `.__call__()` inside `.__init__()` where it is assigned to
+        the `._aiter_msgs` attr.

         '''
         assert self._transport
@@ -680,15 +787,6 @@ class Channel:
                 case _:
                     yield msg

-                    # TODO: if we were gonna do this it should be
-                    # done up at the `MsgStream` layer!
-                    #
-                    # sent = yield item
-                    # if sent is not None:
-                    #     # optimization, passing None through all the
-                    #     # time is pointless
-                    #     await self._transport.send(sent)

         except trio.BrokenResourceError:

             # if not self._autorecon:
diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 09d203b7..a77c2af7 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -1196,7 +1196,7 @@ async def process_messages(
                     parent_chan=chan,
                 )

-            except TransportClosed:
+            except TransportClosed as tc:
                 # channels "breaking" (for TCP streams by EOF or 104
                 # connection-reset) is ok since we don't have a teardown
                 # handshake for them (yet) and instead we simply bail out of
                 # the msg loop and expect the teardown sequence to clean
                 # up..
                 #
                 # TODO: maybe add a teardown handshake? and,
-                # -[ ] don't show this msg if it's an ephemeral discovery ep call?
+                # -[x] don't show this msg if it's an ephemeral discovery ep call?
+                #  |_ see the below `.report_n_maybe_raise()` impl as well as
+                #    tc-exc input details in `MsgpackTCPStream._iter_pkts()`
+                #    for different read-failure cases.
                 # -[ ] figure out how this will break with other transports?
-                log.runtime(
-                    f'IPC channel closed abruptly\n'
-                    f'<=x peer: {chan.uid}\n'
-                    f'   |_{chan.raddr}\n'
+                tc.report_n_maybe_raise(
+                    message=(
+                        f'peer IPC channel closed abruptly?\n\n'
+                        f'<=x {chan}\n'
+                        f'  |_{chan.raddr}\n\n'
+                    )
+                    +
+                    tc.message
                 )

                 # transport **WAS** disconnected
-- 
2.34.1


From cb998a2b2fe712a3b0b5e939e408e02dd303feb7 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Jul 2024 15:53:33 -0400
Subject: [PATCH 180/190] Add `Portal.chan` property, to wrap `._chan` attr

---
 tractor/_portal.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tractor/_portal.py b/tractor/_portal.py
index 0f698836..3dc7f3a1 100644
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@@ -97,7 +97,7 @@ class Portal:
         channel: Channel,

     ) -> None:

-        self.chan = channel
+        self._chan: Channel = channel
         # during the portal's lifetime
         self._final_result_pld: Any|None = None
         self._final_result_msg: PayloadMsg|None = None
@@ -109,6 +109,10 @@ class Portal:
         self._streams: set[MsgStream] = set()
         self.actor: Actor = current_actor()

+    @property
+    def chan(self) -> Channel:
+        return self._chan
+
     @property
     def channel(self) -> Channel:
         '''
-- 
2.34.1


From 50e02295a9929adb271bd3676afc6a575e4a927d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Jul 2024 16:00:26 -0400
Subject: [PATCH 181/190] Reraise RAEs in `MsgStream.receive()`; truncate tbs

To avoid showing lowlevel details of exception handling around the
underlying call to `return await self._ctx._pld_rx.recv_pld(ipc=self)`,
any time a `RemoteActorError` is unpacked (and raised locally) we
re-raise it directly from the captured `src_err` so as to present to
the user/app caller-code an exception raised directly from the
`.receive()` frame. This simplifies traceback call-stacks for any
`log.exception()` or `pdb`-REPL output, filtering out the lower
`PldRx` frames by default.
---
 tractor/_streaming.py | 109 +++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 59 deletions(-)

diff --git a/tractor/_streaming.py b/tractor/_streaming.py
index 314a93b8..bc87164e 100644
--- a/tractor/_streaming.py
+++ b/tractor/_streaming.py
@@ -36,8 +36,8 @@ import warnings
 import trio

 from ._exceptions import (
-    # _raise_from_no_key_in_msg,
     ContextCancelled,
+    RemoteActorError,
 )
 from .log import get_logger
 from .trionics import (
@@ -101,7 +101,7 @@ class MsgStream(trio.abc.Channel):
     @property
     def ctx(self) -> Context:
         '''
-        This stream's IPC `Context` ref.
+        A read-only ref to this stream's inter-actor-task `Context`.

         '''
         return self._ctx
@@ -145,9 +145,8 @@ class MsgStream(trio.abc.Channel):

         '''
         __tracebackhide__: bool = hide_tb

-        # NOTE: `trio.ReceiveChannel` implements
-        # EOC handling as follows (aka uses it
-        # to gracefully exit async for loops):
+        # NOTE FYI: `trio.ReceiveChannel` implements EOC handling as
+        # follows (aka uses it to gracefully exit async for loops):
         #
         # async def __anext__(self) -> ReceiveType:
         #     try:
@@ -165,48 +164,29 @@ class MsgStream(trio.abc.Channel):

         src_err: Exception|None = None  # orig tb
         try:
-
             ctx: Context = self._ctx
             return await ctx._pld_rx.recv_pld(ipc=self)

         # XXX: the stream terminates on either of:
-        # - via `self._rx_chan.receive()` raising  after manual closure
-        #   by the rpc-runtime OR,
-        # - via a received `{'stop': ...}` msg from remote side.
-        #   |_ NOTE: previously this was triggered by calling
-        #   ``._rx_chan.aclose()`` on the send side of the channel inside
-        #   `Actor._deliver_ctx_payload()`, but now the 'stop' message handling
-        #   has been put just above inside `_raise_from_no_key_in_msg()`.
-        except (
-            trio.EndOfChannel,
-        ) as eoc:
-            src_err = eoc
+        # - `self._rx_chan.receive()` raising  after manual closure
+        #   by the rpc-runtime,
+        #   OR
+        # - via a `Stop`-msg received from remote peer task.
+        #   NOTE
+        #   |_ previously this was triggered by calling
+        #   ``._rx_chan.aclose()`` on the send side of the channel
+        #   inside `Actor._deliver_ctx_payload()`, but now the 'stop'
+        #   message handling gets delegated to `PldRx.recv_pld()`
+        #   internals.
+        except trio.EndOfChannel as eoc:
+            # a graceful stream finished signal
             self._eoc = eoc
+            src_err = eoc

-            # TODO: Locally, we want to close this stream gracefully, by
-            # terminating any local consumers tasks deterministically.
-            # Once we have broadcast support, we **don't** want to be
-            # closing this stream and not flushing a final value to
-            # remaining (clone) consumers who may not have been
-            # scheduled to receive it yet.
-            # try:
-            #     maybe_err_msg_or_res: dict = self._rx_chan.receive_nowait()
-            #     if maybe_err_msg_or_res:
-            #         log.warning(
-            #             'Discarding un-processed msg:\n'
-            #             f'{maybe_err_msg_or_res}'
-            #         )
-            # except trio.WouldBlock:
-            #     # no queued msgs that might be another remote
-            #     # error, so just raise the original EoC
-            #     pass

-            # raise eoc

-        # a ``ClosedResourceError`` indicates that the internal
-        # feeder memory receive channel was closed likely by the
-        # runtime after the associated transport-channel
-        # disconnected or broke.
+        # a `ClosedResourceError` indicates that the internal feeder
+        # memory receive channel was closed likely by the runtime
+        # after the associated transport-channel disconnected or
+        # broke.
         except trio.ClosedResourceError as cre:  # by self._rx_chan.receive()
             src_err = cre
             log.warning(
@@ -218,14 +198,15 @@ class MsgStream(trio.abc.Channel):
             # terminated and signal this local iterator to stop
             drained: list[Exception|dict] = await self.aclose()
             if drained:
+                # ?TODO? pass these to the `._ctx._drained_msgs: deque`
+                # and then iterate them as part of any `.wait_for_result()` call?
+                #
                 # from .devx import pause
                 # await pause()
                 log.warning(
-                    'Drained context msgs during closure:\n'
+                    'Drained context msgs during closure\n\n'
                     f'{drained}'
                 )
-                # TODO: pass these to the `._ctx._drained_msgs: deque`
-                # and then iterate them as part of any `.result()` call?

             # NOTE XXX: if the context was cancelled or remote-errored
             # but we received the stream close msg first, we
@@ -238,28 +219,36 @@ class MsgStream(trio.abc.Channel):
                 from_src_exc=src_err,
             )

-        # propagate any error but hide low-level frame details
-        # from the caller by default for debug noise reduction.
+        # propagate any error but hide low-level frame details from
+        # the caller by default for console/debug-REPL noise
+        # reduction.
         if (
             hide_tb
+            and (

-            # XXX NOTE XXX don't reraise on certain
-            # stream-specific internal error types like,
-            #
-            # - `trio.EoC` since we want to use the exact instance
-            #   to ensure that it is the error that bubbles upward
-            #   for silent absorption by `Context.open_stream()`.
- and not self._eoc + # XXX NOTE special conditions: don't reraise on + # certain stream-specific internal error types like, + # + # - `trio.EoC` since we want to use the exact instance + # to ensure that it is the error that bubbles upward + # for silent absorption by `Context.open_stream()`. + not self._eoc - # - `RemoteActorError` (or `ContextCancelled`) if it gets - # raised from `_raise_from_no_key_in_msg()` since we - # want the same (as the above bullet) for any - # `.open_context()` block bubbled error raised by - # any nearby ctx API remote-failures. - # and not isinstance(src_err, RemoteActorError) + # - `RemoteActorError` (or subtypes like ctxc) + # since we want to present the error as though it is + # "sourced" directly from this `.receive()` call and + # generally NOT include the stack frames raised from + # inside the `PldRx` and/or the transport stack + # layers. + or isinstance(src_err, RemoteActorError) + ) ): raise type(src_err)(*src_err.args) from src_err else: + # for any non-graceful-EOC we want to NOT hide this frame + if not self._eoc: + __tracebackhide__: bool = False + raise src_err async def aclose(self) -> list[Exception|dict]: @@ -385,6 +374,8 @@ class MsgStream(trio.abc.Channel): if not self._eoc: message: str = ( f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' + # } bc a stream is a "scope"/msging-phase inside an IPC + f'x}}>\n' f'|_{self}\n' ) log.cancel(message) -- 2.34.1 From 950a2ec30f218c758915ba971120d82da0d9e2c2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Jul 2024 16:31:58 -0400 Subject: [PATCH 182/190] Use `._entry` proto-ed "lifetime ops" in logging As per a WIP scribbled out TODO in `._entry.nest_from_op()`, change a bunch of "supervisor/lifetime mgmt ops" related log messages to contain some supervisor-annotation "headers" in an effort to give a terser "visual indication" of how some execution/scope/storage primitive entity (like an actor/task/ctx/connection) is being operated on (like, opening/started/closed/cancelled/erroring) from a "supervisor action" POV. Also tweak a bunch more emissions to lower levels to reduce noise around normal inter-actor operations like process and IPC ctx supervision. --- tractor/_context.py | 26 ++++++++++++++++++++------ tractor/_entry.py | 2 +- tractor/_runtime.py | 42 +++++++++++++++++++++++++----------------- tractor/_spawn.py | 25 ++++++++++++++++--------- 4 files changed, 62 insertions(+), 33 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 9b4cb6e6..7b1ac296 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -933,13 +933,14 @@ class Context: self.cancel_called = True header: str = ( - f'Cancelling ctx with peer from {side.upper()} side\n\n' + f'Cancelling ctx from {side.upper()}-side\n' ) reminfo: str = ( # ' =>\n' - f'Context.cancel() => {self.chan.uid}\n' + # f'Context.cancel() => {self.chan.uid}\n' + f'c)=> {self.chan.uid}\n' # f'{self.chan.uid}\n' - f' |_ @{self.dst_maddr}\n' + f' |_ @{self.dst_maddr}\n' f' >> {self.repr_rpc}\n' # f' >> {self._nsf}() -> {codec}[dict]:\n\n' # TODO: pull msg-type from spec re #320 @@ -1267,6 +1268,12 @@ class Context: @property def maybe_error(self) -> BaseException|None: + ''' + Return the (remote) error as outcome or `None`. + + Remote errors take precedence over local ones. + + ''' le: BaseException|None = self._local_error re: RemoteActorError|ContextCancelled|None = self._remote_error @@ -2182,9 +2189,16 @@ async def open_context_from_portal( # handled in the block above ^^^ !! 
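+        # choose the log-emission level based on the scope-error
+        # type: a graceful `trio.Cancelled` is reported at
+        # `.cancel()` level while anything else (unexpected) gets
+        # a full `.exception()` traceback.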
# await _debug.pause() # log.cancel( - log.exception( - f'{ctx.side}-side of `Context` terminated with ' - f'.outcome => {ctx.repr_outcome()}\n' + match scope_err: + case trio.Cancelled: + logmeth = log.cancel + + # XXX explicitly report on any non-graceful-taskc cases + case _: + logmeth = log.exception + + logmeth( + f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n' ) if debug_mode(): diff --git a/tractor/_entry.py b/tractor/_entry.py index 60050ea4..3f174528 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -265,7 +265,7 @@ def _trio_main( except BaseException as err: logmeth = log.error exit_status: str = ( - 'Main actor task crashed during exit?\n' + 'Main actor task exited due to crash?\n' + nest_from_op( input_op='x)>', # closed by error diff --git a/tractor/_runtime.py b/tractor/_runtime.py index efc9be9c..8bdcf6a5 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -66,10 +66,11 @@ from trio import ( ) from tractor.msg import ( - pretty_struct, - NamespacePath, - types as msgtypes, MsgType, + NamespacePath, + Stop, + pretty_struct, + types as msgtypes, ) from ._ipc import Channel from ._context import ( @@ -547,7 +548,8 @@ class Actor: ): log.cancel( 'Waiting on cancel request to peer\n' - f'`Portal.cancel_actor()` => {chan.uid}\n' + f'c)=>\n' + f' |_{chan.uid}\n' ) # XXX: this is a soft wait on the channel (and its @@ -644,12 +646,14 @@ class Actor: # and an_exit_cs.cancelled_caught ): - log.warning( + report: str = ( 'Timed out waiting on local actor-nursery to exit?\n' f'{local_nursery}\n' - f' |_{pformat(local_nursery._children)}\n' ) - # await _debug.pause() + if children := local_nursery._children: + report += f' |_{pformat(children)}\n' + + log.warning(report) if disconnected: # if the transport died and this actor is still @@ -821,14 +825,17 @@ class Actor: # side, )] except KeyError: - log.warning( + report: str = ( 'Ignoring invalid IPC ctx msg!\n\n' - f'<= sender: {uid}\n\n' - # XXX don't need right since it's always in msg? - # f'=> cid: {cid}\n\n' - - f'{pretty_struct.pformat(msg)}\n' + f'<=? 
{uid}\n\n' + f' |_{pretty_struct.pformat(msg)}\n' ) + match msg: + case Stop(): + log.runtime(report) + case _: + log.warning(report) + return # if isinstance(msg, MsgTypeError): @@ -1340,10 +1347,11 @@ class Actor: return True log.cancel( - 'Cancel request for RPC task\n\n' - f'<= Actor._cancel_task(): {requesting_uid}\n\n' - f'=> {ctx._task}\n' - f' |_ >> {ctx.repr_rpc}\n' + 'Rxed cancel request for RPC task\n' + f'<=c) {requesting_uid}\n' + f' |_{ctx._task}\n' + f' >> {ctx.repr_rpc}\n' + # f'=> {ctx._task}\n' # f' >> Actor._cancel_task() => {ctx._task}\n' # f' |_ {ctx._task}\n\n' diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 986c2e29..562c7e5b 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -250,8 +250,9 @@ async def hard_kill( ''' log.cancel( - 'Terminating sub-proc:\n' - f'|_{proc}\n' + 'Terminating sub-proc\n' + f'>x)\n' + f' |_{proc}\n' ) # NOTE: this timeout used to do nothing since we were shielding # the ``.wait()`` inside ``new_proc()`` which will pretty much @@ -297,8 +298,8 @@ async def hard_kill( log.critical( # 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n' '#T-800 deployed to collect zombie B0\n' - f'|\n' - f'|_{proc}\n' + f'>x)\n' + f' |_{proc}\n' ) proc.kill() @@ -326,8 +327,9 @@ async def soft_kill( uid: tuple[str, str] = portal.channel.uid try: log.cancel( - 'Soft killing sub-actor via `Portal.cancel_actor()`\n' - f'|_{proc}\n' + 'Soft killing sub-actor via portal request\n' + f'c)> {portal.chan.uid}\n' + f' |_{proc}\n' ) # wait on sub-proc to signal termination await wait_func(proc) @@ -556,8 +558,9 @@ async def trio_proc( # cancel result waiter that may have been spawned in # tandem if not done already log.cancel( - 'Cancelling existing result waiter task for ' - f'{subactor.uid}' + 'Cancelling portal result reaper task\n' + f'>c)\n' + f' |_{subactor.uid}\n' ) nursery.cancel_scope.cancel() @@ -566,7 +569,11 @@ async def trio_proc( # allowed! Do this **after** cancellation/teardown to avoid # killing the process too early. if proc: - log.cancel(f'Hard reap sequence starting for {subactor.uid}') + log.cancel( + f'Hard reap sequence starting for subactor\n' + f'>x)\n' + f' |_{subactor}@{subactor.uid}\n' + ) with trio.CancelScope(shield=True): # don't clobber an ongoing pdb -- 2.34.1 From bac84a5e23386238aecd143a2a7fc600329ccd2e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 3 Jul 2024 17:01:37 -0400 Subject: [PATCH 183/190] Quieter `Stop` handling on ctx result capture In the `drain_to_final_msg()` impl, since a stream terminating gracefully requires this msg, there's really no reason to `log.cancel()` about it; go `.runtime()` level instead since we're trying de-noise under "normal operation". Also, - passthrough `hide_tb` to taskc-handler's `ctx.maybe_raise()` call. - raise `MessagingError` for the `MsgType` unmatched `case _:`. - detail the doc string motivation a little more. 
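
As a rough usage sketch, the (teardown-side) call pattern this touches
looks something like the below; NOTE the exact kwarg names here are
assumptions for illustration, not new API:

    # eg. inside `Portal.open_context().__aexit__()`-style teardown:
    return_msg, drained_msgs = await drain_to_final_msg(
        ctx=ctx,
        hide_tb=True,
    )
    # if we get here a `Return` was captured (or an `Error` was
    # already unpacked and raised); any late `Yield`/`Stop` msgs
    # rxed during the drain are delivered in `drained_msgs`.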
--- tractor/msg/_ops.py | 55 +++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 91c0ddea..f0f3b6b2 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -374,7 +374,7 @@ class PldRx(Struct): case _: src_err = InternalError( - 'Unknown IPC msg ??\n\n' + 'Invalid IPC msg ??\n\n' f'{msg}\n' ) @@ -499,7 +499,7 @@ async def maybe_limit_plds( yield None return - # sanity on scoping + # sanity check on IPC scoping curr_ctx: Context = current_ipc_ctx() assert ctx is curr_ctx @@ -510,6 +510,8 @@ async def maybe_limit_plds( ) as msgdec: yield msgdec + # when the applied spec is unwound/removed, the same IPC-ctx + # should still be in scope. curr_ctx: Context = current_ipc_ctx() assert ctx is curr_ctx @@ -525,16 +527,26 @@ async def drain_to_final_msg( list[MsgType] ]: ''' - Drain IPC msgs delivered to the underlying IPC primitive's - rx-mem-chan (eg. `Context._rx_chan`) from the runtime in - search for a final result or error. + Drain IPC msgs delivered to the underlying IPC context's + rx-mem-chan (i.e. from `Context._rx_chan`) in search for a final + `Return` or `Error` msg. - The motivation here is to ideally capture errors during ctxc - conditions where a canc-request/or local error is sent but the - local task also excepts and enters the - `Portal.open_context().__aexit__()` block wherein we prefer to - capture and raise any remote error or ctxc-ack as part of the - `ctx.result()` cleanup and teardown sequence. + Deliver the `Return` + preceding drained msgs (`list[MsgType]`) + as a pair unless an `Error` is found, in which unpack and raise + it. + + The motivation here is to always capture any remote error relayed + by the remote peer task during a ctxc condition. + + For eg. a ctxc-request may be sent to the peer as part of the + local task's (request for) cancellation but then that same task + **also errors** before executing the teardown in the + `Portal.open_context().__aexit__()` block. In such error-on-exit + cases we want to always capture and raise any delivered remote + error (like an expected ctxc-ACK) as part of the final + `ctx.wait_for_result()` teardown sequence such that the + `Context.outcome` related state always reflect what transpired + even after ctx closure and the `.open_context()` block exit. ''' __tracebackhide__: bool = hide_tb @@ -572,7 +584,6 @@ async def drain_to_final_msg( # |_from tractor.devx._debug import pause # await pause() - # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: # 1. we requested the cancellation and thus @@ -580,13 +591,13 @@ async def drain_to_final_msg( # 2. WE DID NOT REQUEST that cancel and thus # SHOULD RAISE HERE! except trio.Cancelled as taskc: - # CASE 2: mask the local cancelled-error(s) # only when we are sure the remote error is # the source cause of this local task's # cancellation. ctx.maybe_raise( - # TODO: when use this/ + hide_tb=hide_tb, + # TODO: when use this? # from_src_exc=taskc, ) @@ -659,7 +670,7 @@ async def drain_to_final_msg( # Stop() case Stop(): pre_result_drained.append(msg) - log.cancel( + log.runtime( # normal/expected shutdown transaction 'Remote stream terminated due to "stop" msg:\n\n' f'{pretty_struct.pformat(msg)}\n' ) @@ -719,13 +730,19 @@ async def drain_to_final_msg( pre_result_drained.append(msg) # It's definitely an internal error if any other # msg type without a`'cid'` field arrives here! 
+ report: str = ( + f'Invalid or unknown msg type {type(msg)!r}!?\n' + ) if not msg.cid: - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' + report += ( + '\nWhich also has no `.cid` field?\n' ) - raise RuntimeError('Unknown msg type: {msg}') + raise MessagingError( + report + + + f'\n{msg}\n' + ) else: log.cancel( -- 2.34.1 From 46066c02e4631bdd9c583327c9ca0513b2bdfab2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 4 Jul 2024 15:06:15 -0400 Subject: [PATCH 184/190] More-n-more scops annots in logging --- tractor/_context.py | 11 ++++++---- tractor/_entry.py | 1 + tractor/_portal.py | 6 +++--- tractor/_runtime.py | 50 ++++++++++++++++++++++++++++----------------- 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 7b1ac296..e52ee6b7 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -2376,8 +2376,9 @@ async def open_context_from_portal( and ctx.cancel_acked ): log.cancel( - f'Context cancelled by {ctx.side!r}-side task\n' - f'|_{ctx._task}\n\n' + f'Context cancelled by local {ctx.side!r}-side task\n' + f'c)>\n' + f' |_{ctx._task}\n\n' f'{repr(scope_err)}\n' ) @@ -2393,8 +2394,10 @@ async def open_context_from_portal( # type_only=True, ) log.cancel( - f'Context terminated due to local {ctx.side!r}-side error:\n\n' - f'{ctx.chan.uid} => {outcome_str}\n' + f'Context terminated due to {ctx.side!r}-side\n\n' + # TODO: do an x)> on err and c)> only for ctxc? + f'c)> {outcome_str}\n' + f' |_{ctx.repr_rpc}\n' ) # FINALLY, remove the context from runtime tracking and diff --git a/tractor/_entry.py b/tractor/_entry.py index 3f174528..a072706c 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -243,6 +243,7 @@ def _trio_main( nest_from_op( input_op=')>', # like a "closed-to-play"-icon from super perspective tree_str=actor_info, + back_from_op=1, ) ) try: diff --git a/tractor/_portal.py b/tractor/_portal.py index 3dc7f3a1..f5a66836 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -263,11 +263,11 @@ class Portal: return False reminfo: str = ( - f'Portal.cancel_actor() => {self.channel.uid}\n' - f'|_{chan}\n' + f'c)=> {self.channel.uid}\n' + f' |_{chan}\n' ) log.cancel( - f'Requesting runtime cancel for peer\n\n' + f'Requesting actor-runtime cancel for peer\n\n' f'{reminfo}' ) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 8bdcf6a5..46d12854 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -441,10 +441,10 @@ class Actor: TransportClosed, ): - # XXX: This may propagate up from ``Channel._aiter_recv()`` - # and ``MsgpackStream._inter_packets()`` on a read from the + # XXX: This may propagate up from `Channel._aiter_recv()` + # and `MsgpackStream._inter_packets()` on a read from the # stream particularly when the runtime is first starting up - # inside ``open_root_actor()`` where there is a check for + # inside `open_root_actor()` where there is a check for # a bound listener on the "arbiter" addr. the reset will be # because the handshake was never meant took place. 
log.runtime( @@ -509,8 +509,9 @@ class Actor: ) except trio.Cancelled: log.cancel( - 'IPC transport msg loop was cancelled for \n' - f'|_{chan}\n' + 'IPC transport msg loop was cancelled\n' + f'c)>\n' + f' |_{chan}\n' ) raise @@ -547,9 +548,9 @@ class Actor: ): log.cancel( - 'Waiting on cancel request to peer\n' + 'Waiting on cancel request to peer..\n' f'c)=>\n' - f' |_{chan.uid}\n' + f' |_{chan.uid}\n' ) # XXX: this is a soft wait on the channel (and its @@ -648,10 +649,14 @@ class Actor: ): report: str = ( 'Timed out waiting on local actor-nursery to exit?\n' - f'{local_nursery}\n' + f'c)>\n' + f' |_{local_nursery}\n' ) if children := local_nursery._children: - report += f' |_{pformat(children)}\n' + # indent from above local-nurse repr + report += ( + f' |_{pformat(children)}\n' + ) log.warning(report) @@ -1238,8 +1243,9 @@ class Actor: # TODO: just use the new `Context.repr_rpc: str` (and # other) repr fields instead of doing this all manual.. msg: str = ( - f'Runtime cancel request from {requester_type}:\n\n' - f'<= .cancel(): {requesting_uid}\n\n' + f'Actor-runtime cancel request from {requester_type}\n\n' + f'<=c) {requesting_uid}\n' + f' |_{self}\n' ) # TODO: what happens here when we self-cancel tho? @@ -1349,7 +1355,7 @@ class Actor: log.cancel( 'Rxed cancel request for RPC task\n' f'<=c) {requesting_uid}\n' - f' |_{ctx._task}\n' + f' |_{ctx._task}\n' f' >> {ctx.repr_rpc}\n' # f'=> {ctx._task}\n' # f' >> Actor._cancel_task() => {ctx._task}\n' @@ -1467,17 +1473,17 @@ class Actor: "IPC channel's " ) rent_chan_repr: str = ( - f' |_{parent_chan}\n\n' + f' |_{parent_chan}\n\n' if parent_chan else '' ) log.cancel( f'Cancelling {descr} RPC tasks\n\n' - f'<= canceller: {req_uid}\n' + f'<=c) {req_uid} [canceller]\n' f'{rent_chan_repr}' - f'=> cancellee: {self.uid}\n' - f' |_{self}.cancel_rpc_tasks()\n' - f' |_tasks: {len(tasks)}\n' + f'c)=> {self.uid} [cancellee]\n' + f' |_{self} [with {len(tasks)} tasks]\n' + # f' |_tasks: {len(tasks)}\n' # f'{tasks_str}' ) for ( @@ -1935,9 +1941,15 @@ async def async_main( with CancelScope(shield=True): await actor._no_more_peers.wait() - teardown_report += ('-> All peer channels are complete\n') + teardown_report += ( + '-> All peer channels are complete\n' + ) - teardown_report += ('Actor runtime exited') + teardown_report += ( + 'Actor runtime exiting\n' + f'>)\n' + f'|_{actor}\n' + ) log.info(teardown_report) -- 2.34.1 From 32f7742e53fca3e7367e1eed19eee1e96a9baf62 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 4 Jul 2024 19:40:11 -0400 Subject: [PATCH 185/190] Finally implement peer-lookup optimization.. There's a been a todo for soo long for this XD Since all `Actor`'s store a set of `._peers` we can try a lookup on that table as a shortcut before pinging the registry Bo Impl deats: - add a new `._discovery.get_peer_by_name()` routine which attempts the `._peers` lookup by combining a copy of that `dict` + an entry added for `Actor._parent_chan` (since all subs have a parent and often the desired contact is just that connection). - change `.find_actor()` (for the `only_first == True` case), `.query_actor()` and `.wait_for_actor()` to call the new helper and deliver appropriate outputs if possible. Other, - deprecate `get_arbiter()` def and all usage in tests and examples. - drop lingering use of `arbiter_sockaddr` arg to various routines. - tweak the `Actor` doc str as well as some code fmting and a tweak to the `._stream_handler()`'s initial `con_status: str` logging value since the way it was could never be reached.. 
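
For a quick feel of the new helper, a sketch of the re-use pattern
which `.find_actor()`/`.wait_for_actor()` now do internally:

    chans: list[Channel]|None = get_peer_by_name('registrar')
    if chans:
        # skip the registrar query and just re-use the first
        # already-connected IPC chan to the peer.
        async with open_portal(chans[0]) as portal:
            ...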
oh and `.warning()` on any new connections which already have a `_pre_chan: Channel` entry in `._peers` so we can start minimizing IPC duplications. --- examples/service_discovery.py | 2 +- tests/test_discovery.py | 6 +- tests/test_local.py | 2 +- tests/test_multi_program.py | 4 +- tractor/__init__.py | 2 +- tractor/_discovery.py | 170 +++++++++++++++++++--------------- tractor/_runtime.py | 96 +++++++++---------- 7 files changed, 151 insertions(+), 131 deletions(-) diff --git a/examples/service_discovery.py b/examples/service_discovery.py index 858f7f12..a0f37b88 100644 --- a/examples/service_discovery.py +++ b/examples/service_discovery.py @@ -9,7 +9,7 @@ async def main(service_name): async with tractor.open_nursery() as an: await an.start_actor(service_name) - async with tractor.get_arbiter('127.0.0.1', 1616) as portal: + async with tractor.get_registry('127.0.0.1', 1616) as portal: print(f"Arbiter is listening on {portal.channel}") async with tractor.wait_for_actor(service_name) as sockaddr: diff --git a/tests/test_discovery.py b/tests/test_discovery.py index cd9dc022..508fdbe1 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -26,7 +26,7 @@ async def test_reg_then_unreg(reg_addr): portal = await n.start_actor('actor', enable_modules=[__name__]) uid = portal.channel.uid - async with tractor.get_arbiter(*reg_addr) as aportal: + async with tractor.get_registry(*reg_addr) as aportal: # this local actor should be the arbiter assert actor is aportal.actor @@ -160,7 +160,7 @@ async def spawn_and_check_registry( async with tractor.open_root_actor( registry_addrs=[reg_addr], ): - async with tractor.get_arbiter(*reg_addr) as portal: + async with tractor.get_registry(*reg_addr) as portal: # runtime needs to be up to call this actor = tractor.current_actor() @@ -298,7 +298,7 @@ async def close_chans_before_nursery( async with tractor.open_root_actor( registry_addrs=[reg_addr], ): - async with tractor.get_arbiter(*reg_addr) as aportal: + async with tractor.get_registry(*reg_addr) as aportal: try: get_reg = partial(unpack_reg, aportal) diff --git a/tests/test_local.py b/tests/test_local.py index a019d771..ecdad5fe 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -38,7 +38,7 @@ async def test_self_is_registered_localportal(reg_addr): "Verify waiting on the arbiter to register itself using a local portal." 
actor = tractor.current_actor() assert actor.is_arbiter - async with tractor.get_arbiter(*reg_addr) as portal: + async with tractor.get_registry(*reg_addr) as portal: assert isinstance(portal, tractor._portal.LocalPortal) with trio.fail_after(0.2): diff --git a/tests/test_multi_program.py b/tests/test_multi_program.py index 92f4c52d..860eeebb 100644 --- a/tests/test_multi_program.py +++ b/tests/test_multi_program.py @@ -32,7 +32,7 @@ def test_abort_on_sigint(daemon): @tractor_test async def test_cancel_remote_arbiter(daemon, reg_addr): assert not tractor.current_actor().is_arbiter - async with tractor.get_arbiter(*reg_addr) as portal: + async with tractor.get_registry(*reg_addr) as portal: await portal.cancel_actor() time.sleep(0.1) @@ -41,7 +41,7 @@ async def test_cancel_remote_arbiter(daemon, reg_addr): # no arbiter socket should exist with pytest.raises(OSError): - async with tractor.get_arbiter(*reg_addr) as portal: + async with tractor.get_registry(*reg_addr) as portal: pass diff --git a/tractor/__init__.py b/tractor/__init__.py index 4f9e592d..a27a3b59 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -31,7 +31,7 @@ from ._streaming import ( stream as stream, ) from ._discovery import ( - get_arbiter as get_arbiter, + get_registry as get_registry, find_actor as find_actor, wait_for_actor as wait_for_actor, query_actor as query_actor, diff --git a/tractor/_discovery.py b/tractor/_discovery.py index 99a4dd68..a681c63b 100644 --- a/tractor/_discovery.py +++ b/tractor/_discovery.py @@ -26,8 +26,8 @@ from typing import ( TYPE_CHECKING, ) from contextlib import asynccontextmanager as acm -import warnings +from tractor.log import get_logger from .trionics import gather_contexts from ._ipc import _connect_chan, Channel from ._portal import ( @@ -40,11 +40,13 @@ from ._state import ( _runtime_vars, ) - if TYPE_CHECKING: from ._runtime import Actor +log = get_logger(__name__) + + @acm async def get_registry( host: str, @@ -56,14 +58,12 @@ async def get_registry( ]: ''' Return a portal instance connected to a local or remote - arbiter. + registry-service actor; if a connection already exists re-use it + (presumably to call a `.register_actor()` registry runtime RPC + ep). ''' - actor = current_actor() - - if not actor: - raise RuntimeError("No actor instance has been defined yet?") - + actor: Actor = current_actor() if actor.is_registrar: # we're already the arbiter # (likely a re-entrant call from the arbiter actor) @@ -72,6 +72,8 @@ async def get_registry( Channel((host, port)) ) else: + # TODO: try to look pre-existing connection from + # `Actor._peers` and use it instead? async with ( _connect_chan(host, port) as chan, open_portal(chan) as regstr_ptl, @@ -80,19 +82,6 @@ async def get_registry( -# TODO: deprecate and this remove _arbiter form! -@acm -async def get_arbiter(*args, **kwargs): - warnings.warn( - '`tractor.get_arbiter()` is now deprecated!\n' - 'Use `.get_registry()` instead!', - DeprecationWarning, - stacklevel=2, - ) - async with get_registry(*args, **kwargs) as to_yield: - yield to_yield - - @acm async def get_root( **kwargs, @@ -110,22 +99,53 @@ async def get_root( yield portal +def get_peer_by_name( + name: str, + # uuid: str|None = None, + +) -> list[Channel]|None: # at least 1 + ''' + Scan for an existing connection (set) to a named actor + and return any channels from `Actor._peers`. + + This is an optimization method over querying the registrar for + the same info. 
+
+    '''
+    actor: Actor = current_actor()
+    to_scan: dict[tuple, list[Channel]] = actor._peers.copy()
+    pchan: Channel|None = actor._parent_chan
+    if pchan:
+        to_scan[pchan.uid].append(pchan)
+
+    for aid, chans in to_scan.items():
+        _, peer_name = aid
+        if name == peer_name:
+            if not chans:
+                log.warning(
+                    f'No IPC chans for matching peer {peer_name}\n'
+                )
+                continue
+            return chans
+
+    return None
+
+
 @acm
 async def query_actor(
     name: str,
-    arbiter_sockaddr: tuple[str, int] | None = None,
-    regaddr: tuple[str, int] | None = None,
+    regaddr: tuple[str, int]|None = None,

 ) -> AsyncGenerator[
-    tuple[str, int] | None,
+    tuple[str, int]|None,
     None,
 ]:
     '''
-    Make a transport address lookup for an actor name to a specific
-    registrar.
+    Lookup a transport address (by actor name) via querying a registrar
+    listening @ `regaddr`.

-    Returns the (socket) address or ``None`` if no entry under that
-    name exists for the given registrar listening @ `regaddr`.
+    Returns the transport protocol (socket) address or `None` if no
+    entry under that name exists.

     '''
     actor: Actor = current_actor()
@@ -137,14 +157,10 @@ async def query_actor(
             'The current actor IS the registry!?'
         )

-    if arbiter_sockaddr is not None:
-        warnings.warn(
-            '`tractor.query_actor(regaddr=)` is deprecated.\n'
-            'Use `registry_addrs: list[tuple]` instead!',
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        regaddr: list[tuple[str, int]] = arbiter_sockaddr
+    maybe_peers: list[Channel]|None = get_peer_by_name(name)
+    if maybe_peers:
+        yield maybe_peers[0].raddr
+        return

     reg_portal: Portal
     regaddr: tuple[str, int] = regaddr or actor.reg_addrs[0]
@@ -159,10 +175,28 @@ async def query_actor(
         yield sockaddr


+@acm
+async def maybe_open_portal(
+    addr: tuple[str, int],
+    name: str,
+):
+    async with query_actor(
+        name=name,
+        regaddr=addr,
+    ) as sockaddr:
+        pass
+
+    if sockaddr:
+        async with _connect_chan(*sockaddr) as chan:
+            async with open_portal(chan) as portal:
+                yield portal
+    else:
+        yield None
+
+
 @acm
 async def find_actor(
     name: str,
-    arbiter_sockaddr: tuple[str, int]|None = None,
     registry_addrs: list[tuple[str, int]]|None = None,

     only_first: bool = True,
@@ -179,29 +213,12 @@ async def find_actor(
     known to the arbiter.
''' - if arbiter_sockaddr is not None: - warnings.warn( - '`tractor.find_actor(arbiter_sockaddr=)` is deprecated.\n' - 'Use `registry_addrs: list[tuple]` instead!', - DeprecationWarning, - stacklevel=2, - ) - registry_addrs: list[tuple[str, int]] = [arbiter_sockaddr] - - @acm - async def maybe_open_portal_from_reg_addr( - addr: tuple[str, int], - ): - async with query_actor( - name=name, - regaddr=addr, - ) as sockaddr: - if sockaddr: - async with _connect_chan(*sockaddr) as chan: - async with open_portal(chan) as portal: - yield portal - else: - yield None + # optimization path, use any pre-existing peer channel + maybe_peers: list[Channel]|None = get_peer_by_name(name) + if maybe_peers and only_first: + async with open_portal(maybe_peers[0]) as peer_portal: + yield peer_portal + return if not registry_addrs: # XXX NOTE: make sure to dynamically read the value on @@ -217,10 +234,13 @@ async def find_actor( maybe_portals: list[ AsyncContextManager[tuple[str, int]] ] = list( - maybe_open_portal_from_reg_addr(addr) + maybe_open_portal( + addr=addr, + name=name, + ) for addr in registry_addrs ) - + portals: list[Portal] async with gather_contexts( mngrs=maybe_portals, ) as portals: @@ -254,31 +274,31 @@ async def find_actor( @acm async def wait_for_actor( name: str, - arbiter_sockaddr: tuple[str, int] | None = None, registry_addr: tuple[str, int] | None = None, ) -> AsyncGenerator[Portal, None]: ''' - Wait on an actor to register with the arbiter. - - A portal to the first registered actor is returned. + Wait on at least one peer actor to register `name` with the + registrar, yield a `Portal to the first registree. ''' actor: Actor = current_actor() - if arbiter_sockaddr is not None: - warnings.warn( - '`tractor.wait_for_actor(arbiter_sockaddr=)` is deprecated.\n' - 'Use `registry_addr: tuple` instead!', - DeprecationWarning, - stacklevel=2, - ) - registry_addr: tuple[str, int] = arbiter_sockaddr + # optimization path, use any pre-existing peer channel + maybe_peers: list[Channel]|None = get_peer_by_name(name) + if maybe_peers: + async with open_portal(maybe_peers[0]) as peer_portal: + yield peer_portal + return + regaddr: tuple[str, int] = ( + registry_addr + or + actor.reg_addrs[0] + ) # TODO: use `.trionics.gather_contexts()` like # above in `find_actor()` as well? reg_portal: Portal - regaddr: tuple[str, int] = registry_addr or actor.reg_addrs[0] async with get_registry(*regaddr) as reg_portal: sockaddrs = await reg_portal.run_from_ns( 'self', diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 46d12854..63f04837 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -111,25 +111,26 @@ class Actor: ''' The fundamental "runtime" concurrency primitive. - An *actor* is the combination of a regular Python process executing - a ``trio`` task tree, communicating with other actors through - "memory boundary portals" - which provide a native async API around - IPC transport "channels" which themselves encapsulate various - (swappable) network protocols. + An "actor" is the combination of a regular Python process + executing a `trio.run()` task tree, communicating with other + "actors" through "memory boundary portals": `Portal`, which + provide a high-level async API around IPC "channels" (`Channel`) + which themselves encapsulate various (swappable) network + transport protocols for sending msgs between said memory domains + (processes, hosts, non-GIL threads). - - Each "actor" is ``trio.run()`` scheduled "runtime" composed of - many concurrent tasks in a single thread. 
The "runtime" tasks - conduct a slew of low(er) level functions to make it possible - for message passing between actors as well as the ability to - create new actors (aka new "runtimes" in new processes which - are supervised via a nursery construct). Each task which sends - messages to a task in a "peer" (not necessarily a parent-child, + Each "actor" is `trio.run()` scheduled "runtime" composed of many + concurrent tasks in a single thread. The "runtime" tasks conduct + a slew of low(er) level functions to make it possible for message + passing between actors as well as the ability to create new + actors (aka new "runtimes" in new processes which are supervised + via an "actor-nursery" construct). Each task which sends messages + to a task in a "peer" actor (not necessarily a parent-child, depth hierarchy) is able to do so via an "address", which maps IPC connections across memory boundaries, and a task request id - which allows for per-actor tasks to send and receive messages - to specific peer-actor tasks with which there is an ongoing - RPC/IPC dialog. + which allows for per-actor tasks to send and receive messages to + specific peer-actor tasks with which there is an ongoing RPC/IPC + dialog. ''' # ugh, we need to get rid of this and replace with a "registry" sys @@ -226,17 +227,20 @@ class Actor: # by the user (currently called the "arbiter") self._spawn_method: str = spawn_method - self._peers: defaultdict = defaultdict(list) + self._peers: defaultdict[ + str, # uaid + list[Channel], # IPC conns from peer + ] = defaultdict(list) self._peer_connected: dict[tuple[str, str], trio.Event] = {} self._no_more_peers = trio.Event() self._no_more_peers.set() + + # RPC state self._ongoing_rpc_tasks = trio.Event() self._ongoing_rpc_tasks.set() - - # (chan, cid) -> (cancel_scope, func) self._rpc_tasks: dict[ - tuple[Channel, str], - tuple[Context, Callable, trio.Event] + tuple[Channel, str], # (chan, cid) + tuple[Context, Callable, trio.Event] # (ctx=>, fn(), done?) ] = {} # map {actor uids -> Context} @@ -313,7 +317,10 @@ class Actor: event = self._peer_connected.setdefault(uid, trio.Event()) await event.wait() log.debug(f'{uid!r} successfully connected back to us') - return event, self._peers[uid][-1] + return ( + event, + self._peers[uid][-1], + ) def load_modules( self, @@ -404,32 +411,11 @@ class Actor: ''' self._no_more_peers = trio.Event() # unset by making new chan = Channel.from_stream(stream) - their_uid: tuple[str, str]|None = chan.uid - if their_uid: - log.warning( - f'Re-connection from already known {their_uid}' - ) - else: - log.runtime(f'New connection to us @{chan.raddr}') - - con_status: str = '' - - # TODO: remove this branch since can never happen? - # NOTE: `.uid` is only set after first contact - if their_uid: - con_status = ( - 'IPC Re-connection from already known peer?\n' - ) - else: - con_status = ( - 'New inbound IPC connection <=\n' - ) - - con_status += ( + con_status: str = ( + 'New inbound IPC connection <=\n' f'|_{chan}\n' - # f' |_@{chan.raddr}\n\n' - # ^-TODO-^ remove since alfready in chan.__repr__()? 
) + # send/receive initial handshake response try: uid: tuple|None = await self._do_handshake(chan) @@ -454,9 +440,22 @@ class Actor: ) return + familiar: str = 'new-peer' + if _pre_chan := self._peers.get(uid): + familiar: str = 'pre-existing-peer' + uid_short: str = f'{uid[0]}[{uid[1][-6:]}]' con_status += ( - f' -> Handshake with actor `{uid[0]}[{uid[1][-6:]}]` complete\n' + f' -> Handshake with {familiar} `{uid_short}` complete\n' ) + + if _pre_chan: + log.warning( + # con_status += ( + # ^TODO^ swap once we minimize conn duplication + f' -> Wait, we already have IPC with `{uid_short}`??\n' + f' |_{_pre_chan}\n' + ) + # IPC connection tracking for both peers and new children: # - if this is a new channel to a locally spawned # sub-actor there will be a spawn wait even registered @@ -1552,7 +1551,7 @@ class Actor: def accept_addr(self) -> tuple[str, int]: ''' Primary address to which the IPC transport server is - bound. + bound and listening for new connections. ''' # throws OSError on failure @@ -1569,6 +1568,7 @@ class Actor: def get_chans( self, uid: tuple[str, str], + ) -> list[Channel]: ''' Return all IPC channels to the actor with provided `uid`. -- 2.34.1 From d1b4d4be52edc17caad71a732a263fe7faf30402 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Jul 2024 13:31:29 -0400 Subject: [PATCH 186/190] Adjusts advanced fault tests to match new `TransportClosed` semantics --- tests/test_advanced_faults.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tests/test_advanced_faults.py b/tests/test_advanced_faults.py index dfaeb68e..a4d17791 100644 --- a/tests/test_advanced_faults.py +++ b/tests/test_advanced_faults.py @@ -91,7 +91,8 @@ def test_ipc_channel_break_during_stream( # non-`trio` spawners should never hit the hang condition that # requires the user to do ctl-c to cancel the actor tree. 
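+    # NOTE: the expectations below match the new `TransportClosed`
+    # semantics: the raised exc is now the higher-level
+    # `tractor.TransportClosed` (with the underlying `trio` exc
+    # chained via `.__cause__`) instead of a raw
+    # `trio.ClosedResourceError`.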
- expect_final_exc = trio.ClosedResourceError + # expect_final_exc = trio.ClosedResourceError + expect_final_exc = tractor.TransportClosed mod: ModuleType = import_path( examples_dir() / 'advanced_faults' @@ -157,7 +158,7 @@ def test_ipc_channel_break_during_stream( if pre_aclose_msgstream: expect_final_exc = KeyboardInterrupt - # NOTE when the parent IPC side dies (even if the child's does as well + # NOTE when the parent IPC side dies (even if the child does as well # but the child fails BEFORE the parent) we always expect the # IPC layer to raise a closed-resource, NEVER do we expect # a stop msg since the parent-side ctx apis will error out @@ -169,7 +170,8 @@ def test_ipc_channel_break_during_stream( and ipc_break['break_child_ipc_after'] is False ): - expect_final_exc = trio.ClosedResourceError + # expect_final_exc = trio.ClosedResourceError + expect_final_exc = tractor.TransportClosed # BOTH but, PARENT breaks FIRST elif ( @@ -180,7 +182,8 @@ def test_ipc_channel_break_during_stream( ipc_break['break_parent_ipc_after'] ) ): - expect_final_exc = trio.ClosedResourceError + # expect_final_exc = trio.ClosedResourceError + expect_final_exc = tractor.TransportClosed with pytest.raises( expected_exception=( @@ -199,8 +202,8 @@ def test_ipc_channel_break_during_stream( **ipc_break, ) ) - except KeyboardInterrupt as kbi: - _err = kbi + except KeyboardInterrupt as _kbi: + kbi = _kbi if expect_final_exc is not KeyboardInterrupt: pytest.fail( 'Rxed unexpected KBI !?\n' @@ -209,6 +212,21 @@ def test_ipc_channel_break_during_stream( raise + except tractor.TransportClosed as _tc: + tc = _tc + if expect_final_exc is KeyboardInterrupt: + pytest.fail( + 'Unexpected transport failure !?\n' + f'{repr(tc)}' + ) + cause: Exception = tc.__cause__ + assert ( + type(cause) is trio.ClosedResourceError + and + cause.args[0] == 'another task closed this fd' + ) + raise + # get raw instance from pytest wrapper value = excinfo.value if isinstance(value, ExceptionGroup): -- 2.34.1 From 5ea324da5ec4033b287ba27cae03310ed466f12e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 22 Aug 2024 17:22:53 -0400 Subject: [PATCH 187/190] Add `tb_hide: bool` ctl flag to `_open_and_supervise_one_cancels_all_nursery()` --- tractor/_supervise.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tractor/_supervise.py b/tractor/_supervise.py index fb737c12..4a538e9f 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -373,11 +373,12 @@ class ActorNursery: @acm async def _open_and_supervise_one_cancels_all_nursery( actor: Actor, + tb_hide: bool = False, ) -> typing.AsyncGenerator[ActorNursery, None]: # normally don't need to show user by default - __tracebackhide__: bool = True + __tracebackhide__: bool = tb_hide outer_err: BaseException|None = None inner_err: BaseException|None = None -- 2.34.1 From 125876185d25dc355dd1efcabf0ecb59b1fbf8e8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 22 Aug 2024 20:19:55 -0400 Subject: [PATCH 188/190] Add `indent: str` suport to `Context.pformat()` using `textwrap` --- tractor/_context.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index e52ee6b7..31db2bad 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -46,6 +46,7 @@ from dataclasses import ( from functools import partial import inspect from pprint import pformat +import textwrap from typing import ( Any, AsyncGenerator, @@ -335,6 +336,7 @@ class Context: extra_fields: dict[str, Any]|None = None, # 
^-TODO-^ some built-in extra state fields # we'll want in some devx specific cases? + indent: str|None = None, ) -> str: ds: str = '=' @@ -354,7 +356,6 @@ class Context: show_error_fields=True ) fmtstr: str = ( - f'\n' + f'{indent})>\n' ) + # NOTE: making this return a value that can be passed to # `eval()` is entirely **optional** dawggg B) # https://docs.python.org/3/library/functions.html#repr -- 2.34.1 From dbff7e6cd00dc864d07e37424b136e38a93397aa Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 26 Aug 2024 14:29:09 -0400 Subject: [PATCH 189/190] Report any external-rent-task-canceller during msg-drain As in whenever `Context.cancel()` is not (runtime internally) called (i.e. `._cancel_called` is not set), we can attempt to detect the parent `trio` nursery/cancel-scope that is the source. Emit the report with a `.cancel()` level and attempt to repr in "sclang" form as well as unhide the stack frame for debug/traceback-in. --- tractor/msg/_ops.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index f0f3b6b2..2faadb9f 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -590,15 +590,36 @@ async def drain_to_final_msg( # SHOULD NOT raise that far end error, # 2. WE DID NOT REQUEST that cancel and thus # SHOULD RAISE HERE! - except trio.Cancelled as taskc: + except trio.Cancelled as _taskc: + taskc: trio.Cancelled = _taskc + + # report when the cancellation wasn't (ostensibly) due to + # RPC operation, some surrounding parent cancel-scope. + if not ctx._scope.cancel_called: + task: trio.lowlevel.Task = trio.lowlevel.current_task() + rent_n: trio.Nursery = task.parent_nursery + if ( + (local_cs := rent_n.cancel_scope).cancel_called + ): + log.cancel( + 'RPC-ctx cancelled by local-parent scope during drain!\n\n' + f'c}}>\n' + f' |_{rent_n}\n' + f' |_.cancel_scope = {local_cs}\n' + f' |_>c}}\n' + f' |_{ctx.pformat(indent=" "*9)}' + # ^TODO, some (other) simpler repr here? + ) + __tracebackhide__: bool = False + # CASE 2: mask the local cancelled-error(s) # only when we are sure the remote error is # the source cause of this local task's # cancellation. ctx.maybe_raise( hide_tb=hide_tb, - # TODO: when use this? - # from_src_exc=taskc, + from_src_exc=taskc, + # ?TODO? when *should* we use this? ) # CASE 1: we DID request the cancel we simply -- 2.34.1 From 4b92e14c920bebefb0c0013fc804b0de3ccc0a84 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 17:36:52 -0500 Subject: [PATCH 190/190] Denoise duplicate chan logging for now --- tractor/_runtime.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 63f04837..662dd67a 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -449,11 +449,14 @@ class Actor: ) if _pre_chan: - log.warning( # con_status += ( # ^TODO^ swap once we minimize conn duplication - f' -> Wait, we already have IPC with `{uid_short}`??\n' - f' |_{_pre_chan}\n' + # -[ ] last thing might be reg/unreg runtime reqs? + # log.warning( + log.debug( + f'?Wait?\n' + f'We already have IPC with peer {uid_short!r}\n' + f'|_{_pre_chan}\n' ) # IPC connection tracking for both peers and new children: -- 2.34.1