From 5fb568226986d8f2cc3cbabbfd39bc1dfc2b6b3f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 18 Mar 2024 10:21:37 -0400 Subject: [PATCH] First try "relayed boxed errors", or "inceptions" Since adding more complex inter-peer (actor) testing scenarios, we definitely have an immediate need for `trio`'s style of "inceptions" but for nesting `RemoteActorError`s as they're relayed through multiple actor-IPC hops. So for example, a remote error relayed "through" some proxy actor to another ends up packing a `RemoteActorError` into another one such that there are 2 layers of RAEs with the first containing/boxing an original src actor error (type). In support of this extension to `RemoteActorError` we add: - `get_err_type()` error type resolver helper (factored fromthe body of `unpack_error()`) to be used whenever rendering `.src_type`/`.boxed_type`. - `.src_type_str: str` which is pulled from `.msgdata` and holds the above (eventually when unpacked) type as `str`. - `._src_type: BaseException|None` for the original "source" actor's error as unpacked in any remote (actor's) env and exposed as a readonly property `.src_type`. - `.boxed_type_str: str` the same as above but for the "last" boxed error's type; when the RAE is unpacked at its first hop this will be **the same as** `.src_type_str`. - `._boxed_type: BaseException` which now similarly should be "rendered" from the below type-`str` field instead of passed in as a error-type via `boxed_type` (though we still do for the ctxc case atm, see notes). |_ new sanity checks in `.__init__()` mostly as a reminder to handle that ^ ctxc case ^ more elegantly at some point.. |_ obvi we discard the previous `suberror_type` input arg. - fully remove the `.type`/`.type_str` properties instead expecting usage of `.boxed_/.src_` equivalents. - start deprecation of `.src_actor_uid` and make it delegate to new `.src_uid` - add `.relay_uid` propery for the last relay/hop's actor uid. - add `.relay_path: list[str]` which holds the per-hop updated sequence of relay actor uid's which consecutively did boxing of an RAE. - only include `.src_uid` and `.relay_path` in reprol() output. - factor field-to-str rendering into a new `_mk_fields_str()` and use it in `.__repr__()`/`.reprol()`. - add an `.unwrap()` to (attempt to) render the src error. - rework `pack_error()` to handle inceptions including, - packing the correct field-values for the new `boxed_type_str`, `relay_uid`, `src_uid`, `src_type_str`. - always updating the `relay_path` sequence with the uid of the current actor. - adjust `unpack_error()` to match all these changes, - pulling `boxed_type_str` and passing any resolved `boxed_type` to `RemoteActorError.__init__()`. - use the new `Context.maybe_raise()` convenience method. Adjust `._rpc` packing to `ContextCancelled(boxed_type=trio.Cancelled)` and tweak some more log msg formats. --- tractor/_exceptions.py | 367 ++++++++++++++++++++++++++++++++++------- tractor/_rpc.py | 16 +- 2 files changed, 311 insertions(+), 72 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 344f0c3..d6629ad 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -58,16 +58,44 @@ class InternalError(RuntimeError): ''' _body_fields: list[str] = [ - 'src_actor_uid', + 'boxed_type', + 'src_type', + # TODO: format this better if we're going to include it. + # 'relay_path', + 'src_uid', + + # only in sub-types 'canceller', 'sender', ] _msgdata_keys: list[str] = [ - 'type_str', + 'boxed_type_str', ] + _body_fields +def get_err_type(type_name: str) -> BaseException|None: + ''' + Look up an exception type by name from the set of locally + known namespaces: + + - `builtins` + - `tractor._exceptions` + - `trio` + + ''' + for ns in [ + builtins, + _this_mod, + trio, + ]: + if type_ref := getattr( + ns, + type_name, + False, + ): + return type_ref + # TODO: rename to just `RemoteError`? class RemoteActorError(Exception): @@ -81,13 +109,16 @@ class RemoteActorError(Exception): ''' reprol_fields: list[str] = [ - 'src_actor_uid', + # 'src_actor_uid', + 'src_uid', + 'relay_path', + # 'relay_uid', ] def __init__( self, message: str, - suberror_type: Type[BaseException] | None = None, + boxed_type: Type[BaseException]|None = None, **msgdata ) -> None: @@ -101,20 +132,124 @@ class RemoteActorError(Exception): # - .remote_type # also pertains to our long long oustanding issue XD # https://github.com/goodboy/tractor/issues/5 - self.boxed_type: str = suberror_type + # + # TODO: always set ._boxed_type` as `None` by default + # and instead render if from `.boxed_type_str`? + self._boxed_type: BaseException = boxed_type + self._src_type: BaseException|None = None self.msgdata: dict[str, Any] = msgdata - @property - def type(self) -> str: - return self.boxed_type + # TODO: mask out eventually or place in `pack_error()` + # pre-`return` lines? + # sanity on inceptions + if boxed_type is RemoteActorError: + if self.src_type_str == 'RemoteActorError': + import pdbp; pdbp.set_trace() + + assert self.src_type_str != 'RemoteActorError' + assert self.src_uid not in self.relay_path + + # ensure type-str matches and round-tripping from that + # str results in same error type. + # + # TODO NOTE: this is currently exclusively for the + # `ContextCancelled(boxed_type=trio.Cancelled)` case as is + # used inside `._rpc._invoke()` atm though probably we + # should better emphasize that special (one off?) case + # either by customizing `ContextCancelled.__init__()` or + # through a special factor func? + else: + if not self.msgdata.get('boxed_type_str'): + self.msgdata['boxed_type_str'] = str( + type(boxed_type).__name__ + ) + + assert self.boxed_type_str == self.msgdata['boxed_type_str'] + assert self.boxed_type is boxed_type @property - def type_str(self) -> str: - return str(type(self.boxed_type).__name__) + def src_type_str(self) -> str: + ''' + String-name of the source error's type. + This should be the same as `.boxed_type_str` when unpacked + at the first relay/hop's receiving actor. + + ''' + return self.msgdata['src_type_str'] + + @property + def src_type(self) -> str: + ''' + Error type raised by original remote faulting actor. + + ''' + if self._src_type is None: + self._src_type = get_err_type( + self.msgdata['src_type_str'] + ) + + return self._src_type + + @property + def boxed_type_str(self) -> str: + ''' + String-name of the (last hop's) boxed error type. + + ''' + return self.msgdata['boxed_type_str'] + + @property + def boxed_type(self) -> str: + ''' + Error type boxed by last actor IPC hop. + + ''' + if self._boxed_type is None: + self._src_type = get_err_type( + self.msgdata['boxed_type_str'] + ) + + return self._boxed_type + + @property + def relay_path(self) -> list[tuple]: + ''' + Return the list of actors which consecutively relayed + a boxed `RemoteActorError` the src error up until THIS + actor's hop. + + NOTE: a `list` field with the same name is expected to be + passed/updated in `.msgdata`. + + ''' + return self.msgdata['relay_path'] + + @property + def relay_uid(self) -> tuple[str, str]|None: + return tuple( + self.msgdata['relay_path'][-1] + ) + + @property + def src_uid(self) -> tuple[str, str]|None: + if src_uid := ( + self.msgdata.get('src_uid') + # TODO: remove! + or + self.msgdata.get('src_actor_uid') + ): + return tuple(src_uid) + # TODO: use path lookup instead? + # return tuple( + # self.msgdata['relay_path'][0] + # ) + + # TODO: deprecate this for ^! @property def src_actor_uid(self) -> tuple[str, str]|None: - return self.msgdata.get('src_actor_uid') + log.warning('.src_actor_uid` is deprecated, use `.src_uid` instead!') + return self.src_uid @property def tb_str( @@ -129,28 +264,56 @@ class RemoteActorError(Exception): return '' + def _mk_fields_str( + self, + fields: list[str], + end_char: str = '\n', + ) -> str: + _repr: str = '' + for key in fields: + val: Any|None = ( + getattr(self, key, None) + or + self.msgdata.get(key) + ) + # TODO: for `.relay_path` on multiline? + # if not isinstance(val, str): + # val_str = pformat(val) + # else: + val_str: str = repr(val) + + if val: + _repr += f'{key}={val_str}{end_char}' + + return _repr + def reprol(self) -> str: ''' Represent this error for "one line" display, like in a field of our `Context.__repr__()` output. ''' - _repr: str = f'{type(self).__name__}(' - for key in self.reprol_fields: - val: Any|None = self.msgdata.get(key) - if val: - _repr += f'{key}={repr(val)} ' - - return _repr + # TODO: use this matryoshka emjoi XD + # => 🪆 + reprol_str: str = f'{type(self).__name__}(' + _repr: str = self._mk_fields_str( + self.reprol_fields, + end_char=' ', + ) + return ( + reprol_str + + + _repr + ) def __repr__(self) -> str: + ''' + Nicely formatted boxed error meta data + traceback. - fields: str = '' - for key in _body_fields: - val: str|None = self.msgdata.get(key) - if val: - fields += f'{key}={val}\n' - + ''' + fields: str = self._mk_fields_str( + _body_fields, + ) fields: str = textwrap.indent( fields, # prefix=' '*2, @@ -165,8 +328,6 @@ class RemoteActorError(Exception): f' ------ - ------\n' f' _|\n' ) - # f'|\n' - # f' |\n' if indent: body: str = textwrap.indent( body, @@ -178,9 +339,47 @@ class RemoteActorError(Exception): ')>' ) - # TODO: local recontruction of remote exception deats + def unwrap( + self, + ) -> BaseException: + ''' + Unpack the inner-most source error from it's original IPC msg data. + + We attempt to reconstruct (as best as we can) the original + `Exception` from as it would have been raised in the + failing actor's remote env. + + ''' + src_type_ref: Type[BaseException] = self.src_type + if not src_type_ref: + raise TypeError( + 'Failed to lookup src error type:\n' + f'{self.src_type_str}' + ) + + # TODO: better tb insertion and all the fancier dunder + # metadata stuff as per `.__context__` etc. and friends: + # https://github.com/python-trio/trio/issues/611 + return src_type_ref(self.tb_str) + + # TODO: local recontruction of nested inception for a given + # "hop" / relay-node in this error's relay_path? + # => so would render a `RAE[RAE[RAE[Exception]]]` instance + # with all inner errors unpacked? + # -[ ] if this is useful shouldn't be too hard to impl right? # def unbox(self) -> BaseException: - # ... + # ''' + # Unbox to the prior relays (aka last boxing actor's) + # inner error. + + # ''' + # if not self.relay_path: + # return self.unwrap() + + # # TODO.. + # # return self.boxed_type( + # # boxed_type=get_type_ref(.. + # raise NotImplementedError class InternalActorError(RemoteActorError): @@ -232,7 +431,7 @@ class ContextCancelled(RemoteActorError): f'{self}' ) - # to make `.__repr__()` work uniformly + # TODO: to make `.__repr__()` work uniformly? # src_actor_uid = canceller @@ -283,7 +482,8 @@ class MessagingError(Exception): def pack_error( - exc: BaseException, + exc: BaseException|RemoteActorError, + tb: str|None = None, cid: str|None = None, @@ -300,27 +500,60 @@ def pack_error( else: tb_str = traceback.format_exc() + our_uid: tuple = current_actor().uid error_msg: dict[ str, str | tuple[str, str] ] = { 'tb_str': tb_str, - 'type_str': type(exc).__name__, - 'boxed_type': type(exc).__name__, - 'src_actor_uid': current_actor().uid, + 'relay_uid': our_uid, } - # TODO: ?just wholesale proxy `.msgdata: dict`? - # XXX WARNING, when i swapped these ctx-semantics - # tests started hanging..???!!!??? - # if msgdata := exc.getattr('msgdata', {}): - # error_msg.update(msgdata) if ( - isinstance(exc, ContextCancelled) - or isinstance(exc, StreamOverrun) + isinstance(exc, RemoteActorError) ): error_msg.update(exc.msgdata) + # an onion/inception we need to pack + if ( + type(exc) is RemoteActorError + and exc.boxed_type != RemoteActorError + ): + # sanity on source error (if needed when tweaking this) + assert (src_type := exc.src_type) != RemoteActorError + assert error_msg['src_type_str'] != 'RemoteActorError' + assert error_msg['src_type_str'] == src_type.__name__ + assert error_msg['src_uid'] != our_uid + + # set the boxed type to be another boxed type thus + # creating an "inception" when unpacked by + # `unpack_error()` in another actor who gets "relayed" + # this error Bo + # + # NOTE on WHY: since we are re-boxing and already + # boxed src error, we want to overwrite the original + # `boxed_type_str` and instead set it to the type of + # the input `exc` type. + error_msg['boxed_type_str'] = 'RemoteActorError' + + # import pdbp; pdbp.set_trace() + # log.debug( + # 'INCEPTION packing!\n\n' + # f'{pformat(exc.msgdata)}\n\n' + # f'{exc}\n' + # ) + + else: + error_msg['src_uid'] = our_uid + error_msg['src_type_str'] = type(exc).__name__ + error_msg['boxed_type_str'] = type(exc).__name__ + + # XXX alawys append us the last relay in error propagation path + error_msg.setdefault( + 'relay_path', + [], + ).append(our_uid) + pkt: dict = {'error': error_msg} if cid: pkt['cid'] = cid @@ -329,12 +562,10 @@ def pack_error( def unpack_error( - msg: dict[str, Any], chan: Channel|None = None, box_type: RemoteActorError = RemoteActorError, - hide_tb: bool = True, ) -> None|Exception: @@ -357,33 +588,38 @@ def unpack_error( # retrieve the remote error's msg encoded details tb_str: str = error_dict.get('tb_str', '') - message: str = f'{chan.uid}\n' + tb_str - type_name: str = ( - error_dict.get('type_str') - or error_dict['boxed_type'] + message: str = ( + f'{chan.uid}\n' + + + tb_str ) - suberror_type: Type[BaseException] = Exception + boxed_type_str: str = ( + # TODO: deprecate this! + error_dict.get('boxed_type_str') + # or error_dict['boxed_type'] + ) + boxed_type: Type[BaseException] = Exception - if type_name == 'ContextCancelled': - box_type = ContextCancelled - suberror_type = box_type + if boxed_type_str == 'ContextCancelled': + boxed_type = box_type = ContextCancelled - else: # try to lookup a suitable local error type - for ns in [ - builtins, - _this_mod, - trio, - ]: - if suberror_type := getattr( - ns, - type_name, - False, - ): - break + # TODO: already included by `_this_mod` in else loop right? + # + # we have an inception/onion-error so ensure + # we include the relay_path info and the + # original source error. + elif boxed_type_str == 'RemoteActorError': + boxed_type = RemoteActorError + assert len(error_dict['relay_path']) >= 1 + + # try to lookup a suitable error type + # from the local runtime env. + else: + boxed_type = get_err_type(boxed_type_str) exc = box_type( message, - suberror_type=suberror_type, + boxed_type=boxed_type, # unpack other fields into error type init **error_dict, @@ -501,6 +737,11 @@ def _raise_from_no_key_in_msg( # destined for the `Context.result()` call during ctx-exit! stream._eoc: Exception = eoc + # in case there already is some underlying remote error + # that arrived which is probably the source of this stream + # closure + ctx.maybe_raise() + raise eoc from src_err if ( diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 6bdc0c6..d369b41 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -273,7 +273,10 @@ async def _errors_relayed_via_ipc( entered_debug = await _debug._maybe_enter_pm(err) if not entered_debug: - log.exception('Actor crashed:\n') + log.exception( + 'RPC task crashed\n' + f'|_{ctx}' + ) # always (try to) ship RPC errors back to caller if is_rpc: @@ -613,7 +616,8 @@ async def _invoke( # other side. ctxc = ContextCancelled( msg, - suberror_type=trio.Cancelled, + boxed_type=trio.Cancelled, + # boxed_type_str='Cancelled', canceller=canceller, ) # assign local error so that the `.outcome` @@ -671,7 +675,7 @@ async def _invoke( f'`{repr(ctx.outcome)}`', ) ) - log.cancel( + log.runtime( f'IPC context terminated with a final {res_type_str}\n\n' f'{ctx}\n' ) @@ -704,12 +708,6 @@ async def try_ship_error_to_remote( # TODO: special tb fmting for ctxc cases? # tb=tb, ) - # NOTE: the src actor should always be packed into the - # error.. but how should we verify this? - # actor: Actor = _state.current_actor() - # assert err_msg['src_actor_uid'] - # if not err_msg['error'].get('src_actor_uid'): - # import pdbp; pdbp.set_trace() await channel.send(msg) # XXX NOTE XXX in SC terms this is one of the worst things