From 8522f90000e7b84402aa70c419e164ce501e8a5f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 25 Dec 2020 15:07:36 -0500 Subject: [PATCH 1/6] Add type annots to exceptions mod Also add a `is_multi_cancelled()` predicate to test for `trio.MultiError`s that contain entirely cancel signals. Resolves #125 --- tractor/_exceptions.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 7ffe4cb..63e0d09 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -1,6 +1,7 @@ """ Our classy exception set. """ +from typing import Dict, Any import importlib import builtins import traceback @@ -14,7 +15,7 @@ _this_mod = importlib.import_module(__name__) class RemoteActorError(Exception): # TODO: local recontruction of remote exception deats "Remote actor exception bundled locally" - def __init__(self, message, type_str, **msgdata): + def __init__(self, message, type_str, **msgdata) -> None: super().__init__(message) for ns in [builtins, _this_mod, trio]: try: @@ -45,7 +46,7 @@ class ModuleNotExposed(ModuleNotFoundError): "The requested module is not exposed for RPC" -def pack_error(exc): +def pack_error(exc: BaseException) -> Dict[str, Any]: """Create an "error message" for tranmission over a channel (aka the wire). """ @@ -57,7 +58,11 @@ def pack_error(exc): } -def unpack_error(msg, chan=None, err_type=RemoteActorError): +def unpack_error( + msg: Dict[str, Any], + chan=None, + err_type=RemoteActorError +) -> Exception: """Unpack an 'error' message from the wire into a local ``RemoteActorError``. """ @@ -66,3 +71,15 @@ def unpack_error(msg, chan=None, err_type=RemoteActorError): f"{chan.uid}\n" + tb_str, **msg['error'], ) + + +def is_multi_cancelled(exc: BaseException) -> bool: + """Predicate to determine if a ``trio.MultiError`` contains only + ``trio.Cancelled`` sub-exceptions (and is likely the result of + cancelling a collection of subtasks. + + """ + return not trio.MultiError.filter( + lambda exc: exc if not isinstance(exc, trio.Cancelled) else None, + exc, + ) From 5d7a4e2b12997d6e711c06b775bfa78d809745f1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 25 Dec 2020 15:10:20 -0500 Subject: [PATCH 2/6] Denoise some common teardown "errors" to warnings. --- tractor/_actor.py | 2 +- tractor/_debug.py | 6 ++---- tractor/_ipc.py | 2 +- tractor/_trionics.py | 5 ++++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tractor/_actor.py b/tractor/_actor.py index 87e6139..25a4f77 100644 --- a/tractor/_actor.py +++ b/tractor/_actor.py @@ -144,7 +144,7 @@ async def _invoke( try: await chan.send(err_msg) except trio.ClosedResourceError: - log.exception( + log.warning( f"Failed to ship error to caller @ {chan.uid}") if cs is None: # error is from above code not from rpc invocation diff --git a/tractor/_debug.py b/tractor/_debug.py index f9af7a6..45cae31 100644 --- a/tractor/_debug.py +++ b/tractor/_debug.py @@ -15,6 +15,7 @@ from .log import get_logger from . import _state from ._discovery import get_root from ._state import is_root_process +from ._exceptions import is_multi_cancelled try: # wtf: only exported when installed in dev mode? @@ -318,10 +319,7 @@ async def _maybe_enter_pm(err): # Really we just want to mostly avoid catching KBIs here so there # might be a simpler check we can do? - and trio.MultiError.filter( - lambda exc: exc if not isinstance(exc, trio.Cancelled) else None, - err, - ) + and not is_multi_cancelled(err) ): log.warning("Actor crashed, entering debug mode") await post_mortem() diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 7f6a498..32b8966 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -51,7 +51,7 @@ class MsgpackStream: data = await self.stream.receive_some(2**10) log.trace(f"received {data}") # type: ignore except trio.BrokenResourceError: - log.error(f"Stream connection {self.raddr} broke") + log.warning(f"Stream connection {self.raddr} broke") return if data == b'': diff --git a/tractor/_trionics.py b/tractor/_trionics.py index 4d9e15f..0d54b24 100644 --- a/tractor/_trionics.py +++ b/tractor/_trionics.py @@ -13,6 +13,7 @@ from ._state import current_actor from .log import get_logger, get_loglevel from ._actor import Actor from ._portal import Portal +from ._exceptions import is_multi_cancelled from . import _state from . import _spawn @@ -246,7 +247,9 @@ async def open_nursery() -> typing.AsyncGenerator[ActorNursery, None]: # For now, shield both. with trio.CancelScope(shield=True): etype = type(err) - if etype in (trio.Cancelled, KeyboardInterrupt): + if etype in (trio.Cancelled, KeyboardInterrupt) or ( + is_multi_cancelled(err) + ): log.warning( f"Nursery for {current_actor().uid} was " f"cancelled with {etype}") From c28ffd8b1cc88fb9569ab47ee97e9b8711380e54 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 25 Dec 2020 15:20:58 -0500 Subject: [PATCH 3/6] Don't exception log multi-cancels --- tractor/_actor.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tractor/_actor.py b/tractor/_actor.py index 25a4f77..3d7e6f0 100644 --- a/tractor/_actor.py +++ b/tractor/_actor.py @@ -25,7 +25,8 @@ from .log import get_logger from ._exceptions import ( pack_error, unpack_error, - ModuleNotExposed + ModuleNotExposed, + is_multi_cancelled, ) from . import _debug from ._discovery import get_arbiter @@ -129,7 +130,11 @@ async def _invoke( except (Exception, trio.MultiError) as err: - if not isinstance(err, trio.ClosedResourceError): + # TODO: maybe we'll want differnet "levels" of debugging + # eventualy such as ('app', 'supervisory', 'runtime') ? + if not isinstance(err, trio.ClosedResourceError) and ( + not is_multi_cancelled(err) + ): log.exception("Actor crashed:") # XXX: is there any case where we'll want to debug IPC # disconnects? I can't think of a reason that inspecting From 0d05a727b6662043c0526a3beff15815d13cb0c7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 25 Dec 2020 15:28:32 -0500 Subject: [PATCH 4/6] Use error log level by default --- tractor/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/log.py b/tractor/log.py index ecee017..a45b46d 100644 --- a/tractor/log.py +++ b/tractor/log.py @@ -11,7 +11,7 @@ from ._state import ActorContextInfo _proj_name = 'tractor' -_default_loglevel = None +_default_loglevel = 'ERROR' # Super sexy formatting thanks to ``colorlog``. # (NOTE: we use the '{' format style) From 6b650c0fe6c378f0eef8ae0a2cfd5c9d397e383a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 26 Dec 2020 15:11:18 -0500 Subject: [PATCH 5/6] Add a "runtime" log level --- tractor/_actor.py | 5 +++-- tractor/_debug.py | 17 ++++++++++++----- tractor/log.py | 2 ++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/tractor/_actor.py b/tractor/_actor.py index 3d7e6f0..3a53ed9 100644 --- a/tractor/_actor.py +++ b/tractor/_actor.py @@ -135,13 +135,14 @@ async def _invoke( if not isinstance(err, trio.ClosedResourceError) and ( not is_multi_cancelled(err) ): - log.exception("Actor crashed:") # XXX: is there any case where we'll want to debug IPC # disconnects? I can't think of a reason that inspecting # this type of failure will be useful for respawns or # recovery logic - the only case is some kind of strange bug # in `trio` itself? - await _debug._maybe_enter_pm(err) + entered = await _debug._maybe_enter_pm(err) + if not entered: + log.exception("Actor crashed:") # always ship errors back to caller err_msg = pack_error(err) diff --git a/tractor/_debug.py b/tractor/_debug.py index 45cae31..7f20c5e 100644 --- a/tractor/_debug.py +++ b/tractor/_debug.py @@ -122,13 +122,16 @@ async def _acquire_debug_lock(uid: Tuple[str, str]) -> AsyncIterator[None]: """ task_name = trio.lowlevel.current_task() try: - log.error(f"TTY BEING ACQUIRED by {task_name}:{uid}") + log.debug( + f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}") await _debug_lock.acquire() - log.error(f"TTY lock acquired by {task_name}:{uid}") + + log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}") yield + finally: _debug_lock.release() - log.error(f"TTY lock released by {task_name}:{uid}") + log.debug(f"TTY lock released, remote task: {task_name}:{uid}") # @contextmanager @@ -289,7 +292,7 @@ breakpoint = partial( def _post_mortem(actor): - log.critical(f"\nAttaching to pdb in crashed actor: {actor.uid}\n") + log.runtime(f"\nAttaching to pdb in crashed actor: {actor.uid}\n") pdb = _mk_pdb() # custom Pdb post-mortem entry @@ -321,5 +324,9 @@ async def _maybe_enter_pm(err): # might be a simpler check we can do? and not is_multi_cancelled(err) ): - log.warning("Actor crashed, entering debug mode") + log.debug("Actor crashed, entering debug mode") await post_mortem() + return True + + else: + return False diff --git a/tractor/log.py b/tractor/log.py index a45b46d..e8327a3 100644 --- a/tractor/log.py +++ b/tractor/log.py @@ -31,11 +31,13 @@ LEVELS = { 'GARBAGE': 1, 'TRACE': 5, 'PROFILE': 15, + 'RUNTIME': 500, 'QUIET': 1000, } STD_PALETTE = { 'CRITICAL': 'red', 'ERROR': 'red', + 'RUNTIME': 'white', 'WARNING': 'yellow', 'INFO': 'green', 'DEBUG': 'white', From 5127effd88f6d65a0084fb50d4e8ab009c0f7b5b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 26 Dec 2020 15:11:42 -0500 Subject: [PATCH 6/6] Drop warning level logging assert(s) --- examples/debugging/multi_subactor_root_errors.py | 5 +++++ tests/test_debugger.py | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/examples/debugging/multi_subactor_root_errors.py b/examples/debugging/multi_subactor_root_errors.py index 62af999..8a7fa43 100644 --- a/examples/debugging/multi_subactor_root_errors.py +++ b/examples/debugging/multi_subactor_root_errors.py @@ -26,6 +26,11 @@ async def main(): ├─ python -m tractor._child --uid ('name_error', 'a7caf490 ...) `-python -m tractor._child --uid ('spawn_error', '52ee14a5 ...) `-python -m tractor._child --uid ('name_error', '3391222c ...) + + Order of failure: + - nested name_error sub-sub-actor + - root actor should then fail on assert + - program termination """ async with tractor.open_nursery() as n: diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 5360b4b..1f5c6d2 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -343,11 +343,14 @@ def test_multi_subactors_root_errors(spawn): # should now get attached in root with assert error before = str(child.before.decode()) + # should have come just after priot prompt - assert "Cancelling nursery in ('spawn_error'," in before assert "Attaching to pdb in crashed actor: ('arbiter'" in before assert "AssertionError" in before + # warnings assert we probably don't need + # assert "Cancelling nursery in ('spawn_error'," in before + # continue again child.sendline('c') child.expect(pexpect.EOF) @@ -369,6 +372,9 @@ def test_multi_nested_subactors_error_through_nurseries(spawn): child = spawn('multi_nested_subactors_error_up_through_nurseries') + # startup time can be iffy + time.sleep(1) + for i in range(12): try: child.expect(r"\(Pdb\+\+\)")