From 4e60c1737569d6e267a5190cd25acc56a5a96b4e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 23 Jan 2022 17:04:49 -0500 Subject: [PATCH] Refine the handler for child vs. root cases This gets very close to avoiding any possible hangs to do with tty locking and SIGINT handling minus a special case that will be detailed below. Summary of implementation changes: - convert `_mk_pdb()` -> `with _open_pdb() as pdb:` which implicitly handles the `bdb.BdbQuit` case such that debugger teardown hooks are always called. - rename the handler to `shield_sigint()` and handle a variety of new cases: * the root is in debug but hasn't been cancelled -> call `Actor.cancel_soon()` * the root is in debug but *has* been called (`Actor.cancel_soon()` already called) -> raise KBI * a child is in debug *and* has a task locking the debugger -> ignore SIGINT in child *and* the root actor. - if the debugger instance is provided to the handler at acquire time, on SIGINT handling completion re-print the last pdb++ REPL output so that the user realizes they are still actively in debug. - ignore the unlock case where a race condition of "no task" holding the lock causes the `RuntimeError` normally associated with the "wrong task" doing so (not sure if this is a `trio` bug?). - change debug logs to runtime level. Unhandled case(s): - a child is maybe in debug mode but does not itself have any task using the debugger. * ToDo: we need a way to decide what to do with "intermediate" child actors who themselves either are not in `debug_mode=True` but have children who *are* such that a SIGINT won't cause cancellation of that child-as-parent-of-another-child **iff** any of their children are in in debug mode. --- tractor/_debug.py | 135 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 40 deletions(-) diff --git a/tractor/_debug.py b/tractor/_debug.py index 54e0e80..af66143 100644 --- a/tractor/_debug.py +++ b/tractor/_debug.py @@ -20,6 +20,7 @@ Multi-core debugging for da peeps! """ import bdb import sys +import signal from functools import partial from contextlib import asynccontextmanager as acm from contextlib import contextmanager as cm @@ -163,7 +164,7 @@ async def _acquire_debug_lock( task_name = trio.lowlevel.current_task().name - log.debug( + log.runtime( f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}" ) @@ -176,14 +177,14 @@ async def _acquire_debug_lock( _no_remote_has_tty = trio.Event() try: - log.debug( + log.runtime( f"entering lock checkpoint, remote task: {task_name}:{uid}" ) we_acquired = True await _debug_lock.acquire() _global_actor_in_debug = uid - log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}") + log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}") # NOTE: critical section: this yield is unshielded! @@ -199,7 +200,10 @@ async def _acquire_debug_lock( finally: # if _global_actor_in_debug == uid: - if we_acquired and _debug_lock.locked(): + if ( + we_acquired + and _debug_lock.locked() + ): _debug_lock.release() # IFF there are no more requesting tasks queued up fire, the @@ -211,13 +215,13 @@ async def _acquire_debug_lock( if ( not stats.owner ): - log.debug(f"No more tasks waiting on tty lock! says {uid}") + log.runtime(f"No more tasks waiting on tty lock! says {uid}") _no_remote_has_tty.set() _no_remote_has_tty = None _global_actor_in_debug = None - log.debug(f"TTY lock released, remote task: {task_name}:{uid}") + log.runtime(f"TTY lock released, remote task: {task_name}:{uid}") @tractor.context @@ -262,9 +266,6 @@ async def _hijack_stdin_for_child( async with ctx.open_stream() as stream: assert await stream.receive() == 'pdb_unlock' - # try: - # assert await stream.receive() == 'pdb_unlock' - except ( # BaseException, trio.MultiError, @@ -272,6 +273,7 @@ async def _hijack_stdin_for_child( trio.Cancelled, # by local cancellation trio.ClosedResourceError, # by self._rx_chan ) as err: + # XXX: there may be a race with the portal teardown # with the calling actor which we can safely ignore. # The alternative would be sending an ack message @@ -282,10 +284,12 @@ async def _hijack_stdin_for_child( if isinstance(err, trio.Cancelled): raise + finally: - log.debug( + log.runtime( "TTY lock released, remote task:" - f"{task_name}:{subactor_uid}") + f"{task_name}:{subactor_uid}" + ) return "pdb_unlock_complete" @@ -358,8 +362,9 @@ async def _breakpoint( # shield: bool = False ) -> None: - '''``tractor`` breakpoint entry for engaging pdb machinery - in the root or a subactor. + ''' + breakpoint entry for engaging pdb machinery in the root or + a subactor. ''' __tracebackhide__ = True @@ -462,7 +467,7 @@ async def _breakpoint( # that locked? owner = _debug_lock.statistics().owner if owner: - raise + raise _global_actor_in_debug = None _local_task_in_debug = None @@ -497,7 +502,13 @@ def _open_pdb() -> PdbwTeardown: raise -def disable_sigint_in_pdb(signum, frame, *args): +def shield_sigint( + signum: int, + frame: 'frame', # type: ignore # noqa + pdb: Optional[PdbwTeardown] = None, + *args, + +) -> None: ''' Specialized debugger compatible SIGINT handler. @@ -506,39 +517,86 @@ def disable_sigint_in_pdb(signum, frame, *args): is always cancelled on ctrl-c. ''' - actor = tractor.current_actor() - if not actor._cancel_called: - log.pdb( - f"{actor.uid} is in debug and has not been cancelled, " - "ignoring SIGINT\n" - ) - else: - log.pdb( - f"{actor.uid} is already cancelling.." - ) - - global _global_actor_in_debug + global _local_task_in_debug, _global_actor_in_debug in_debug = _global_actor_in_debug + + actor = tractor.current_actor() + + # root actor branch that reports whether or not a child + # has locked debugger. if ( is_root_process() and in_debug ): - log.pdb(f'Root SIGINT disabled while {_global_actor_in_debug} is debugging') - - if in_debug[0] != 'root': - pass + name = in_debug[0] + if name != 'root': + log.pdb( + f"Ignoring SIGINT while child in debug mode: `{in_debug}`" + ) + else: + log.pdb( + "Ignoring SIGINT while in debug mode" + ) + + # child actor that has locked the debugger + elif ( + not is_root_process() + ): + task = _local_task_in_debug + if task: + log.pdb( + f"Ignoring SIGINT while task in debug mode: `{task}`" + ) + + # TODO: how to handle the case of an intermediary-child actor + # that **is not** marked in debug mode? + # elif debug_mode(): + + else: + log.pdb( + "Ignoring SIGINT since debug mode is enabled" + ) + + # noone has the debugger so raise KBI + else: + # If we haven't tried to cancel the runtime then do that instead + # of raising a KBI (which may non-gracefully destroy + # a ``trio.run()``). + if not actor._cancel_called: + actor.cancel_soon() + + # If the runtime is already cancelled it likely means the user + # hit ctrl-c again because teardown didn't full take place in + # which case we do the "hard" raising of a local KBI. else: - # actor.cancel_soon() raise KeyboardInterrupt + # maybe redraw/print last REPL output to console + if pdb: + + # TODO: make this work like sticky mode where if there is output + # detected as written to the tty we redraw this part underneath + # and erase the past draw of this same bit above? + # pdb.sticky = True + # pdb._print_if_sticky() + + # also see these links for an approach from ``ptk``: + # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040 + # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py + + pdb.do_longlist(None) + print(pdb.prompt, end='', flush=True) + @cm -def disable_sigint(): +def disable_sigint( + pdb: Optional[PdbwTeardown] = None + +) -> None: __tracebackhide__ = True # ensure the ``contextlib.contextmanager`` frame inside the wrapping # ``.__exit__()`` method isn't shown either. - import sys frame = sys._getframe() frame.f_back.f_globals['__tracebackhide__'] = True # NOTE: this seems like a form of cpython bug wherein @@ -548,10 +606,9 @@ def disable_sigint(): # for manual debugging if necessary # pdb.set_trace() - import signal orig_handler = signal.signal( signal.SIGINT, - disable_sigint_in_pdb + partial(shield_sigint, pdb=pdb), ) try: yield @@ -564,10 +621,9 @@ def disable_sigint(): def _set_trace(actor=None): __tracebackhide__ = True - # pdb = _open_pdb() with ( _open_pdb() as pdb, - disable_sigint(), + disable_sigint(pdb=pdb), ): if actor is not None: log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n") @@ -601,10 +657,9 @@ breakpoint = partial( def _post_mortem(actor): __tracebackhide__ = True - # pdb = _mk_pdb() with ( _open_pdb() as pdb, - disable_sigint(), + disable_sigint(pdb=pdb), ): log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")