diff --git a/examples/debugging/fast_error_in_root_after_spawn.py b/examples/debugging/fast_error_in_root_after_spawn.py index d22833f..044815b 100644 --- a/examples/debugging/fast_error_in_root_after_spawn.py +++ b/examples/debugging/fast_error_in_root_after_spawn.py @@ -1,8 +1,9 @@ ''' -fast fail test with a context. -ensure the partially initialized sub-actor process +Fast fail test with a context. + +Ensure the partially initialized sub-actor process doesn't cause a hang on error/cancel of the parent -nrusery. +nursery. ''' import trio diff --git a/examples/debugging/root_timeout_while_child_crashed.py b/examples/debugging/root_timeout_while_child_crashed.py index 09a9003..e313672 100644 --- a/examples/debugging/root_timeout_while_child_crashed.py +++ b/examples/debugging/root_timeout_while_child_crashed.py @@ -20,9 +20,9 @@ async def main(): # spawn both actors portal = await n.run_in_actor(key_error) - # XXX: originally a bug causes by this - # where root would enter debugger even - # though child should have it locked. + # XXX: originally a bug caused by this is where root would enter + # the debugger and clobber the tty used by the repl even though + # child should have it locked. with trio.fail_after(1): await trio.Event().wait() diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 277663c..a3eb040 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -325,7 +325,7 @@ def test_multi_daemon_subactors(spawn, loglevel): # NOTE: previously since we did not have clobber prevention # in the root actor this final resume could result in the debugger # tearing down since both child actors would be cancelled and it was - # unlikely that `bp_forever` would re-acquire the tty loack again. + # unlikely that `bp_forever` would re-acquire the tty lock again. # Now, we should have a final resumption in the root plus a possible # second entry by `bp_forever`. @@ -335,7 +335,7 @@ def test_multi_daemon_subactors(spawn, loglevel): assert next_msg in before - # XXX: hoorayy the root clobering the child here was fixed! + # XXX: hooray the root clobbering the child here was fixed! # IMO, this demonstrates the true power of SC system design. # now the root actor won't clobber the bp_forever child @@ -412,9 +412,9 @@ def test_multi_subactors_root_errors(spawn): def test_multi_nested_subactors_error_through_nurseries(spawn): """Verify deeply nested actors that error trigger debugger entries at each actor nurserly (level) all the way up the tree. - """ - # NOTE: previously, inside this script was a a bug where if the + """ + # NOTE: previously, inside this script was a bug where if the # parent errors before a 2-levels-lower actor has released the lock, # the parent tries to cancel it but it's stuck in the debugger? # A test (below) has now been added to explicitly verify this is @@ -422,9 +422,6 @@ def test_multi_nested_subactors_error_through_nurseries(spawn): child = spawn('multi_nested_subactors_error_up_through_nurseries') - # startup time can be iffy - # time.sleep(1) - for i in range(12): try: child.expect(r"\(Pdb\+\+\)") @@ -502,7 +499,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock( child.expect(pexpect.EOF) break except pexpect.exceptions.TIMEOUT: - print('child was ablel to grab tty lock again?') + print('child was able to grab tty lock again?') if not timed_out_early: diff --git a/tractor/_debug.py b/tractor/_debug.py index cef3d8e..4fc7495 100644 --- a/tractor/_debug.py +++ b/tractor/_debug.py @@ -66,7 +66,7 @@ class PdbwTeardown(pdbpp.Pdb): # override the pdbpp config with our coolio one DefaultConfig = TractorConfig - # TODO: figure out how to dissallow recursive .set_trace() entry + # TODO: figure out how to disallow recursive .set_trace() entry # since that'll cause deadlock for us. def set_continue(self): try: @@ -125,9 +125,14 @@ class PdbwTeardown(pdbpp.Pdb): @asynccontextmanager async def _acquire_debug_lock( uid: Tuple[str, str] + ) -> AsyncIterator[trio.StrictFIFOLock]: - '''Acquire a actor local FIFO lock meant to mutex entry to a local - debugger entry point to avoid tty clobbering a global root process. + '''Acquire a root-actor local FIFO lock which tracks mutex access of + the process tree's global debugger breakpoint. + + This lock avoids tty clobbering (by preventing multiple processes + reading from stdstreams) and ensures multi-actor, sequential access + to the ``pdb`` repl. ''' global _debug_lock, _global_actor_in_debug, _no_remote_has_tty @@ -153,21 +158,18 @@ async def _acquire_debug_lock( we_acquired = True await _debug_lock.acquire() - # we_acquired = True - _global_actor_in_debug = uid log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}") - # NOTE: critical section! - # this yield is unshielded. - # IF we received a cancel during the shielded lock - # entry of some next-in-queue requesting task, - # then the resumption here will result in that - # Cancelled being raised to our caller below! + # NOTE: critical section: this yield is unshielded! - # in this case the finally below should trigger - # and the surrounding calle side context should cancel - # normally relaying back to the caller. + # IF we received a cancel during the shielded lock entry of some + # next-in-queue requesting task, then the resumption here will + # result in that ``trio.Cancelled`` being raised to our caller + # (likely from ``_hijack_stdin_for_child()`` below)! In + # this case the ``finally:`` below should trigger and the + # surrounding caller side context should cancel normally + # relaying back to the caller. yield _debug_lock @@ -194,19 +196,8 @@ async def _acquire_debug_lock( log.debug(f"TTY lock released, remote task: {task_name}:{uid}") -# @contextmanager -# def _disable_sigint(): -# try: -# # disable sigint handling while in debug -# prior_handler = signal.signal(signal.SIGINT, handler) -# yield -# finally: -# # restore SIGINT handling -# signal.signal(signal.SIGINT, prior_handler) - - @tractor.context -async def _hijack_stdin_relay_to_child( +async def _hijack_stdin_for_child( ctx: tractor.Context, subactor_uid: Tuple[str, str] @@ -235,8 +226,7 @@ async def _hijack_stdin_relay_to_child( # indicate to child that we've locked stdio await ctx.started('Locked') - log.pdb( # type: ignore - f"Actor {subactor_uid} ACQUIRED stdin hijack lock") + log.pdb(f"Actor {subactor_uid} ACQUIRED stdin hijack lock") # wait for unlock pdb by child async with ctx.open_stream() as stream: @@ -245,14 +235,13 @@ async def _hijack_stdin_relay_to_child( except trio.BrokenResourceError: # XXX: there may be a race with the portal teardown - # with the calling actor which we can safely ignore - # the alternative would be sending an ack message + # with the calling actor which we can safely ignore. + # The alternative would be sending an ack message # and allowing the client to wait for us to teardown # first? pass - log.debug( - f"TTY lock released, remote task: {task_name}:{subactor_uid}") + log.debug(f"TTY lock released, remote task: {task_name}:{subactor_uid}") return "pdb_unlock_complete" @@ -299,7 +288,7 @@ async def _breakpoint( # this syncs to child's ``Context.started()`` call. async with portal.open_context( - tractor._debug._hijack_stdin_relay_to_child, + tractor._debug._hijack_stdin_for_child, subactor_uid=actor.uid, ) as (ctx, val): @@ -377,8 +366,7 @@ async def _breakpoint( # may have the tty locked prior global _debug_lock - # TODO: wait, what about multiple root tasks acquiring - # it though.. shrug? + # TODO: wait, what about multiple root tasks acquiring it though? # root process (us) already has it; ignore if _global_actor_in_debug == actor.uid: return @@ -408,8 +396,8 @@ async def _breakpoint( _pdb_release_hook = teardown - # block here one (at the appropriate frame *up* where - # ``breakpoint()`` was awaited and begin handling stdio + # block here one (at the appropriate frame *up*) where + # ``breakpoint()`` was awaited and begin handling stdio. log.debug("Entering the synchronous world of pdb") debug_func(actor) diff --git a/tractor/_root.py b/tractor/_root.py index b9306b6..bfaaf4f 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -174,7 +174,7 @@ async def open_root_actor( yield actor except (Exception, trio.MultiError) as err: - # with trio.CancelScope(shield=True): + entered = await _debug._maybe_enter_pm(err) if not entered: diff --git a/tractor/_trionics.py b/tractor/_trionics.py index 40abc2b..2eb64d1 100644 --- a/tractor/_trionics.py +++ b/tractor/_trionics.py @@ -283,9 +283,10 @@ async def _open_and_supervise_one_cancels_all_nursery( if is_root_process(): log.exception(f"we're root with {err}") - # wait to see if a sub-actor task - # will be scheduled and grab the tty - # lock on the next tick + # TODO: could this make things more deterministic? + # wait to see if a sub-actor task will be + # scheduled and grab the tty lock on the next + # tick? # await trio.testing.wait_all_tasks_blocked() debug_complete = _debug._no_remote_has_tty