From 0956d5f46109945f72d49a40b6a867332974a2df Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 13 Oct 2022 13:12:17 -0400 Subject: [PATCH] Restore the `trio` SIGINT handler, cancel root lock tasks on no-peers Pretty sure this is the final touch to alleviate all our debug lock headaches! Instead of trying to revert to the "last" handler (as `pdb` does internally in the stdlib) we always just revert to the handler `trio` registers during startup. Further, this seems to allow cancelling the root-side locking task if it's detected as stale, provided we only do this when the root actor is in a "no more IPC peers" state. Details: - (always) set `._debug.Lock._trio_handler` to the `trio` version, not some last-used handler, to make sure we're getting the ctrl-c handling we want when not in debug mode. - assign the trio handler in `open_root_actor()` and `._runtime.async_main()` to be sure it's applied in subactors as well as the root. - only do debug lock blocking and root-side-locking-task cancels when a "no peers" condition is detected in the root actor: i.e. no IPC channels are detected by the root, meaning it's impossible that any actor has a sane lock-state ongoing for debug mode. 
--- tractor/_debug.py | 48 ++++++++++++++++++++++++++++++--------------- tractor/_root.py | 16 ++++++++++++--- tractor/_runtime.py | 32 ++++++++++++++++++------------ 3 files changed, 64 insertions(+), 32 deletions(-) diff --git a/tractor/_debug.py b/tractor/_debug.py index 751c646..8fded72 100644 --- a/tractor/_debug.py +++ b/tractor/_debug.py @@ -75,8 +75,10 @@ class Lock: # placeholder for function to set a ``trio.Event`` on debugger exit # pdb_release_hook: Optional[Callable] = None + _trio_handler: Callable | None = None + # actor-wide variable pointing to current task name using debugger - local_task_in_debug: Optional[str] = None + local_task_in_debug: str | None = None # NOTE: set by the current task waiting on the root tty lock from # the CALLER side of the `lock_tty_for_child()` context entry-call @@ -105,19 +107,16 @@ class Lock: @classmethod def shield_sigint(cls): cls._orig_sigint_handler = signal.signal( - signal.SIGINT, - shield_sigint, - ) + signal.SIGINT, + shield_sigint, + ) @classmethod def unshield_sigint(cls): - if cls._orig_sigint_handler is not None: - # restore original sigint handler - signal.signal( - signal.SIGINT, - cls._orig_sigint_handler - ) - + # always restore ``trio``'s sigint handler. see notes below in + # the pdb factory about the nightmare that is that code swapping + # out the handler when the repl activates... + signal.signal(signal.SIGINT, cls._trio_handler) cls._orig_sigint_handler = None @classmethod @@ -544,7 +543,7 @@ def shield_sigint( ) -> None: ''' - Specialized debugger compatible SIGINT handler. + Specialized, debugger-aware SIGINT handler. In childred we always ignore to avoid deadlocks since cancellation should always be managed by the parent supervising actor. The root @@ -601,6 +600,8 @@ def shield_sigint( # which has already terminated to unlock. 
and any_connected ): + # we are root and some actor is in debug mode + # if uid_in_debug is not None: name = uid_in_debug[0] if name != 'root': log.pdb( @@ -611,6 +612,22 @@ def shield_sigint( log.pdb( "Ignoring SIGINT while in debug mode" ) + elif ( + is_root_process() + ): + log.pdb( + "Ignoring SIGINT since debug mode is enabled" + ) + + # revert back to ``trio`` handler asap! + Lock.unshield_sigint() + if ( + Lock._root_local_task_cs_in_debug + and not Lock._root_local_task_cs_in_debug.cancel_called + ): + Lock._root_local_task_cs_in_debug.cancel() + + # raise KeyboardInterrupt # child actor that has locked the debugger elif not is_root_process(): @@ -636,10 +653,9 @@ def shield_sigint( # https://github.com/goodboy/tractor/issues/320 # elif debug_mode(): - else: - log.pdb( - "Ignoring SIGINT since debug mode is enabled" - ) + else: # XXX: shouldn't ever get here? + print("WTFWTFWTF") + raise KeyboardInterrupt # NOTE: currently (at least on ``fancycompleter`` 0.9.2) # it lookks to be that the last command that was run (eg. ll) diff --git a/tractor/_root.py b/tractor/_root.py index 16c4bb8..cb405f5 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -23,6 +23,7 @@ from functools import partial import importlib import logging import os +import signal from typing import ( Optional, ) @@ -33,7 +34,11 @@ import warnings from exceptiongroup import BaseExceptionGroup import trio -from ._runtime import Actor, Arbiter, async_main +from ._runtime import ( + Actor, + Arbiter, + async_main, +) from . import _debug from . import _spawn from . import _state @@ -76,14 +81,19 @@ async def open_root_actor( rpc_module_paths: Optional[list] = None, ) -> typing.Any: - """Async entry point for ``tractor``. + ''' + Runtime init entry point for ``tractor``. 
- """ + ''' # Override the global debugger hook to make it play nice with # ``trio``, see: # https://github.com/python-trio/trio/issues/1155#issuecomment-742964018 os.environ['PYTHONBREAKPOINT'] = 'tractor._debug._set_trace' + # attempt to retreive ``trio``'s sigint handler and stash it + # on our debugger lock state. + _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT) + # mark top most level process as root actor _state._runtime_vars['_is_root'] = True diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 5a3a693..9f8fed0 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -25,14 +25,15 @@ from itertools import chain import importlib import importlib.util import inspect -import uuid +import signal +import sys from typing import ( Any, Optional, Union, TYPE_CHECKING, Callable, ) +import uuid from types import ModuleType -import sys import os from contextlib import ExitStack import warnings @@ -709,6 +710,14 @@ class Actor: log.runtime(f"No more channels for {chan.uid}") self._peers.pop(uid, None) + log.runtime(f"Peers is {self._peers}") + + # No more channels to other actors (at all) registered + # as connected. + if not self._peers: + log.runtime("Signalling no more peer channel connections") + self._no_more_peers.set() + # NOTE: block this actor from acquiring the # debugger-TTY-lock since we have no way to know if we # cancelled it and further there is no way to ensure the @@ -722,23 +731,16 @@ class Actor: # if a now stale local task has the TTY lock still # we cancel it to allow servicing other requests for # the lock. + db_cs = pdb_lock._root_local_task_cs_in_debug if ( - pdb_lock._root_local_task_cs_in_debug - and not pdb_lock._root_local_task_cs_in_debug.cancel_called + db_cs + and not db_cs.cancel_called ): log.warning( f'STALE DEBUG LOCK DETECTED FOR {uid}' ) # TODO: figure out why this breaks tests.. 
- # pdb_lock._root_local_task_cs_in_debug.cancel() - - log.runtime(f"Peers is {self._peers}") - - # No more channels to other actors (at all) registered - # as connected. - if not self._peers: - log.runtime("Signalling no more peer channel connections") - self._no_more_peers.set() + db_cs.cancel() # XXX: is this necessary (GC should do it)? if chan.connected(): @@ -1229,6 +1231,10 @@ async def async_main( and when cancelled effectively cancels the actor. ''' + # attempt to retreive ``trio``'s sigint handler and stash it + # on our debugger lock state. + _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT) + registered_with_arbiter = False try: