forked from goodboy/tractor
				
			Refine the handler for child vs. root cases
This gets very close to avoiding any possible hangs related to tty
locking and SIGINT handling, minus a special case that is detailed
below.
Summary of implementation changes:
- convert `_mk_pdb()` -> `with _open_pdb() as pdb:` which implicitly
  handles the `bdb.BdbQuit` case such that debugger teardown hooks are
  always called.
- rename the handler to `shield_sigint()` and handle a variety of new
  cases:
  * the root is in debug but hasn't been cancelled -> call
    `Actor.cancel_soon()`
  * the root is in debug but *has* been cancelled (`Actor.cancel_soon()`
    already called) -> raise KBI
  * a child is in debug *and* has a task locking the debugger -> ignore
    SIGINT in child *and* the root actor.
- if the debugger instance is provided to the handler at acquire time,
  on SIGINT handling completion re-print the last pdb++ REPL output so
  that the user realizes they are still actively in debug.
- ignore the unlock case where a race condition of "no task" holding the
  lock causes the `RuntimeError` normally associated with the "wrong
  task" doing so (not sure if this is a `trio` bug?).
- change debug logs to runtime level.
Unhandled case(s):
- a child is maybe in debug mode but does not itself have any task using
  the debugger.
    * ToDo: we need a way to decide what to do with
      "intermediate" child actors who themselves are not in
      `debug_mode=True` but have children who *are*, such that a SIGINT
      won't cause cancellation of that child-as-parent-of-another-child
      **iff** any of their children are in debug mode.
			
			
				sigintsaviour_citesthackin
			
			
		
							parent
							
								
									6b7b58346f
								
							
						
					
					
						commit
						4e60c17375
					
				| 
						 | 
				
			
			@ -20,6 +20,7 @@ Multi-core debugging for da peeps!
 | 
			
		|||
"""
 | 
			
		||||
import bdb
 | 
			
		||||
import sys
 | 
			
		||||
import signal
 | 
			
		||||
from functools import partial
 | 
			
		||||
from contextlib import asynccontextmanager as acm
 | 
			
		||||
from contextlib import contextmanager as cm
 | 
			
		||||
| 
						 | 
				
			
			@ -163,7 +164,7 @@ async def _acquire_debug_lock(
 | 
			
		|||
 | 
			
		||||
    task_name = trio.lowlevel.current_task().name
 | 
			
		||||
 | 
			
		||||
    log.debug(
 | 
			
		||||
    log.runtime(
 | 
			
		||||
        f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -176,14 +177,14 @@ async def _acquire_debug_lock(
 | 
			
		|||
        _no_remote_has_tty = trio.Event()
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        log.debug(
 | 
			
		||||
        log.runtime(
 | 
			
		||||
            f"entering lock checkpoint, remote task: {task_name}:{uid}"
 | 
			
		||||
        )
 | 
			
		||||
        we_acquired = True
 | 
			
		||||
        await _debug_lock.acquire()
 | 
			
		||||
 | 
			
		||||
        _global_actor_in_debug = uid
 | 
			
		||||
        log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}")
 | 
			
		||||
        log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
 | 
			
		||||
 | 
			
		||||
        # NOTE: critical section: this yield is unshielded!
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -199,7 +200,10 @@ async def _acquire_debug_lock(
 | 
			
		|||
 | 
			
		||||
    finally:
 | 
			
		||||
        # if _global_actor_in_debug == uid:
 | 
			
		||||
        if we_acquired and _debug_lock.locked():
 | 
			
		||||
        if (
 | 
			
		||||
            we_acquired
 | 
			
		||||
            and _debug_lock.locked()
 | 
			
		||||
        ):
 | 
			
		||||
            _debug_lock.release()
 | 
			
		||||
 | 
			
		||||
        # IFF there are no more requesting tasks queued up fire, the
 | 
			
		||||
| 
						 | 
				
			
			@ -211,13 +215,13 @@ async def _acquire_debug_lock(
 | 
			
		|||
        if (
 | 
			
		||||
            not stats.owner
 | 
			
		||||
        ):
 | 
			
		||||
            log.debug(f"No more tasks waiting on tty lock! says {uid}")
 | 
			
		||||
            log.runtime(f"No more tasks waiting on tty lock! says {uid}")
 | 
			
		||||
            _no_remote_has_tty.set()
 | 
			
		||||
            _no_remote_has_tty = None
 | 
			
		||||
 | 
			
		||||
        _global_actor_in_debug = None
 | 
			
		||||
 | 
			
		||||
        log.debug(f"TTY lock released, remote task: {task_name}:{uid}")
 | 
			
		||||
        log.runtime(f"TTY lock released, remote task: {task_name}:{uid}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@tractor.context
 | 
			
		||||
| 
						 | 
				
			
			@ -262,9 +266,6 @@ async def _hijack_stdin_for_child(
 | 
			
		|||
                async with ctx.open_stream() as stream:
 | 
			
		||||
                    assert await stream.receive() == 'pdb_unlock'
 | 
			
		||||
 | 
			
		||||
                # try:
 | 
			
		||||
                #     assert await stream.receive() == 'pdb_unlock'
 | 
			
		||||
 | 
			
		||||
        except (
 | 
			
		||||
            # BaseException,
 | 
			
		||||
            trio.MultiError,
 | 
			
		||||
| 
						 | 
				
			
			@ -272,6 +273,7 @@ async def _hijack_stdin_for_child(
 | 
			
		|||
            trio.Cancelled,  # by local cancellation
 | 
			
		||||
            trio.ClosedResourceError,  # by self._rx_chan
 | 
			
		||||
        ) as err:
 | 
			
		||||
 | 
			
		||||
            # XXX: there may be a race with the portal teardown
 | 
			
		||||
            # with the calling actor which we can safely ignore.
 | 
			
		||||
            # The alternative would be sending an ack message
 | 
			
		||||
| 
						 | 
				
			
			@ -282,10 +284,12 @@ async def _hijack_stdin_for_child(
 | 
			
		|||
 | 
			
		||||
            if isinstance(err, trio.Cancelled):
 | 
			
		||||
                raise
 | 
			
		||||
 | 
			
		||||
        finally:
 | 
			
		||||
            log.debug(
 | 
			
		||||
            log.runtime(
 | 
			
		||||
                "TTY lock released, remote task:"
 | 
			
		||||
                f"{task_name}:{subactor_uid}")
 | 
			
		||||
                f"{task_name}:{subactor_uid}"
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    return "pdb_unlock_complete"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -358,8 +362,9 @@ async def _breakpoint(
 | 
			
		|||
    # shield: bool = False
 | 
			
		||||
 | 
			
		||||
) -> None:
 | 
			
		||||
    '''``tractor`` breakpoint entry for engaging pdb machinery
 | 
			
		||||
    in the root or a subactor.
 | 
			
		||||
    '''
 | 
			
		||||
    breakpoint entry for engaging pdb machinery in the root or
 | 
			
		||||
    a subactor.
 | 
			
		||||
 | 
			
		||||
    '''
 | 
			
		||||
    __tracebackhide__ = True
 | 
			
		||||
| 
						 | 
				
			
			@ -462,7 +467,7 @@ async def _breakpoint(
 | 
			
		|||
                # that locked?
 | 
			
		||||
                owner = _debug_lock.statistics().owner
 | 
			
		||||
                if owner:
 | 
			
		||||
                   raise
 | 
			
		||||
                    raise
 | 
			
		||||
 | 
			
		||||
            _global_actor_in_debug = None
 | 
			
		||||
            _local_task_in_debug = None
 | 
			
		||||
| 
						 | 
				
			
			@ -497,7 +502,13 @@ def _open_pdb() -> PdbwTeardown:
 | 
			
		|||
        raise
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def disable_sigint_in_pdb(signum, frame, *args):
 | 
			
		||||
def shield_sigint(
 | 
			
		||||
    signum: int,
 | 
			
		||||
    frame: 'frame',  # type: ignore # noqa
 | 
			
		||||
    pdb: Optional[PdbwTeardown] = None,
 | 
			
		||||
    *args,
 | 
			
		||||
 | 
			
		||||
) -> None:
 | 
			
		||||
    '''
 | 
			
		||||
    Specialized debugger compatible SIGINT handler.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -506,39 +517,86 @@ def disable_sigint_in_pdb(signum, frame, *args):
 | 
			
		|||
    is always cancelled on ctrl-c.
 | 
			
		||||
 | 
			
		||||
    '''
 | 
			
		||||
    actor = tractor.current_actor()
 | 
			
		||||
    if not actor._cancel_called:
 | 
			
		||||
        log.pdb(
 | 
			
		||||
            f"{actor.uid} is in debug and has not been cancelled, "
 | 
			
		||||
            "ignoring SIGINT\n"
 | 
			
		||||
        )
 | 
			
		||||
    else:
 | 
			
		||||
        log.pdb(
 | 
			
		||||
            f"{actor.uid} is already cancelling.."
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    global _global_actor_in_debug
 | 
			
		||||
    global _local_task_in_debug, _global_actor_in_debug
 | 
			
		||||
    in_debug = _global_actor_in_debug
 | 
			
		||||
 | 
			
		||||
    actor = tractor.current_actor()
 | 
			
		||||
 | 
			
		||||
    # root actor branch that reports whether or not a child
 | 
			
		||||
    # has locked debugger.
 | 
			
		||||
    if (
 | 
			
		||||
        is_root_process()
 | 
			
		||||
        and in_debug
 | 
			
		||||
    ):
 | 
			
		||||
        log.pdb(f'Root SIGINT disabled while {_global_actor_in_debug} is debugging')
 | 
			
		||||
 | 
			
		||||
        if in_debug[0] != 'root':
 | 
			
		||||
            pass
 | 
			
		||||
        name = in_debug[0]
 | 
			
		||||
        if name != 'root':
 | 
			
		||||
            log.pdb(
 | 
			
		||||
                f"Ignoring SIGINT while child in debug mode: `{in_debug}`"
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            log.pdb(
 | 
			
		||||
                "Ignoring SIGINT while in debug mode"
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    # child actor that has locked the debugger
 | 
			
		||||
    elif (
 | 
			
		||||
        not is_root_process()
 | 
			
		||||
    ):
 | 
			
		||||
        task = _local_task_in_debug
 | 
			
		||||
        if task:
 | 
			
		||||
            log.pdb(
 | 
			
		||||
                f"Ignoring SIGINT while task in debug mode: `{task}`"
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        # TODO: how to handle the case of an intermediary-child actor
 | 
			
		||||
        # that **is not** marked in debug mode?
 | 
			
		||||
        # elif debug_mode():
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            log.pdb(
 | 
			
		||||
                "Ignoring SIGINT since debug mode is enabled"
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    # no one has the debugger so cancel the runtime, or raise KBI if already cancelling
 | 
			
		||||
    else:
 | 
			
		||||
        # If we haven't tried to cancel the runtime then do that instead
 | 
			
		||||
        # of raising a KBI (which may non-gracefully destroy
 | 
			
		||||
        # a ``trio.run()``). 
 | 
			
		||||
        if not actor._cancel_called:
 | 
			
		||||
            actor.cancel_soon()
 | 
			
		||||
 | 
			
		||||
        # If the runtime is already cancelled it likely means the user
 | 
			
		||||
        # hit ctrl-c again because teardown didn't fully take place in
 | 
			
		||||
        # which case we do the "hard" raising of a local KBI.
 | 
			
		||||
        else:
 | 
			
		||||
            # actor.cancel_soon()
 | 
			
		||||
            raise KeyboardInterrupt
 | 
			
		||||
 | 
			
		||||
    # maybe redraw/print last REPL output to console
 | 
			
		||||
    if pdb:
 | 
			
		||||
 | 
			
		||||
        # TODO: make this work like sticky mode where if there is output
 | 
			
		||||
        # detected as written to the tty we redraw this part underneath
 | 
			
		||||
        # and erase the past draw of this same bit above?
 | 
			
		||||
        # pdb.sticky = True
 | 
			
		||||
        # pdb._print_if_sticky()
 | 
			
		||||
 | 
			
		||||
        # also see these links for an approach from ``ptk``:
 | 
			
		||||
        # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
 | 
			
		||||
        # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
 | 
			
		||||
 | 
			
		||||
        pdb.do_longlist(None)
 | 
			
		||||
        print(pdb.prompt, end='', flush=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@cm
 | 
			
		||||
def disable_sigint():
 | 
			
		||||
def disable_sigint(
 | 
			
		||||
    pdb: Optional[PdbwTeardown] = None
 | 
			
		||||
 | 
			
		||||
) -> None:
 | 
			
		||||
    __tracebackhide__ = True
 | 
			
		||||
 | 
			
		||||
    # ensure the ``contextlib.contextmanager`` frame inside the wrapping
 | 
			
		||||
    # ``.__exit__()`` method isn't shown either.
 | 
			
		||||
    import sys
 | 
			
		||||
    frame = sys._getframe()
 | 
			
		||||
    frame.f_back.f_globals['__tracebackhide__'] = True
 | 
			
		||||
    # NOTE: this seems like a form of cpython bug wherein
 | 
			
		||||
| 
						 | 
				
			
			@ -548,10 +606,9 @@ def disable_sigint():
 | 
			
		|||
    # for manual debugging if necessary
 | 
			
		||||
    # pdb.set_trace()
 | 
			
		||||
 | 
			
		||||
    import signal
 | 
			
		||||
    orig_handler = signal.signal(
 | 
			
		||||
        signal.SIGINT,
 | 
			
		||||
        disable_sigint_in_pdb
 | 
			
		||||
        partial(shield_sigint, pdb=pdb),
 | 
			
		||||
    )
 | 
			
		||||
    try:
 | 
			
		||||
        yield
 | 
			
		||||
| 
						 | 
				
			
			@ -564,10 +621,9 @@ def disable_sigint():
 | 
			
		|||
 | 
			
		||||
def _set_trace(actor=None):
 | 
			
		||||
    __tracebackhide__ = True
 | 
			
		||||
    # pdb = _open_pdb()
 | 
			
		||||
    with (
 | 
			
		||||
        _open_pdb() as pdb,
 | 
			
		||||
        disable_sigint(),
 | 
			
		||||
        disable_sigint(pdb=pdb),
 | 
			
		||||
    ):
 | 
			
		||||
        if actor is not None:
 | 
			
		||||
            log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
 | 
			
		||||
| 
						 | 
				
			
			@ -601,10 +657,9 @@ breakpoint = partial(
 | 
			
		|||
 | 
			
		||||
def _post_mortem(actor):
 | 
			
		||||
    __tracebackhide__ = True
 | 
			
		||||
    # pdb = _mk_pdb()
 | 
			
		||||
    with (
 | 
			
		||||
        _open_pdb() as pdb,
 | 
			
		||||
        disable_sigint(),
 | 
			
		||||
        disable_sigint(pdb=pdb),
 | 
			
		||||
    ):
 | 
			
		||||
        log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue