Refine the handler for child vs. root cases
This gets very close to avoiding any possible hangs to do with tty locking and SIGINT handling minus a special case that will be detailed below. Summary of implementation changes: - convert `_mk_pdb()` -> `with _open_pdb() as pdb:` which implicitly handles the `bdb.BdbQuit` case such that debugger teardown hooks are always called. - rename the handler to `shield_sigint()` and handle a variety of new cases: * the root is in debug but hasn't been cancelled -> call `Actor.cancel_soon()` * the root is in debug but *has* been called (`Actor.cancel_soon()` already called) -> raise KBI * a child is in debug *and* has a task locking the debugger -> ignore SIGINT in child *and* the root actor. - if the debugger instance is provided to the handler at acquire time, on SIGINT handling completion re-print the last pdb++ REPL output so that the user realizes they are still actively in debug. - ignore the unlock case where a race condition of "no task" holding the lock causes the `RuntimeError` normally associated with the "wrong task" doing so (not sure if this is a `trio` bug?). - change debug logs to runtime level. Unhandled case(s): - a child is maybe in debug mode but does not itself have any task using the debugger. * ToDo: we need a way to decide what to do with "intermediate" child actors who themselves either are not in `debug_mode=True` but have children who *are* such that a SIGINT won't cause cancellation of that child-as-parent-of-another-child **iff** any of their children are in in debug mode.sigintsaviour_citesthackin
parent
6b7b58346f
commit
4e60c17375
|
@ -20,6 +20,7 @@ Multi-core debugging for da peeps!
|
|||
"""
|
||||
import bdb
|
||||
import sys
|
||||
import signal
|
||||
from functools import partial
|
||||
from contextlib import asynccontextmanager as acm
|
||||
from contextlib import contextmanager as cm
|
||||
|
@ -163,7 +164,7 @@ async def _acquire_debug_lock(
|
|||
|
||||
task_name = trio.lowlevel.current_task().name
|
||||
|
||||
log.debug(
|
||||
log.runtime(
|
||||
f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
|
||||
)
|
||||
|
||||
|
@ -176,14 +177,14 @@ async def _acquire_debug_lock(
|
|||
_no_remote_has_tty = trio.Event()
|
||||
|
||||
try:
|
||||
log.debug(
|
||||
log.runtime(
|
||||
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
||||
)
|
||||
we_acquired = True
|
||||
await _debug_lock.acquire()
|
||||
|
||||
_global_actor_in_debug = uid
|
||||
log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
||||
log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
||||
|
||||
# NOTE: critical section: this yield is unshielded!
|
||||
|
||||
|
@ -199,7 +200,10 @@ async def _acquire_debug_lock(
|
|||
|
||||
finally:
|
||||
# if _global_actor_in_debug == uid:
|
||||
if we_acquired and _debug_lock.locked():
|
||||
if (
|
||||
we_acquired
|
||||
and _debug_lock.locked()
|
||||
):
|
||||
_debug_lock.release()
|
||||
|
||||
# IFF there are no more requesting tasks queued up fire, the
|
||||
|
@ -211,13 +215,13 @@ async def _acquire_debug_lock(
|
|||
if (
|
||||
not stats.owner
|
||||
):
|
||||
log.debug(f"No more tasks waiting on tty lock! says {uid}")
|
||||
log.runtime(f"No more tasks waiting on tty lock! says {uid}")
|
||||
_no_remote_has_tty.set()
|
||||
_no_remote_has_tty = None
|
||||
|
||||
_global_actor_in_debug = None
|
||||
|
||||
log.debug(f"TTY lock released, remote task: {task_name}:{uid}")
|
||||
log.runtime(f"TTY lock released, remote task: {task_name}:{uid}")
|
||||
|
||||
|
||||
@tractor.context
|
||||
|
@ -262,9 +266,6 @@ async def _hijack_stdin_for_child(
|
|||
async with ctx.open_stream() as stream:
|
||||
assert await stream.receive() == 'pdb_unlock'
|
||||
|
||||
# try:
|
||||
# assert await stream.receive() == 'pdb_unlock'
|
||||
|
||||
except (
|
||||
# BaseException,
|
||||
trio.MultiError,
|
||||
|
@ -272,6 +273,7 @@ async def _hijack_stdin_for_child(
|
|||
trio.Cancelled, # by local cancellation
|
||||
trio.ClosedResourceError, # by self._rx_chan
|
||||
) as err:
|
||||
|
||||
# XXX: there may be a race with the portal teardown
|
||||
# with the calling actor which we can safely ignore.
|
||||
# The alternative would be sending an ack message
|
||||
|
@ -282,10 +284,12 @@ async def _hijack_stdin_for_child(
|
|||
|
||||
if isinstance(err, trio.Cancelled):
|
||||
raise
|
||||
|
||||
finally:
|
||||
log.debug(
|
||||
log.runtime(
|
||||
"TTY lock released, remote task:"
|
||||
f"{task_name}:{subactor_uid}")
|
||||
f"{task_name}:{subactor_uid}"
|
||||
)
|
||||
|
||||
return "pdb_unlock_complete"
|
||||
|
||||
|
@ -358,8 +362,9 @@ async def _breakpoint(
|
|||
# shield: bool = False
|
||||
|
||||
) -> None:
|
||||
'''``tractor`` breakpoint entry for engaging pdb machinery
|
||||
in the root or a subactor.
|
||||
'''
|
||||
breakpoint entry for engaging pdb machinery in the root or
|
||||
a subactor.
|
||||
|
||||
'''
|
||||
__tracebackhide__ = True
|
||||
|
@ -462,7 +467,7 @@ async def _breakpoint(
|
|||
# that locked?
|
||||
owner = _debug_lock.statistics().owner
|
||||
if owner:
|
||||
raise
|
||||
raise
|
||||
|
||||
_global_actor_in_debug = None
|
||||
_local_task_in_debug = None
|
||||
|
@ -497,7 +502,13 @@ def _open_pdb() -> PdbwTeardown:
|
|||
raise
|
||||
|
||||
|
||||
def disable_sigint_in_pdb(signum, frame, *args):
|
||||
def shield_sigint(
|
||||
signum: int,
|
||||
frame: 'frame', # type: ignore # noqa
|
||||
pdb: Optional[PdbwTeardown] = None,
|
||||
*args,
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
Specialized debugger compatible SIGINT handler.
|
||||
|
||||
|
@ -506,39 +517,86 @@ def disable_sigint_in_pdb(signum, frame, *args):
|
|||
is always cancelled on ctrl-c.
|
||||
|
||||
'''
|
||||
actor = tractor.current_actor()
|
||||
if not actor._cancel_called:
|
||||
log.pdb(
|
||||
f"{actor.uid} is in debug and has not been cancelled, "
|
||||
"ignoring SIGINT\n"
|
||||
)
|
||||
else:
|
||||
log.pdb(
|
||||
f"{actor.uid} is already cancelling.."
|
||||
)
|
||||
|
||||
global _global_actor_in_debug
|
||||
global _local_task_in_debug, _global_actor_in_debug
|
||||
in_debug = _global_actor_in_debug
|
||||
|
||||
actor = tractor.current_actor()
|
||||
|
||||
# root actor branch that reports whether or not a child
|
||||
# has locked debugger.
|
||||
if (
|
||||
is_root_process()
|
||||
and in_debug
|
||||
):
|
||||
log.pdb(f'Root SIGINT disabled while {_global_actor_in_debug} is debugging')
|
||||
|
||||
if in_debug[0] != 'root':
|
||||
pass
|
||||
name = in_debug[0]
|
||||
if name != 'root':
|
||||
log.pdb(
|
||||
f"Ignoring SIGINT while child in debug mode: `{in_debug}`"
|
||||
)
|
||||
else:
|
||||
log.pdb(
|
||||
"Ignoring SIGINT while in debug mode"
|
||||
)
|
||||
|
||||
# child actor that has locked the debugger
|
||||
elif (
|
||||
not is_root_process()
|
||||
):
|
||||
task = _local_task_in_debug
|
||||
if task:
|
||||
log.pdb(
|
||||
f"Ignoring SIGINT while task in debug mode: `{task}`"
|
||||
)
|
||||
|
||||
# TODO: how to handle the case of an intermediary-child actor
|
||||
# that **is not** marked in debug mode?
|
||||
# elif debug_mode():
|
||||
|
||||
else:
|
||||
log.pdb(
|
||||
"Ignoring SIGINT since debug mode is enabled"
|
||||
)
|
||||
|
||||
# noone has the debugger so raise KBI
|
||||
else:
|
||||
# If we haven't tried to cancel the runtime then do that instead
|
||||
# of raising a KBI (which may non-gracefully destroy
|
||||
# a ``trio.run()``).
|
||||
if not actor._cancel_called:
|
||||
actor.cancel_soon()
|
||||
|
||||
# If the runtime is already cancelled it likely means the user
|
||||
# hit ctrl-c again because teardown didn't full take place in
|
||||
# which case we do the "hard" raising of a local KBI.
|
||||
else:
|
||||
# actor.cancel_soon()
|
||||
raise KeyboardInterrupt
|
||||
|
||||
# maybe redraw/print last REPL output to console
|
||||
if pdb:
|
||||
|
||||
# TODO: make this work like sticky mode where if there is output
|
||||
# detected as written to the tty we redraw this part underneath
|
||||
# and erase the past draw of this same bit above?
|
||||
# pdb.sticky = True
|
||||
# pdb._print_if_sticky()
|
||||
|
||||
# also see these links for an approach from ``ptk``:
|
||||
# https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
|
||||
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
||||
|
||||
pdb.do_longlist(None)
|
||||
print(pdb.prompt, end='', flush=True)
|
||||
|
||||
|
||||
@cm
|
||||
def disable_sigint():
|
||||
def disable_sigint(
|
||||
pdb: Optional[PdbwTeardown] = None
|
||||
|
||||
) -> None:
|
||||
__tracebackhide__ = True
|
||||
|
||||
# ensure the ``contextlib.contextmanager`` frame inside the wrapping
|
||||
# ``.__exit__()`` method isn't shown either.
|
||||
import sys
|
||||
frame = sys._getframe()
|
||||
frame.f_back.f_globals['__tracebackhide__'] = True
|
||||
# NOTE: this seems like a form of cpython bug wherein
|
||||
|
@ -548,10 +606,9 @@ def disable_sigint():
|
|||
# for manual debugging if necessary
|
||||
# pdb.set_trace()
|
||||
|
||||
import signal
|
||||
orig_handler = signal.signal(
|
||||
signal.SIGINT,
|
||||
disable_sigint_in_pdb
|
||||
partial(shield_sigint, pdb=pdb),
|
||||
)
|
||||
try:
|
||||
yield
|
||||
|
@ -564,10 +621,9 @@ def disable_sigint():
|
|||
|
||||
def _set_trace(actor=None):
|
||||
__tracebackhide__ = True
|
||||
# pdb = _open_pdb()
|
||||
with (
|
||||
_open_pdb() as pdb,
|
||||
disable_sigint(),
|
||||
disable_sigint(pdb=pdb),
|
||||
):
|
||||
if actor is not None:
|
||||
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
||||
|
@ -601,10 +657,9 @@ breakpoint = partial(
|
|||
|
||||
def _post_mortem(actor):
|
||||
__tracebackhide__ = True
|
||||
# pdb = _mk_pdb()
|
||||
with (
|
||||
_open_pdb() as pdb,
|
||||
disable_sigint(),
|
||||
disable_sigint(pdb=pdb),
|
||||
):
|
||||
log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
||||
|
||||
|
|
Loading…
Reference in New Issue