Refine the handler for child vs. root cases
This gets very close to avoiding any possible hangs to do with tty locking and SIGINT handling minus a special case that will be detailed below. Summary of implementation changes: - convert `_mk_pdb()` -> `with _open_pdb() as pdb:` which implicitly handles the `bdb.BdbQuit` case such that debugger teardown hooks are always called. - rename the handler to `shield_sigint()` and handle a variety of new cases: * the root is in debug but hasn't been cancelled -> call `Actor.cancel_soon()` * the root is in debug but *has* been called (`Actor.cancel_soon()` already called) -> raise KBI * a child is in debug *and* has a task locking the debugger -> ignore SIGINT in child *and* the root actor. - if the debugger instance is provided to the handler at acquire time, on SIGINT handling completion re-print the last pdb++ REPL output so that the user realizes they are still actively in debug. - ignore the unlock case where a race condition of "no task" holding the lock causes the `RuntimeError` normally associated with the "wrong task" doing so (not sure if this is a `trio` bug?). - change debug logs to runtime level. Unhandled case(s): - a child is maybe in debug mode but does not itself have any task using the debugger. * ToDo: we need a way to decide what to do with "intermediate" child actors who themselves either are not in `debug_mode=True` but have children who *are* such that a SIGINT won't cause cancellation of that child-as-parent-of-another-child **iff** any of their children are in in debug mode.310_plus
parent
542fe0372b
commit
ef7921ce11
|
@ -20,6 +20,7 @@ Multi-core debugging for da peeps!
|
||||||
"""
|
"""
|
||||||
import bdb
|
import bdb
|
||||||
import sys
|
import sys
|
||||||
|
import signal
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from contextlib import asynccontextmanager as acm
|
from contextlib import asynccontextmanager as acm
|
||||||
from contextlib import contextmanager as cm
|
from contextlib import contextmanager as cm
|
||||||
|
@ -163,7 +164,7 @@ async def _acquire_debug_lock(
|
||||||
|
|
||||||
task_name = trio.lowlevel.current_task().name
|
task_name = trio.lowlevel.current_task().name
|
||||||
|
|
||||||
log.debug(
|
log.runtime(
|
||||||
f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
|
f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -176,14 +177,14 @@ async def _acquire_debug_lock(
|
||||||
_no_remote_has_tty = trio.Event()
|
_no_remote_has_tty = trio.Event()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
log.debug(
|
log.runtime(
|
||||||
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
||||||
)
|
)
|
||||||
we_acquired = True
|
we_acquired = True
|
||||||
await _debug_lock.acquire()
|
await _debug_lock.acquire()
|
||||||
|
|
||||||
_global_actor_in_debug = uid
|
_global_actor_in_debug = uid
|
||||||
log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
||||||
|
|
||||||
# NOTE: critical section: this yield is unshielded!
|
# NOTE: critical section: this yield is unshielded!
|
||||||
|
|
||||||
|
@ -199,7 +200,10 @@ async def _acquire_debug_lock(
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# if _global_actor_in_debug == uid:
|
# if _global_actor_in_debug == uid:
|
||||||
if we_acquired and _debug_lock.locked():
|
if (
|
||||||
|
we_acquired
|
||||||
|
and _debug_lock.locked()
|
||||||
|
):
|
||||||
_debug_lock.release()
|
_debug_lock.release()
|
||||||
|
|
||||||
# IFF there are no more requesting tasks queued up fire, the
|
# IFF there are no more requesting tasks queued up fire, the
|
||||||
|
@ -211,13 +215,13 @@ async def _acquire_debug_lock(
|
||||||
if (
|
if (
|
||||||
not stats.owner
|
not stats.owner
|
||||||
):
|
):
|
||||||
log.debug(f"No more tasks waiting on tty lock! says {uid}")
|
log.runtime(f"No more tasks waiting on tty lock! says {uid}")
|
||||||
_no_remote_has_tty.set()
|
_no_remote_has_tty.set()
|
||||||
_no_remote_has_tty = None
|
_no_remote_has_tty = None
|
||||||
|
|
||||||
_global_actor_in_debug = None
|
_global_actor_in_debug = None
|
||||||
|
|
||||||
log.debug(f"TTY lock released, remote task: {task_name}:{uid}")
|
log.runtime(f"TTY lock released, remote task: {task_name}:{uid}")
|
||||||
|
|
||||||
|
|
||||||
@tractor.context
|
@tractor.context
|
||||||
|
@ -262,9 +266,6 @@ async def _hijack_stdin_for_child(
|
||||||
async with ctx.open_stream() as stream:
|
async with ctx.open_stream() as stream:
|
||||||
assert await stream.receive() == 'pdb_unlock'
|
assert await stream.receive() == 'pdb_unlock'
|
||||||
|
|
||||||
# try:
|
|
||||||
# assert await stream.receive() == 'pdb_unlock'
|
|
||||||
|
|
||||||
except (
|
except (
|
||||||
# BaseException,
|
# BaseException,
|
||||||
trio.MultiError,
|
trio.MultiError,
|
||||||
|
@ -272,6 +273,7 @@ async def _hijack_stdin_for_child(
|
||||||
trio.Cancelled, # by local cancellation
|
trio.Cancelled, # by local cancellation
|
||||||
trio.ClosedResourceError, # by self._rx_chan
|
trio.ClosedResourceError, # by self._rx_chan
|
||||||
) as err:
|
) as err:
|
||||||
|
|
||||||
# XXX: there may be a race with the portal teardown
|
# XXX: there may be a race with the portal teardown
|
||||||
# with the calling actor which we can safely ignore.
|
# with the calling actor which we can safely ignore.
|
||||||
# The alternative would be sending an ack message
|
# The alternative would be sending an ack message
|
||||||
|
@ -282,10 +284,12 @@ async def _hijack_stdin_for_child(
|
||||||
|
|
||||||
if isinstance(err, trio.Cancelled):
|
if isinstance(err, trio.Cancelled):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
log.debug(
|
log.runtime(
|
||||||
"TTY lock released, remote task:"
|
"TTY lock released, remote task:"
|
||||||
f"{task_name}:{subactor_uid}")
|
f"{task_name}:{subactor_uid}"
|
||||||
|
)
|
||||||
|
|
||||||
return "pdb_unlock_complete"
|
return "pdb_unlock_complete"
|
||||||
|
|
||||||
|
@ -358,8 +362,9 @@ async def _breakpoint(
|
||||||
# shield: bool = False
|
# shield: bool = False
|
||||||
|
|
||||||
) -> None:
|
) -> None:
|
||||||
'''``tractor`` breakpoint entry for engaging pdb machinery
|
'''
|
||||||
in the root or a subactor.
|
breakpoint entry for engaging pdb machinery in the root or
|
||||||
|
a subactor.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
__tracebackhide__ = True
|
__tracebackhide__ = True
|
||||||
|
@ -497,7 +502,13 @@ def _open_pdb() -> PdbwTeardown:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def disable_sigint_in_pdb(signum, frame, *args):
|
def shield_sigint(
|
||||||
|
signum: int,
|
||||||
|
frame: 'frame', # type: ignore # noqa
|
||||||
|
pdb: Optional[PdbwTeardown] = None,
|
||||||
|
*args,
|
||||||
|
|
||||||
|
) -> None:
|
||||||
'''
|
'''
|
||||||
Specialized debugger compatible SIGINT handler.
|
Specialized debugger compatible SIGINT handler.
|
||||||
|
|
||||||
|
@ -506,39 +517,86 @@ def disable_sigint_in_pdb(signum, frame, *args):
|
||||||
is always cancelled on ctrl-c.
|
is always cancelled on ctrl-c.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
actor = tractor.current_actor()
|
global _local_task_in_debug, _global_actor_in_debug
|
||||||
if not actor._cancel_called:
|
|
||||||
log.pdb(
|
|
||||||
f"{actor.uid} is in debug and has not been cancelled, "
|
|
||||||
"ignoring SIGINT\n"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
log.pdb(
|
|
||||||
f"{actor.uid} is already cancelling.."
|
|
||||||
)
|
|
||||||
|
|
||||||
global _global_actor_in_debug
|
|
||||||
in_debug = _global_actor_in_debug
|
in_debug = _global_actor_in_debug
|
||||||
|
|
||||||
|
actor = tractor.current_actor()
|
||||||
|
|
||||||
|
# root actor branch that reports whether or not a child
|
||||||
|
# has locked debugger.
|
||||||
if (
|
if (
|
||||||
is_root_process()
|
is_root_process()
|
||||||
and in_debug
|
and in_debug
|
||||||
):
|
):
|
||||||
log.pdb(f'Root SIGINT disabled while {_global_actor_in_debug} is debugging')
|
name = in_debug[0]
|
||||||
|
if name != 'root':
|
||||||
if in_debug[0] != 'root':
|
log.pdb(
|
||||||
pass
|
f"Ignoring SIGINT while child in debug mode: `{in_debug}`"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
log.pdb(
|
||||||
|
"Ignoring SIGINT while in debug mode"
|
||||||
|
)
|
||||||
|
|
||||||
|
# child actor that has locked the debugger
|
||||||
|
elif (
|
||||||
|
not is_root_process()
|
||||||
|
):
|
||||||
|
task = _local_task_in_debug
|
||||||
|
if task:
|
||||||
|
log.pdb(
|
||||||
|
f"Ignoring SIGINT while task in debug mode: `{task}`"
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO: how to handle the case of an intermediary-child actor
|
||||||
|
# that **is not** marked in debug mode?
|
||||||
|
# elif debug_mode():
|
||||||
|
|
||||||
|
else:
|
||||||
|
log.pdb(
|
||||||
|
"Ignoring SIGINT since debug mode is enabled"
|
||||||
|
)
|
||||||
|
|
||||||
|
# noone has the debugger so raise KBI
|
||||||
|
else:
|
||||||
|
# If we haven't tried to cancel the runtime then do that instead
|
||||||
|
# of raising a KBI (which may non-gracefully destroy
|
||||||
|
# a ``trio.run()``).
|
||||||
|
if not actor._cancel_called:
|
||||||
|
actor.cancel_soon()
|
||||||
|
|
||||||
|
# If the runtime is already cancelled it likely means the user
|
||||||
|
# hit ctrl-c again because teardown didn't full take place in
|
||||||
|
# which case we do the "hard" raising of a local KBI.
|
||||||
else:
|
else:
|
||||||
# actor.cancel_soon()
|
|
||||||
raise KeyboardInterrupt
|
raise KeyboardInterrupt
|
||||||
|
|
||||||
|
# maybe redraw/print last REPL output to console
|
||||||
|
if pdb:
|
||||||
|
|
||||||
|
# TODO: make this work like sticky mode where if there is output
|
||||||
|
# detected as written to the tty we redraw this part underneath
|
||||||
|
# and erase the past draw of this same bit above?
|
||||||
|
# pdb.sticky = True
|
||||||
|
# pdb._print_if_sticky()
|
||||||
|
|
||||||
|
# also see these links for an approach from ``ptk``:
|
||||||
|
# https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
|
||||||
|
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
||||||
|
|
||||||
|
pdb.do_longlist(None)
|
||||||
|
print(pdb.prompt, end='', flush=True)
|
||||||
|
|
||||||
|
|
||||||
@cm
|
@cm
|
||||||
def disable_sigint():
|
def disable_sigint(
|
||||||
|
pdb: Optional[PdbwTeardown] = None
|
||||||
|
|
||||||
|
) -> None:
|
||||||
__tracebackhide__ = True
|
__tracebackhide__ = True
|
||||||
|
|
||||||
# ensure the ``contextlib.contextmanager`` frame inside the wrapping
|
# ensure the ``contextlib.contextmanager`` frame inside the wrapping
|
||||||
# ``.__exit__()`` method isn't shown either.
|
# ``.__exit__()`` method isn't shown either.
|
||||||
import sys
|
|
||||||
frame = sys._getframe()
|
frame = sys._getframe()
|
||||||
frame.f_back.f_globals['__tracebackhide__'] = True
|
frame.f_back.f_globals['__tracebackhide__'] = True
|
||||||
# NOTE: this seems like a form of cpython bug wherein
|
# NOTE: this seems like a form of cpython bug wherein
|
||||||
|
@ -548,10 +606,9 @@ def disable_sigint():
|
||||||
# for manual debugging if necessary
|
# for manual debugging if necessary
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
|
||||||
import signal
|
|
||||||
orig_handler = signal.signal(
|
orig_handler = signal.signal(
|
||||||
signal.SIGINT,
|
signal.SIGINT,
|
||||||
disable_sigint_in_pdb
|
partial(shield_sigint, pdb=pdb),
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
yield
|
yield
|
||||||
|
@ -564,10 +621,9 @@ def disable_sigint():
|
||||||
|
|
||||||
def _set_trace(actor=None):
|
def _set_trace(actor=None):
|
||||||
__tracebackhide__ = True
|
__tracebackhide__ = True
|
||||||
# pdb = _open_pdb()
|
|
||||||
with (
|
with (
|
||||||
_open_pdb() as pdb,
|
_open_pdb() as pdb,
|
||||||
disable_sigint(),
|
disable_sigint(pdb=pdb),
|
||||||
):
|
):
|
||||||
if actor is not None:
|
if actor is not None:
|
||||||
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
||||||
|
@ -601,10 +657,9 @@ breakpoint = partial(
|
||||||
|
|
||||||
def _post_mortem(actor):
|
def _post_mortem(actor):
|
||||||
__tracebackhide__ = True
|
__tracebackhide__ = True
|
||||||
# pdb = _mk_pdb()
|
|
||||||
with (
|
with (
|
||||||
_open_pdb() as pdb,
|
_open_pdb() as pdb,
|
||||||
disable_sigint(),
|
disable_sigint(pdb=pdb),
|
||||||
):
|
):
|
||||||
log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue