forked from goodboy/tractor
Add shielding support to `.pause()`
Implement it like you'd expect using simply a wrapping `trio.CancelScope` which is itself shielded by the input `shield: bool` B) There's seemingly still some issues with the frame selection when the REPL engages and not sure how to resolve it yet but at least this does indeed work for practical purposes. Still needs a test obviously!multihomed
parent
78c0d2b234
commit
18a1634025
|
@ -382,7 +382,7 @@ async def wait_for_parent_stdin_hijack(
|
||||||
|
|
||||||
This function is used by any sub-actor to acquire mutex access to
|
This function is used by any sub-actor to acquire mutex access to
|
||||||
the ``pdb`` REPL and thus the root's TTY for interactive debugging
|
the ``pdb`` REPL and thus the root's TTY for interactive debugging
|
||||||
(see below inside ``_pause()``). It can be used to ensure that
|
(see below inside ``pause()``). It can be used to ensure that
|
||||||
an intermediate nursery-owning actor does not clobber its children
|
an intermediate nursery-owning actor does not clobber its children
|
||||||
if they are in debug (see below inside
|
if they are in debug (see below inside
|
||||||
``maybe_wait_for_debugger()``).
|
``maybe_wait_for_debugger()``).
|
||||||
|
@ -448,171 +448,6 @@ def mk_mpdb() -> tuple[MultiActorPdb, Callable]:
|
||||||
return pdb, Lock.unshield_sigint
|
return pdb, Lock.unshield_sigint
|
||||||
|
|
||||||
|
|
||||||
async def _pause(
|
|
||||||
|
|
||||||
debug_func: Callable | None = None,
|
|
||||||
release_lock_signal: trio.Event | None = None,
|
|
||||||
|
|
||||||
# TODO:
|
|
||||||
# shield: bool = False
|
|
||||||
task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED
|
|
||||||
|
|
||||||
) -> None:
|
|
||||||
'''
|
|
||||||
A pause point (more commonly known as a "breakpoint") interrupt
|
|
||||||
instruction for engaging a blocking debugger instance to
|
|
||||||
conduct manual console-based-REPL-interaction from within
|
|
||||||
`tractor`'s async runtime, normally from some single-threaded
|
|
||||||
and currently executing actor-hosted-`trio`-task in some
|
|
||||||
(remote) process.
|
|
||||||
|
|
||||||
NOTE: we use the semantics "pause" since it better encompasses
|
|
||||||
the entirety of the necessary global-runtime-state-mutation any
|
|
||||||
actor-task must access and lock in order to get full isolated
|
|
||||||
control over the process tree's root TTY:
|
|
||||||
https://en.wikipedia.org/wiki/Breakpoint
|
|
||||||
|
|
||||||
'''
|
|
||||||
__tracebackhide__ = True
|
|
||||||
actor = tractor.current_actor()
|
|
||||||
pdb, undo_sigint = mk_mpdb()
|
|
||||||
task_name = trio.lowlevel.current_task().name
|
|
||||||
|
|
||||||
# TODO: is it possible to debug a trio.Cancelled except block?
|
|
||||||
# right now it seems like we can kinda do with by shielding
|
|
||||||
# around ``tractor.breakpoint()`` but not if we move the shielded
|
|
||||||
# scope here???
|
|
||||||
# with trio.CancelScope(shield=shield):
|
|
||||||
# await trio.lowlevel.checkpoint()
|
|
||||||
|
|
||||||
if (
|
|
||||||
not Lock.local_pdb_complete
|
|
||||||
or Lock.local_pdb_complete.is_set()
|
|
||||||
):
|
|
||||||
Lock.local_pdb_complete = trio.Event()
|
|
||||||
|
|
||||||
# TODO: need a more robust check for the "root" actor
|
|
||||||
if (
|
|
||||||
not is_root_process()
|
|
||||||
and actor._parent_chan # a connected child
|
|
||||||
):
|
|
||||||
|
|
||||||
if Lock.local_task_in_debug:
|
|
||||||
|
|
||||||
# Recurrence entry case: this task already has the lock and
|
|
||||||
# is likely recurrently entering a breakpoint
|
|
||||||
if Lock.local_task_in_debug == task_name:
|
|
||||||
# noop on recurrent entry case but we want to trigger
|
|
||||||
# a checkpoint to allow other actors error-propagate and
|
|
||||||
# potetially avoid infinite re-entries in some subactor.
|
|
||||||
await trio.lowlevel.checkpoint()
|
|
||||||
return
|
|
||||||
|
|
||||||
# if **this** actor is already in debug mode block here
|
|
||||||
# waiting for the control to be released - this allows
|
|
||||||
# support for recursive entries to `tractor.breakpoint()`
|
|
||||||
log.warning(f"{actor.uid} already has a debug lock, waiting...")
|
|
||||||
|
|
||||||
await Lock.local_pdb_complete.wait()
|
|
||||||
await trio.sleep(0.1)
|
|
||||||
|
|
||||||
# mark local actor as "in debug mode" to avoid recurrent
|
|
||||||
# entries/requests to the root process
|
|
||||||
Lock.local_task_in_debug = task_name
|
|
||||||
|
|
||||||
# this **must** be awaited by the caller and is done using the
|
|
||||||
# root nursery so that the debugger can continue to run without
|
|
||||||
# being restricted by the scope of a new task nursery.
|
|
||||||
|
|
||||||
# TODO: if we want to debug a trio.Cancelled triggered exception
|
|
||||||
# we have to figure out how to avoid having the service nursery
|
|
||||||
# cancel on this task start? I *think* this works below:
|
|
||||||
# ```python
|
|
||||||
# actor._service_n.cancel_scope.shield = shield
|
|
||||||
# ```
|
|
||||||
# but not entirely sure if that's a sane way to implement it?
|
|
||||||
try:
|
|
||||||
with trio.CancelScope(shield=True):
|
|
||||||
await actor._service_n.start(
|
|
||||||
wait_for_parent_stdin_hijack,
|
|
||||||
actor.uid,
|
|
||||||
)
|
|
||||||
Lock.repl = pdb
|
|
||||||
except RuntimeError:
|
|
||||||
Lock.release()
|
|
||||||
|
|
||||||
if actor._cancel_called:
|
|
||||||
# service nursery won't be usable and we
|
|
||||||
# don't want to lock up the root either way since
|
|
||||||
# we're in (the midst of) cancellation.
|
|
||||||
return
|
|
||||||
|
|
||||||
raise
|
|
||||||
|
|
||||||
elif is_root_process():
|
|
||||||
|
|
||||||
# we also wait in the root-parent for any child that
|
|
||||||
# may have the tty locked prior
|
|
||||||
# TODO: wait, what about multiple root tasks acquiring it though?
|
|
||||||
if Lock.global_actor_in_debug == actor.uid:
|
|
||||||
# re-entrant root process already has it: noop.
|
|
||||||
return
|
|
||||||
|
|
||||||
# XXX: since we need to enter pdb synchronously below,
|
|
||||||
# we have to release the lock manually from pdb completion
|
|
||||||
# callbacks. Can't think of a nicer way then this atm.
|
|
||||||
if Lock._debug_lock.locked():
|
|
||||||
log.warning(
|
|
||||||
'Root actor attempting to shield-acquire active tty lock'
|
|
||||||
f' owned by {Lock.global_actor_in_debug}')
|
|
||||||
|
|
||||||
# must shield here to avoid hitting a ``Cancelled`` and
|
|
||||||
# a child getting stuck bc we clobbered the tty
|
|
||||||
with trio.CancelScope(shield=True):
|
|
||||||
await Lock._debug_lock.acquire()
|
|
||||||
else:
|
|
||||||
# may be cancelled
|
|
||||||
await Lock._debug_lock.acquire()
|
|
||||||
|
|
||||||
Lock.global_actor_in_debug = actor.uid
|
|
||||||
Lock.local_task_in_debug = task_name
|
|
||||||
Lock.repl = pdb
|
|
||||||
|
|
||||||
try:
|
|
||||||
# breakpoint()
|
|
||||||
if debug_func is None:
|
|
||||||
# assert release_lock_signal, (
|
|
||||||
# 'Must pass `release_lock_signal: trio.Event` if no '
|
|
||||||
# 'trace func provided!'
|
|
||||||
# )
|
|
||||||
print(f"{actor.uid} ENTERING WAIT")
|
|
||||||
task_status.started()
|
|
||||||
|
|
||||||
# with trio.CancelScope(shield=True):
|
|
||||||
# await release_lock_signal.wait()
|
|
||||||
|
|
||||||
else:
|
|
||||||
# block here one (at the appropriate frame *up*) where
|
|
||||||
# ``breakpoint()`` was awaited and begin handling stdio.
|
|
||||||
log.debug("Entering the synchronous world of pdb")
|
|
||||||
debug_func(actor, pdb)
|
|
||||||
|
|
||||||
except bdb.BdbQuit:
|
|
||||||
Lock.release()
|
|
||||||
raise
|
|
||||||
|
|
||||||
# XXX: apparently we can't do this without showing this frame
|
|
||||||
# in the backtrace on first entry to the REPL? Seems like an odd
|
|
||||||
# behaviour that should have been fixed by now. This is also why
|
|
||||||
# we scrapped all the @cm approaches that were tried previously.
|
|
||||||
# finally:
|
|
||||||
# __tracebackhide__ = True
|
|
||||||
# # frame = sys._getframe()
|
|
||||||
# # last_f = frame.f_back
|
|
||||||
# # last_f.f_globals['__tracebackhide__'] = True
|
|
||||||
# # signal.signal = pdbp.hideframe(signal.signal)
|
|
||||||
|
|
||||||
|
|
||||||
def shield_sigint_handler(
|
def shield_sigint_handler(
|
||||||
signum: int,
|
signum: int,
|
||||||
frame: 'frame', # type: ignore # noqa
|
frame: 'frame', # type: ignore # noqa
|
||||||
|
@ -774,6 +609,7 @@ def shield_sigint_handler(
|
||||||
def _set_trace(
|
def _set_trace(
|
||||||
actor: tractor.Actor | None = None,
|
actor: tractor.Actor | None = None,
|
||||||
pdb: MultiActorPdb | None = None,
|
pdb: MultiActorPdb | None = None,
|
||||||
|
shield: bool = False,
|
||||||
):
|
):
|
||||||
__tracebackhide__ = True
|
__tracebackhide__ = True
|
||||||
actor: tractor.Actor = actor or tractor.current_actor()
|
actor: tractor.Actor = actor or tractor.current_actor()
|
||||||
|
@ -785,14 +621,20 @@ def _set_trace(
|
||||||
|
|
||||||
if (
|
if (
|
||||||
frame
|
frame
|
||||||
and pdb
|
and (
|
||||||
|
pdb
|
||||||
and actor is not None
|
and actor is not None
|
||||||
|
) or shield
|
||||||
):
|
):
|
||||||
|
# pdbp.set_trace()
|
||||||
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
||||||
# no f!#$&* idea, but when we're in async land
|
# no f!#$&* idea, but when we're in async land
|
||||||
# we need 2x frames up?
|
# we need 2x frames up?
|
||||||
frame = frame.f_back
|
frame = frame.f_back
|
||||||
|
|
||||||
|
# if shield:
|
||||||
|
# frame = frame.f_back
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pdb, undo_sigint = mk_mpdb()
|
pdb, undo_sigint = mk_mpdb()
|
||||||
|
|
||||||
|
@ -804,8 +646,181 @@ def _set_trace(
|
||||||
# undo_
|
# undo_
|
||||||
|
|
||||||
|
|
||||||
# TODO: allow pausing from sync code, normally by remapping
|
|
||||||
# python's builtin breakpoint() hook to this runtime aware version.
|
async def pause(
|
||||||
|
|
||||||
|
debug_func: Callable = _set_trace,
|
||||||
|
release_lock_signal: trio.Event | None = None,
|
||||||
|
|
||||||
|
# allow caller to pause despite task cancellation,
|
||||||
|
# exactly the same as wrapping with:
|
||||||
|
# with CancelScope(shield=True):
|
||||||
|
# await pause()
|
||||||
|
shield: bool = False,
|
||||||
|
|
||||||
|
# TODO:
|
||||||
|
# shield: bool = False
|
||||||
|
task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED
|
||||||
|
|
||||||
|
) -> None:
|
||||||
|
'''
|
||||||
|
A pause point (more commonly known as a "breakpoint") interrupt
|
||||||
|
instruction for engaging a blocking debugger instance to
|
||||||
|
conduct manual console-based-REPL-interaction from within
|
||||||
|
`tractor`'s async runtime, normally from some single-threaded
|
||||||
|
and currently executing actor-hosted-`trio`-task in some
|
||||||
|
(remote) process.
|
||||||
|
|
||||||
|
NOTE: we use the semantics "pause" since it better encompasses
|
||||||
|
the entirety of the necessary global-runtime-state-mutation any
|
||||||
|
actor-task must access and lock in order to get full isolated
|
||||||
|
control over the process tree's root TTY:
|
||||||
|
https://en.wikipedia.org/wiki/Breakpoint
|
||||||
|
|
||||||
|
'''
|
||||||
|
__tracebackhide__ = True
|
||||||
|
actor = tractor.current_actor()
|
||||||
|
pdb, undo_sigint = mk_mpdb()
|
||||||
|
task_name = trio.lowlevel.current_task().name
|
||||||
|
|
||||||
|
if (
|
||||||
|
not Lock.local_pdb_complete
|
||||||
|
or Lock.local_pdb_complete.is_set()
|
||||||
|
):
|
||||||
|
Lock.local_pdb_complete = trio.Event()
|
||||||
|
|
||||||
|
if shield:
|
||||||
|
debug_func = partial(
|
||||||
|
debug_func,
|
||||||
|
shield=shield,
|
||||||
|
)
|
||||||
|
|
||||||
|
with trio.CancelScope(shield=shield):
|
||||||
|
|
||||||
|
# TODO: need a more robust check for the "root" actor
|
||||||
|
if (
|
||||||
|
not is_root_process()
|
||||||
|
and actor._parent_chan # a connected child
|
||||||
|
):
|
||||||
|
|
||||||
|
if Lock.local_task_in_debug:
|
||||||
|
|
||||||
|
# Recurrence entry case: this task already has the lock and
|
||||||
|
# is likely recurrently entering a breakpoint
|
||||||
|
if Lock.local_task_in_debug == task_name:
|
||||||
|
# noop on recurrent entry case but we want to trigger
|
||||||
|
# a checkpoint to allow other actors error-propagate and
|
||||||
|
# potetially avoid infinite re-entries in some subactor.
|
||||||
|
await trio.lowlevel.checkpoint()
|
||||||
|
return
|
||||||
|
|
||||||
|
# if **this** actor is already in debug mode block here
|
||||||
|
# waiting for the control to be released - this allows
|
||||||
|
# support for recursive entries to `tractor.breakpoint()`
|
||||||
|
log.warning(f"{actor.uid} already has a debug lock, waiting...")
|
||||||
|
|
||||||
|
await Lock.local_pdb_complete.wait()
|
||||||
|
await trio.sleep(0.1)
|
||||||
|
|
||||||
|
# mark local actor as "in debug mode" to avoid recurrent
|
||||||
|
# entries/requests to the root process
|
||||||
|
Lock.local_task_in_debug = task_name
|
||||||
|
|
||||||
|
# this **must** be awaited by the caller and is done using the
|
||||||
|
# root nursery so that the debugger can continue to run without
|
||||||
|
# being restricted by the scope of a new task nursery.
|
||||||
|
|
||||||
|
# TODO: if we want to debug a trio.Cancelled triggered exception
|
||||||
|
# we have to figure out how to avoid having the service nursery
|
||||||
|
# cancel on this task start? I *think* this works below:
|
||||||
|
# ```python
|
||||||
|
# actor._service_n.cancel_scope.shield = shield
|
||||||
|
# ```
|
||||||
|
# but not entirely sure if that's a sane way to implement it?
|
||||||
|
try:
|
||||||
|
with trio.CancelScope(shield=True):
|
||||||
|
await actor._service_n.start(
|
||||||
|
wait_for_parent_stdin_hijack,
|
||||||
|
actor.uid,
|
||||||
|
)
|
||||||
|
Lock.repl = pdb
|
||||||
|
except RuntimeError:
|
||||||
|
Lock.release()
|
||||||
|
|
||||||
|
if actor._cancel_called:
|
||||||
|
# service nursery won't be usable and we
|
||||||
|
# don't want to lock up the root either way since
|
||||||
|
# we're in (the midst of) cancellation.
|
||||||
|
return
|
||||||
|
|
||||||
|
raise
|
||||||
|
|
||||||
|
elif is_root_process():
|
||||||
|
|
||||||
|
# we also wait in the root-parent for any child that
|
||||||
|
# may have the tty locked prior
|
||||||
|
# TODO: wait, what about multiple root tasks acquiring it though?
|
||||||
|
if Lock.global_actor_in_debug == actor.uid:
|
||||||
|
# re-entrant root process already has it: noop.
|
||||||
|
return
|
||||||
|
|
||||||
|
# XXX: since we need to enter pdb synchronously below,
|
||||||
|
# we have to release the lock manually from pdb completion
|
||||||
|
# callbacks. Can't think of a nicer way then this atm.
|
||||||
|
if Lock._debug_lock.locked():
|
||||||
|
log.warning(
|
||||||
|
'Root actor attempting to shield-acquire active tty lock'
|
||||||
|
f' owned by {Lock.global_actor_in_debug}')
|
||||||
|
|
||||||
|
# must shield here to avoid hitting a ``Cancelled`` and
|
||||||
|
# a child getting stuck bc we clobbered the tty
|
||||||
|
with trio.CancelScope(shield=True):
|
||||||
|
await Lock._debug_lock.acquire()
|
||||||
|
else:
|
||||||
|
# may be cancelled
|
||||||
|
await Lock._debug_lock.acquire()
|
||||||
|
|
||||||
|
Lock.global_actor_in_debug = actor.uid
|
||||||
|
Lock.local_task_in_debug = task_name
|
||||||
|
Lock.repl = pdb
|
||||||
|
|
||||||
|
try:
|
||||||
|
if debug_func is None:
|
||||||
|
# assert release_lock_signal, (
|
||||||
|
# 'Must pass `release_lock_signal: trio.Event` if no '
|
||||||
|
# 'trace func provided!'
|
||||||
|
# )
|
||||||
|
print(f"{actor.uid} ENTERING WAIT")
|
||||||
|
task_status.started()
|
||||||
|
|
||||||
|
# with trio.CancelScope(shield=True):
|
||||||
|
# await release_lock_signal.wait()
|
||||||
|
|
||||||
|
else:
|
||||||
|
# block here one (at the appropriate frame *up*) where
|
||||||
|
# ``breakpoint()`` was awaited and begin handling stdio.
|
||||||
|
log.debug("Entering the synchronous world of pdb")
|
||||||
|
debug_func(actor, pdb)
|
||||||
|
|
||||||
|
except bdb.BdbQuit:
|
||||||
|
Lock.release()
|
||||||
|
raise
|
||||||
|
|
||||||
|
# XXX: apparently we can't do this without showing this frame
|
||||||
|
# in the backtrace on first entry to the REPL? Seems like an odd
|
||||||
|
# behaviour that should have been fixed by now. This is also why
|
||||||
|
# we scrapped all the @cm approaches that were tried previously.
|
||||||
|
# finally:
|
||||||
|
# __tracebackhide__ = True
|
||||||
|
# # frame = sys._getframe()
|
||||||
|
# # last_f = frame.f_back
|
||||||
|
# # last_f.f_globals['__tracebackhide__'] = True
|
||||||
|
# # signal.signal = pdbp.hideframe(signal.signal)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: allow pausing from sync code.
|
||||||
|
# normally by remapping python's builtin breakpoint() hook to this
|
||||||
|
# runtime aware version which takes care of all .
|
||||||
def pause_from_sync() -> None:
|
def pause_from_sync() -> None:
|
||||||
print("ENTER SYNC PAUSE")
|
print("ENTER SYNC PAUSE")
|
||||||
import greenback
|
import greenback
|
||||||
|
@ -858,10 +873,6 @@ def pause_from_sync() -> None:
|
||||||
# using the "pause" semantics instead since
|
# using the "pause" semantics instead since
|
||||||
# that better covers actually somewhat "pausing the runtime"
|
# that better covers actually somewhat "pausing the runtime"
|
||||||
# for this particular paralell task to do debugging B)
|
# for this particular paralell task to do debugging B)
|
||||||
pause = partial(
|
|
||||||
_pause,
|
|
||||||
_set_trace,
|
|
||||||
)
|
|
||||||
# pp = pause # short-hand for "pause point"
|
# pp = pause # short-hand for "pause point"
|
||||||
|
|
||||||
|
|
||||||
|
@ -895,7 +906,7 @@ def _post_mortem(
|
||||||
|
|
||||||
|
|
||||||
post_mortem = partial(
|
post_mortem = partial(
|
||||||
_pause,
|
pause,
|
||||||
_post_mortem,
|
_post_mortem,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue