forked from goodboy/tractor

Comment all `.pause(shield=True)` attempts again; still need to solve the cancel scope `.__exit__()` frame-hiding issue..

multihomed
Tyler Goodlet 2023-10-10 09:55:11 -04:00
parent 575a24adf1
commit 6d951c526a
1 changed file with 128 additions and 116 deletions
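Context for the commit message: `pdbp` (like `pdb++` before it, and pytest) treats a `__tracebackhide__ = True` local as a request to skip that frame when presenting the stack, so a wrapper can normally hide itself from the REPL. A minimal, illustrative sketch of that mechanism (not code from this commit; assumes `pdbp` is installed and takes over `pdb` on import):

```python
import pdbp  # noqa: F401  # importing installs the pdb+ REPL as the default debugger

def entry_wrapper() -> None:
    # pdbp honors this local: the frame is hidden from the
    # displayed stack, so the user lands one frame up.
    __tracebackhide__: bool = True
    breakpoint()

entry_wrapper()  # the REPL opens at this call site, not inside the wrapper
```

The "REMAINING ISSUE" noted below is that `trio.CancelScope.__exit__()` is library code, so there is no obvious place to plant that local.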


@@ -631,6 +631,7 @@ def _set_trace(
         # no f!#$&* idea, but when we're in async land
         # we need 2x frames up?
         frame = frame.f_back
+        # frame = frame.f_back

     # if shield:
     #     frame = frame.f_back
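The hunk above tweaks how many `f_back` hops `_set_trace` takes before handing a frame to the REPL. For reference, a standalone sketch of the underlying stdlib mechanism (illustrative names; `set_trace_n_up` is not a project function):

```python
import pdb
import sys

def set_trace_n_up(n: int = 2) -> None:
    # start from this function's own frame and hop n callers up;
    # async plumbing can add an extra frame, hence the "2x frames
    # up" guesswork in the hunk above.
    frame = sys._getframe()
    for _ in range(n):
        if frame.f_back is None:
            break  # reached the top of the stack
        frame = frame.f_back
    # stdlib Pdb accepts an explicit frame to stop in
    pdb.Pdb().set_trace(frame=frame)
```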
@@ -646,17 +647,19 @@ def _set_trace(
     # undo_

 async def pause(
     debug_func: Callable = _set_trace,
     release_lock_signal: trio.Event | None = None,

-    # allow caller to pause despite task cancellation,
+    # TODO: allow caller to pause despite task cancellation,
     # exactly the same as wrapping with:
     # with CancelScope(shield=True):
     #     await pause()
-    shield: bool = False,
+    # => the REMAINING ISSUE is that the scope's .__exit__() frame
+    #    is always shown in the debugger on entry.. and there seems to
+    #    be no way to override it?..
+    # shield: bool = False,

     # TODO:
     # shield: bool = False
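The now-commented `shield` parameter was meant as sugar for exactly the wrapping spelled out in the TODO above. Until it lands, the equivalent caller-side pattern looks like this (a sketch against the public `trio` API, assuming `pause` is importable from `tractor` as the coroutine defined in this file):

```python
import trio
import tractor

async def debug_despite_cancellation() -> None:
    # the shield keeps an in-flight cancellation from tearing down
    # this task while a human is sitting at the REPL prompt
    with trio.CancelScope(shield=True):
        await tractor.pause()
```

The catch motivating this commit: that `CancelScope.__exit__()` frame then shows up in the debugger on entry.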
@@ -689,133 +692,142 @@ async def pause(
     ):
         Lock.local_pdb_complete = trio.Event()

-    if shield:
-        debug_func = partial(
-            debug_func,
-            shield=shield,
-        )
-
-    with trio.CancelScope(shield=shield):
-
-        # TODO: need a more robust check for the "root" actor
-        if (
-            not is_root_process()
-            and actor._parent_chan  # a connected child
-        ):
-            if Lock.local_task_in_debug:
-
-                # Recurrence entry case: this task already has the lock and
-                # is likely recurrently entering a breakpoint
-                if Lock.local_task_in_debug == task_name:
-                    # noop on recurrent entry case but we want to trigger
-                    # a checkpoint to allow other actors error-propagate and
-                    # potentially avoid infinite re-entries in some subactor.
-                    await trio.lowlevel.checkpoint()
-                    return
-
-                # if **this** actor is already in debug mode block here
-                # waiting for the control to be released - this allows
-                # support for recursive entries to `tractor.breakpoint()`
-                log.warning(f"{actor.uid} already has a debug lock, waiting...")
-                await Lock.local_pdb_complete.wait()
-                await trio.sleep(0.1)
-
-            # mark local actor as "in debug mode" to avoid recurrent
-            # entries/requests to the root process
-            Lock.local_task_in_debug = task_name
-
-            # this **must** be awaited by the caller and is done using the
-            # root nursery so that the debugger can continue to run without
-            # being restricted by the scope of a new task nursery.
-
-            # TODO: if we want to debug a trio.Cancelled triggered exception
-            # we have to figure out how to avoid having the service nursery
-            # cancel on this task start? I *think* this works below:
-            # ```python
-            #   actor._service_n.cancel_scope.shield = shield
-            # ```
-            # but not entirely sure if that's a sane way to implement it?
-            try:
-                with trio.CancelScope(shield=True):
-                    await actor._service_n.start(
-                        wait_for_parent_stdin_hijack,
-                        actor.uid,
-                    )
-                Lock.repl = pdb
-            except RuntimeError:
-                Lock.release()
-
-                if actor._cancel_called:
-                    # service nursery won't be usable and we
-                    # don't want to lock up the root either way since
-                    # we're in (the midst of) cancellation.
-                    return
-
-                raise
-
-        elif is_root_process():
-
-            # we also wait in the root-parent for any child that
-            # may have the tty locked prior
-            # TODO: wait, what about multiple root tasks acquiring it though?
-            if Lock.global_actor_in_debug == actor.uid:
-                # re-entrant root process already has it: noop.
-                return
-
-            # XXX: since we need to enter pdb synchronously below,
-            # we have to release the lock manually from pdb completion
-            # callbacks. Can't think of a nicer way than this atm.
-            if Lock._debug_lock.locked():
-                log.warning(
-                    'Root actor attempting to shield-acquire active tty lock'
-                    f' owned by {Lock.global_actor_in_debug}')
-
-                # must shield here to avoid hitting a ``Cancelled`` and
-                # a child getting stuck bc we clobbered the tty
-                with trio.CancelScope(shield=True):
-                    await Lock._debug_lock.acquire()
-            else:
-                # may be cancelled
-                await Lock._debug_lock.acquire()
-
-            Lock.global_actor_in_debug = actor.uid
-            Lock.local_task_in_debug = task_name
-            Lock.repl = pdb
-
-        try:
-            if debug_func is None:
-                # assert release_lock_signal, (
-                #     'Must pass `release_lock_signal: trio.Event` if no '
-                #     'trace func provided!'
-                # )
-                print(f"{actor.uid} ENTERING WAIT")
-                task_status.started()
-
-                # with trio.CancelScope(shield=True):
-                #     await release_lock_signal.wait()
-
-            else:
-                # block here one (at the appropriate frame *up*) where
-                # ``breakpoint()`` was awaited and begin handling stdio.
-                log.debug("Entering the synchronous world of pdb")
-                debug_func(actor, pdb)
-
-        except bdb.BdbQuit:
-            Lock.release()
-            raise
-
-    # XXX: apparently we can't do this without showing this frame
-    # in the backtrace on first entry to the REPL? Seems like an odd
-    # behaviour that should have been fixed by now. This is also why
-    # we scrapped all the @cm approaches that were tried previously.
-    # finally:
-    #     __tracebackhide__ = True
-    #     # frame = sys._getframe()
-    #     # last_f = frame.f_back
-    #     # last_f.f_globals['__tracebackhide__'] = True
-    #     # signal.signal = pdbp.hideframe(signal.signal)
+    # if shield:
+    debug_func = partial(
+        debug_func,
+        # shield=shield,
+    )
+
+    # def _exit(self, *args, **kwargs):
+    #     __tracebackhide__: bool = True
+    #     super().__exit__(*args, **kwargs)
+
+    # trio.CancelScope.__exit__.__tracebackhide__ = True
+
+    # import types
+    # with trio.CancelScope(shield=shield) as cs:
+    #     cs.__exit__ = types.MethodType(_exit, cs)
+    #     cs.__exit__.__tracebackhide__ = True
+
+    # TODO: need a more robust check for the "root" actor
+    if (
+        not is_root_process()
+        and actor._parent_chan  # a connected child
+    ):
+        if Lock.local_task_in_debug:
+
+            # Recurrence entry case: this task already has the lock and
+            # is likely recurrently entering a breakpoint
+            if Lock.local_task_in_debug == task_name:
+                # noop on recurrent entry case but we want to trigger
+                # a checkpoint to allow other actors error-propagate and
+                # potentially avoid infinite re-entries in some subactor.
+                await trio.lowlevel.checkpoint()
+                return
+
+            # if **this** actor is already in debug mode block here
+            # waiting for the control to be released - this allows
+            # support for recursive entries to `tractor.breakpoint()`
+            log.warning(f"{actor.uid} already has a debug lock, waiting...")
+            await Lock.local_pdb_complete.wait()
+            await trio.sleep(0.1)
+
+        # mark local actor as "in debug mode" to avoid recurrent
+        # entries/requests to the root process
+        Lock.local_task_in_debug = task_name
+
+        # this **must** be awaited by the caller and is done using the
+        # root nursery so that the debugger can continue to run without
+        # being restricted by the scope of a new task nursery.
+
+        # TODO: if we want to debug a trio.Cancelled triggered exception
+        # we have to figure out how to avoid having the service nursery
+        # cancel on this task start? I *think* this works below:
+        # ```python
+        #   actor._service_n.cancel_scope.shield = shield
+        # ```
+        # but not entirely sure if that's a sane way to implement it?
+        try:
+            with trio.CancelScope(shield=True):
+                await actor._service_n.start(
+                    wait_for_parent_stdin_hijack,
+                    actor.uid,
+                )
+            Lock.repl = pdb
+        except RuntimeError:
+            Lock.release()
+
+            if actor._cancel_called:
+                # service nursery won't be usable and we
+                # don't want to lock up the root either way since
+                # we're in (the midst of) cancellation.
+                return
+
+            raise
+
+    elif is_root_process():
+
+        # we also wait in the root-parent for any child that
+        # may have the tty locked prior
+        # TODO: wait, what about multiple root tasks acquiring it though?
+        if Lock.global_actor_in_debug == actor.uid:
+            # re-entrant root process already has it: noop.
+            return
+
+        # XXX: since we need to enter pdb synchronously below,
+        # we have to release the lock manually from pdb completion
+        # callbacks. Can't think of a nicer way than this atm.
+        if Lock._debug_lock.locked():
+            log.warning(
+                'Root actor attempting to shield-acquire active tty lock'
+                f' owned by {Lock.global_actor_in_debug}')
+
+            # must shield here to avoid hitting a ``Cancelled`` and
+            # a child getting stuck bc we clobbered the tty
+            with trio.CancelScope(shield=True):
+                await Lock._debug_lock.acquire()
+        else:
+            # may be cancelled
+            await Lock._debug_lock.acquire()
+
+        Lock.global_actor_in_debug = actor.uid
+        Lock.local_task_in_debug = task_name
+        Lock.repl = pdb
+
+    try:
+        if debug_func is None:
+            # assert release_lock_signal, (
+            #     'Must pass `release_lock_signal: trio.Event` if no '
+            #     'trace func provided!'
+            # )
+            print(f"{actor.uid} ENTERING WAIT")
+            task_status.started()
+
+            # with trio.CancelScope(shield=True):
+            #     await release_lock_signal.wait()
+
+        else:
+            # block here one (at the appropriate frame *up*) where
+            # ``breakpoint()`` was awaited and begin handling stdio.
+            log.debug("Entering the synchronous world of pdb")
+            debug_func(actor, pdb)
+
+    except bdb.BdbQuit:
+        Lock.release()
+        raise
+
+    # XXX: apparently we can't do this without showing this frame
+    # in the backtrace on first entry to the REPL? Seems like an odd
+    # behaviour that should have been fixed by now. This is also why
+    # we scrapped all the @cm approaches that were tried previously.
+    # finally:
+    #     __tracebackhide__ = True
+    #     # frame = sys._getframe()
+    #     # last_f = frame.f_back
+    #     # last_f.f_globals['__tracebackhide__'] = True
+    #     # signal.signal = pdbp.hideframe(signal.signal)

 # TODO: allow pausing from sync code.
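On the "no way to override it" note: the commented `_exit`/`types.MethodType` attempt in the hunk above runs into two Python-level walls, sketched here (illustrative, not from the commit). First, `with` statements invoke `type(cs).__exit__`, never an instance attribute, so an instance-level override would be ignored; second, `trio.CancelScope` appears to be a slotted (attrs) class, so assigning a new instance attribute raises `AttributeError` in the first place:

```python
import types

import trio

def _exit(self, *args, **kwargs):
    # ask pdbp to skip this frame, were it ever actually called
    __tracebackhide__: bool = True
    return trio.CancelScope.__exit__(self, *args, **kwargs)

async def demo() -> None:
    with trio.CancelScope(shield=True) as cs:
        try:
            # expected to raise: CancelScope defines __slots__, so
            # new instance attributes can't be attached to it
            cs.__exit__ = types.MethodType(_exit, cs)
        except AttributeError as err:
            print(f'cannot patch the instance: {err}')
        # even if the assignment worked, CPython would still call
        # type(cs).__exit__ when the `with` block exits

trio.run(demo)
```

So hiding the frame would have to happen from the pdb side (e.g. via `pdbp.hideframe`) or inside trio itself, which is presumably why these attempts stay commented out.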