forked from goodboy/tractor
Always propagate SIGINT when no locking peer found
A hopefully significant fix here is to always avoid suppressing a SIGINT when the root actor can not detect an active IPC connections (via a connected channel) to the supposed debug lock holding actor. In that case it is most likely that the actor has either terminated or has lost its connection for debugger control and there is no way the root can verify the lock is in use; thus we choose to allow KBI cancellation. Drop the (by comment) `try`-`finally` block in `_hijoack_stdin_for_child()` around the `_acquire_debug_lock()` call since all that logic should now be handled internal to that locking manager. Try to catch a weird error around the `.do_longlist()` method call that seems to sometimes break on py3.10 and latest `pdbpp`.310_plus
parent
20b902b300
commit
5457aa566c
tractor
|
@ -178,12 +178,6 @@ async def _acquire_debug_lock(
|
||||||
|
|
||||||
we_acquired = False
|
we_acquired = False
|
||||||
|
|
||||||
if _no_remote_has_tty is None:
|
|
||||||
# mark the tty lock as being in use so that the runtime
|
|
||||||
# can try to avoid clobbering any connection from a child
|
|
||||||
# that's currently relying on it.
|
|
||||||
_no_remote_has_tty = trio.Event()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
log.runtime(
|
log.runtime(
|
||||||
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
||||||
|
@ -191,6 +185,12 @@ async def _acquire_debug_lock(
|
||||||
we_acquired = True
|
we_acquired = True
|
||||||
await _debug_lock.acquire()
|
await _debug_lock.acquire()
|
||||||
|
|
||||||
|
if _no_remote_has_tty is None:
|
||||||
|
# mark the tty lock as being in use so that the runtime
|
||||||
|
# can try to avoid clobbering any connection from a child
|
||||||
|
# that's currently relying on it.
|
||||||
|
_no_remote_has_tty = trio.Event()
|
||||||
|
|
||||||
_global_actor_in_debug = uid
|
_global_actor_in_debug = uid
|
||||||
log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
||||||
|
|
||||||
|
@ -208,6 +208,7 @@ async def _acquire_debug_lock(
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# if _global_actor_in_debug == uid:
|
# if _global_actor_in_debug == uid:
|
||||||
|
|
||||||
if (
|
if (
|
||||||
we_acquired
|
we_acquired
|
||||||
and _debug_lock.locked()
|
and _debug_lock.locked()
|
||||||
|
@ -224,12 +225,15 @@ async def _acquire_debug_lock(
|
||||||
not stats.owner
|
not stats.owner
|
||||||
):
|
):
|
||||||
log.runtime(f"No more tasks waiting on tty lock! says {uid}")
|
log.runtime(f"No more tasks waiting on tty lock! says {uid}")
|
||||||
|
if _no_remote_has_tty is not None:
|
||||||
_no_remote_has_tty.set()
|
_no_remote_has_tty.set()
|
||||||
_no_remote_has_tty = None
|
_no_remote_has_tty = None
|
||||||
|
|
||||||
_global_actor_in_debug = None
|
_global_actor_in_debug = None
|
||||||
|
|
||||||
log.runtime(f"TTY lock released, remote task: {task_name}:{uid}")
|
log.runtime(
|
||||||
|
f"TTY lock released, remote task: {task_name}:{uid}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@tractor.context
|
@tractor.context
|
||||||
|
@ -244,6 +248,10 @@ async def _hijack_stdin_for_child(
|
||||||
the pdbpp debugger console can be allocated to a sub-actor for repl
|
the pdbpp debugger console can be allocated to a sub-actor for repl
|
||||||
bossing.
|
bossing.
|
||||||
|
|
||||||
|
NOTE: this task is invoked in the root actor-process of the actor
|
||||||
|
tree. It is meant to be invoked as an rpc-task which should be
|
||||||
|
highly reliable at cleaning out the tty-lock state when complete!
|
||||||
|
|
||||||
'''
|
'''
|
||||||
task_name = trio.lowlevel.current_task().name
|
task_name = trio.lowlevel.current_task().name
|
||||||
|
|
||||||
|
@ -265,9 +273,9 @@ async def _hijack_stdin_for_child(
|
||||||
with (
|
with (
|
||||||
trio.CancelScope(shield=True),
|
trio.CancelScope(shield=True),
|
||||||
):
|
):
|
||||||
try:
|
# try:
|
||||||
lock = None
|
# lock = None
|
||||||
async with _acquire_debug_lock(subactor_uid) as lock:
|
async with _acquire_debug_lock(subactor_uid): # as lock:
|
||||||
|
|
||||||
# indicate to child that we've locked stdio
|
# indicate to child that we've locked stdio
|
||||||
await ctx.started('Locked')
|
await ctx.started('Locked')
|
||||||
|
@ -279,32 +287,35 @@ async def _hijack_stdin_for_child(
|
||||||
async with ctx.open_stream() as stream:
|
async with ctx.open_stream() as stream:
|
||||||
assert await stream.receive() == 'pdb_unlock'
|
assert await stream.receive() == 'pdb_unlock'
|
||||||
|
|
||||||
except (
|
# except (
|
||||||
BaseException,
|
# BaseException,
|
||||||
# trio.MultiError,
|
# # trio.MultiError,
|
||||||
# Exception,
|
# # Exception,
|
||||||
# trio.BrokenResourceError,
|
# # trio.BrokenResourceError,
|
||||||
# trio.Cancelled, # by local cancellation
|
# # trio.Cancelled, # by local cancellation
|
||||||
# trio.ClosedResourceError, # by self._rx_chan
|
# # trio.ClosedResourceError, # by self._rx_chan
|
||||||
# ContextCancelled,
|
# # ContextCancelled,
|
||||||
# ConnectionResetError,
|
# # ConnectionResetError,
|
||||||
):
|
# ):
|
||||||
# XXX: there may be a race with the portal teardown
|
# # XXX: there may be a race with the portal teardown
|
||||||
# with the calling actor which we can safely ignore.
|
# # with the calling actor which we can safely ignore.
|
||||||
# The alternative would be sending an ack message
|
# # The alternative would be sending an ack message
|
||||||
# and allowing the client to wait for us to teardown
|
# # and allowing the client to wait for us to teardown
|
||||||
# first?
|
# # first?
|
||||||
if lock and lock.locked():
|
# if lock and lock.locked():
|
||||||
lock.release()
|
# try:
|
||||||
|
# lock.release()
|
||||||
|
# except RuntimeError:
|
||||||
|
# log.exception(f"we don't own the tty lock?")
|
||||||
|
|
||||||
# if isinstance(err, trio.Cancelled):
|
# # if isinstance(err, trio.Cancelled):
|
||||||
raise
|
# raise
|
||||||
|
|
||||||
finally:
|
# finally:
|
||||||
log.runtime(
|
# log.runtime(
|
||||||
"TTY lock released, remote task:"
|
# "TTY lock released, remote task:"
|
||||||
f"{task_name}:{subactor_uid}"
|
# f"{task_name}:{subactor_uid}"
|
||||||
)
|
# )
|
||||||
|
|
||||||
return "pdb_unlock_complete"
|
return "pdb_unlock_complete"
|
||||||
|
|
||||||
|
@ -585,20 +596,43 @@ def shield_sigint(
|
||||||
__tracebackhide__ = True
|
__tracebackhide__ = True
|
||||||
|
|
||||||
global _local_task_in_debug, _global_actor_in_debug
|
global _local_task_in_debug, _global_actor_in_debug
|
||||||
in_debug = _global_actor_in_debug
|
uid_in_debug = _global_actor_in_debug
|
||||||
|
|
||||||
actor = tractor.current_actor()
|
actor = tractor.current_actor()
|
||||||
|
|
||||||
|
any_connected = False
|
||||||
|
if uid_in_debug is not None:
|
||||||
|
# try to see if the supposed (sub)actor in debug still
|
||||||
|
# has an active connection to *this* actor, and if not
|
||||||
|
# it's likely they aren't using the TTY lock / debugger
|
||||||
|
# and we should propagate SIGINT normally.
|
||||||
|
chans = actor._peers.get(tuple(uid_in_debug))
|
||||||
|
if chans:
|
||||||
|
any_connected = any(chan.connected() for chan in chans)
|
||||||
|
if not any_connected:
|
||||||
|
log.warning(
|
||||||
|
'A global actor reported to be in debug '
|
||||||
|
'but no connection exists for this child:\n'
|
||||||
|
f'{uid_in_debug}\n'
|
||||||
|
'Allowing SIGINT propagation..'
|
||||||
|
)
|
||||||
|
|
||||||
# root actor branch that reports whether or not a child
|
# root actor branch that reports whether or not a child
|
||||||
# has locked debugger.
|
# has locked debugger.
|
||||||
if (
|
if (
|
||||||
is_root_process()
|
is_root_process()
|
||||||
and in_debug
|
and uid_in_debug is not None
|
||||||
|
|
||||||
|
# XXX: only if there is an existing connection to the
|
||||||
|
# (sub-)actor in debug do we ignore SIGINT in this
|
||||||
|
# parent! Otherwise we may hang waiting for an actor
|
||||||
|
# which has already terminated to unlock.
|
||||||
|
and any_connected
|
||||||
):
|
):
|
||||||
name = in_debug[0]
|
name = uid_in_debug[0]
|
||||||
if name != 'root':
|
if name != 'root':
|
||||||
log.pdb(
|
log.pdb(
|
||||||
f"Ignoring SIGINT while child in debug mode: `{in_debug}`"
|
f"Ignoring SIGINT while child in debug mode: `{uid_in_debug}`"
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -652,8 +686,12 @@ def shield_sigint(
|
||||||
# https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
|
# https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
|
||||||
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
||||||
|
|
||||||
|
try:
|
||||||
pdb_obj.do_longlist(None)
|
pdb_obj.do_longlist(None)
|
||||||
print(pdb_obj.prompt, end='', flush=True)
|
print(pdb_obj.prompt, end='', flush=True)
|
||||||
|
except AttributeError:
|
||||||
|
log.exception('pdbpp longlist failed...')
|
||||||
|
raise KeyboardInterrupt
|
||||||
|
|
||||||
|
|
||||||
def _set_trace(
|
def _set_trace(
|
||||||
|
|
Loading…
Reference in New Issue