forked from goodboy/tractor
1
0
Fork 0

Attempt to avoid pdb lockups on channel breakage

Always try to release the root tty lock on broken connection errors.
alpha2
Tyler Goodlet 2021-08-19 12:36:55 -04:00
parent 4f166500d0
commit 79f0d6fda0
1 changed files with 30 additions and 18 deletions

View File

@ -238,24 +238,29 @@ async def _hijack_stdin_for_child(
with trio.CancelScope(shield=True): with trio.CancelScope(shield=True):
async with _acquire_debug_lock(subactor_uid): try:
lock = None
async with _acquire_debug_lock(subactor_uid) as lock:
# indicate to child that we've locked stdio # indicate to child that we've locked stdio
await ctx.started('Locked') await ctx.started('Locked')
log.pdb(f"Actor {subactor_uid} ACQUIRED stdin hijack lock") log.pdb(f"Actor {subactor_uid} ACQUIRED stdin hijack lock")
# wait for unlock pdb by child # wait for unlock pdb by child
async with ctx.open_stream() as stream: async with ctx.open_stream() as stream:
try:
assert await stream.receive() == 'pdb_unlock' assert await stream.receive() == 'pdb_unlock'
except trio.BrokenResourceError: # try:
# XXX: there may be a race with the portal teardown # assert await stream.receive() == 'pdb_unlock'
# with the calling actor which we can safely ignore.
# The alternative would be sending an ack message except trio.BrokenResourceError:
# and allowing the client to wait for us to teardown # XXX: there may be a race with the portal teardown
# first? # with the calling actor which we can safely ignore.
pass # The alternative would be sending an ack message
# and allowing the client to wait for us to teardown
# first?
if lock and lock.locked():
lock.release()
log.debug(f"TTY lock released, remote task: {task_name}:{subactor_uid}") log.debug(f"TTY lock released, remote task: {task_name}:{subactor_uid}")
@ -299,7 +304,7 @@ async def _breakpoint(
try: try:
async with get_root() as portal: async with get_root() as portal:
log.error('got portal') log.pdb('got portal')
# this syncs to child's ``Context.started()`` call. # this syncs to child's ``Context.started()`` call.
async with portal.open_context( async with portal.open_context(
@ -309,7 +314,7 @@ async def _breakpoint(
) as (ctx, val): ) as (ctx, val):
log.error('locked context') log.pdb('locked context')
assert val == 'Locked' assert val == 'Locked'
async with ctx.open_stream() as stream: async with ctx.open_stream() as stream:
@ -392,10 +397,17 @@ async def _breakpoint(
# callbacks. Can't think of a nicer way then this atm. # callbacks. Can't think of a nicer way then this atm.
if _debug_lock.locked(): if _debug_lock.locked():
log.warning( log.warning(
'Root actor attempting to acquire active tty lock' 'Root actor attempting to shield-acquire active tty lock'
f' owned by {_global_actor_in_debug}') f' owned by {_global_actor_in_debug}')
await _debug_lock.acquire() with trio.CancelScope(shield=True):
# must shield here to avoid hitting a ``Cancelled`` and
# a child getting stuck bc we clobbered the tty
await _debug_lock.acquire()
else:
# may be cancelled
await _debug_lock.acquire()
_global_actor_in_debug = actor.uid _global_actor_in_debug = actor.uid
_local_task_in_debug = task_name _local_task_in_debug = task_name