From 79f0d6fda0986424353b7175e5a319eeab86d406 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 19 Aug 2021 12:36:55 -0400 Subject: [PATCH] Attempt to avoid pdb lockups on channel breakage Always try to release the root tty lock on broken connection errors. --- tractor/_debug.py | 48 +++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/tractor/_debug.py b/tractor/_debug.py index 9ac79a1..e9bfaf1 100644 --- a/tractor/_debug.py +++ b/tractor/_debug.py @@ -238,24 +238,29 @@ async def _hijack_stdin_for_child( with trio.CancelScope(shield=True): - async with _acquire_debug_lock(subactor_uid): + try: + lock = None + async with _acquire_debug_lock(subactor_uid) as lock: - # indicate to child that we've locked stdio - await ctx.started('Locked') - log.pdb(f"Actor {subactor_uid} ACQUIRED stdin hijack lock") + # indicate to child that we've locked stdio + await ctx.started('Locked') + log.pdb(f"Actor {subactor_uid} ACQUIRED stdin hijack lock") - # wait for unlock pdb by child - async with ctx.open_stream() as stream: - try: + # wait for unlock pdb by child + async with ctx.open_stream() as stream: assert await stream.receive() == 'pdb_unlock' - except trio.BrokenResourceError: - # XXX: there may be a race with the portal teardown - # with the calling actor which we can safely ignore. - # The alternative would be sending an ack message - # and allowing the client to wait for us to teardown - # first? - pass + # try: + # assert await stream.receive() == 'pdb_unlock' + + except trio.BrokenResourceError: + # XXX: there may be a race with the portal teardown + # with the calling actor which we can safely ignore. + # The alternative would be sending an ack message + # and allowing the client to wait for us to teardown + # first? + if lock and lock.locked(): + lock.release() log.debug(f"TTY lock released, remote task: {task_name}:{subactor_uid}") @@ -299,7 +304,7 @@ async def _breakpoint( try: async with get_root() as portal: - log.error('got portal') + log.pdb('got portal') # this syncs to child's ``Context.started()`` call. async with portal.open_context( @@ -309,7 +314,7 @@ async def _breakpoint( ) as (ctx, val): - log.error('locked context') + log.pdb('locked context') assert val == 'Locked' async with ctx.open_stream() as stream: @@ -392,10 +397,17 @@ async def _breakpoint( # callbacks. Can't think of a nicer way then this atm. if _debug_lock.locked(): log.warning( - 'Root actor attempting to acquire active tty lock' + 'Root actor attempting to shield-acquire active tty lock' f' owned by {_global_actor_in_debug}') - await _debug_lock.acquire() + with trio.CancelScope(shield=True): + # must shield here to avoid hitting a ``Cancelled`` and + # a child getting stuck bc we clobbered the tty + await _debug_lock.acquire() + + else: + # may be cancelled + await _debug_lock.acquire() _global_actor_in_debug = actor.uid _local_task_in_debug = task_name