From 5983c082187a7e67eb15229ff7e3924e8d47b912 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 25 Jun 2021 20:52:08 -0400 Subject: [PATCH] Fix hard kill in debug mode; only do it when debug lock is empty --- tractor/_spawn.py | 70 +++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 03ca984..8bd82e2 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -28,6 +28,7 @@ from ._state import ( is_root_process, _runtime_vars, ) +from ._debug import _global_actor_in_debug from .log import get_logger from ._portal import Portal @@ -154,6 +155,26 @@ async def cancel_on_completion( # cancel the process now that we have a final result await portal.cancel_actor() +async def do_hard_kill( + proc: trio.Process, +) -> None: + # NOTE: this timeout used to do nothing since we were shielding + # the ``.wait()`` inside ``new_proc()`` which will pretty much + # never release until the process exits, now it acts as + # a hard-kill time ultimatum. + with trio.move_on_after(3) as cs: + + # NOTE: This ``__aexit__()`` shields internally. + async with proc: # calls ``trio.Process.aclose()`` + log.debug(f"Terminating {proc}") + + if cs.cancelled_caught: + # XXX: should pretty much never get here unless we have + # to move the bits from ``proc.__aexit__()`` out and + # into here. + log.critical(f"HARD KILLING {proc}") + proc.kill() + @asynccontextmanager async def spawn_subactor( @@ -201,33 +222,31 @@ async def spawn_subactor( # root-processe's ``trio.Process.aclose()`` can clobber # any existing debugger session so we avoid and _runtime_vars['_debug_mode'] + and _global_actor_in_debug is not None ): - # XXX: this is ``trio.Process.aclose()`` minus - # the std-streams pre-closing steps and ``Process.kill()`` - # calls. + # XXX: this is ``trio.Process.aclose()`` MINUS the + # std-streams pre-closing steps inside ``proc.__aexit__()`` + # (see below) which incluses a ``Process.kill()`` call + + log.critical( + "Root process tty is locked in debug mode by " + f"{_global_actor_in_debug}. If the console is hanging, you " + "may need to trigger a KBI to kill any " + "not-fully-initialized" " subprocesses and allow errors " + "from `trio` to propagate" + ) try: + # one more graceful wait try can can be cancelled by KBI + # sent by user. await proc.wait() + finally: if proc.returncode is None: - # XXX: skip this when in debug and a session might - # still be live - # proc.kill() - with trio.CancelScope(shield=True): - await proc.wait() + # with trio.CancelScope(shield=True): + await do_hard_kill(proc) else: - # NOTE: this timeout used to do nothing since we were shielding - # the ``.wait()`` inside ``new_proc()`` which will pretty much - # never release until the process exits, now it acts as - # a hard-kill time ultimatum. - with trio.move_on_after(3) as cs: - - # NOTE: This ``__aexit__()`` shields internally. - async with proc: # calls ``trio.Process.aclose()`` - log.debug(f"Terminating {proc}") - - if cs.cancelled_caught: - log.critical(f"HARD KILLING {proc}") - proc.kill() + # with trio.CancelScope(shield=True): + await do_hard_kill(proc) async def new_proc( @@ -304,9 +323,14 @@ async def new_proc( # reaping more stringently without the shield # we used to have below... - # always "hard" join sub procs: - # no actor zombies allowed # with trio.CancelScope(shield=True): + # async with proc: + + # Always "hard" join sub procs since no actor zombies + # are allowed! + + # this is a "light" (cancellable) join, the hard join is + # in the enclosing scope (see above). await proc.wait() log.debug(f"Joined {proc}")