From ef725c597267fbfe3e6c4b1576e5741e798075a3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 25 Jun 2021 20:52:08 -0400 Subject: [PATCH] Always hard kill sub-procs on teardown Adds a new hard kill routine for the `trio` spawning backend. --- tractor/_spawn.py | 52 +++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/tractor/_spawn.py b/tractor/_spawn.py index d893479..ca66de1 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -22,7 +22,10 @@ from multiprocessing import forkserver # type: ignore from typing import Tuple from . import _forkserver_override -from ._state import current_actor, is_main_process +from ._state import ( + current_actor, + is_main_process, +) from .log import get_logger from ._portal import Portal from ._actor import Actor, ActorFailure @@ -149,6 +152,27 @@ async def cancel_on_completion( await portal.cancel_actor() +async def do_hard_kill( + proc: trio.Process, +) -> None: + # NOTE: this timeout used to do nothing since we were shielding + # the ``.wait()`` inside ``new_proc()`` which will pretty much + # never release until the process exits, now it acts as + # a hard-kill time ultimatum. + with trio.move_on_after(3) as cs: + + # NOTE: This ``__aexit__()`` shields internally. + async with proc: # calls ``trio.Process.aclose()`` + log.debug(f"Terminating {proc}") + + if cs.cancelled_caught: + # XXX: should pretty much never get here unless we have + # to move the bits from ``proc.__aexit__()`` out and + # into here. + log.critical(f"HARD KILLING {proc}") + proc.kill() + + @asynccontextmanager async def spawn_subactor( subactor: 'Actor', @@ -180,26 +204,15 @@ async def spawn_subactor( proc = await trio.open_process(spawn_cmd) try: yield proc + finally: + # XXX: do this **after** cancellation/tearfown # to avoid killing the process too early # since trio does this internally on ``__aexit__()`` - # NOTE: we always "shield" join sub procs in - # the outer scope since no actor zombies are - # ever allowed. This ``__aexit__()`` also shields - # internally. log.debug(f"Attempting to kill {proc}") - - # NOTE: this timeout effectively does nothing right now since - # we are shielding the ``.wait()`` inside ``new_proc()`` which - # will pretty much never release until the process exits. - with trio.move_on_after(3) as cs: - async with proc: - log.debug(f"Terminating {proc}") - if cs.cancelled_caught: - log.critical(f"HARD KILLING {proc}") - proc.kill() + await do_hard_kill(proc) async def new_proc( @@ -277,9 +290,14 @@ async def new_proc( # reaping more stringently without the shield # we used to have below... - # always "hard" join sub procs: - # no actor zombies allowed # with trio.CancelScope(shield=True): + # async with proc: + + # Always "hard" join sub procs since no actor zombies + # are allowed! + + # this is a "light" (cancellable) join, the hard join is + # in the enclosing scope (see above). await proc.wait() log.debug(f"Joined {proc}")