commit
d05885d650
|
@ -0,0 +1,12 @@
|
|||
Fix graceful cancellation of daemon actors
|
||||
|
||||
Previously, his was a bug where if the soft wait on a sub-process (the
|
||||
``await .proc.wait()``) in the reaper task teardown was cancelled we
|
||||
would fail over to the hard reaping sequence (meant for culling off any
|
||||
potential zombies via system kill signals). The hard reap has a timeout
|
||||
of 3s (currently though in theory we could make it shorter?) before
|
||||
system signalling kicks in. This means that any daemon actor still
|
||||
running during nursery exit would get hard reaped (3s later) instead of
|
||||
cancelled via IPC message. Now we catch the ``trio.Cancelled``, call
|
||||
``Portal.cancel_actor()`` on the daemon and expect the child to
|
||||
self-terminate after the runtime cancels and shuts down the process.
|
|
@ -501,3 +501,53 @@ def test_cancel_while_childs_child_in_sync_sleep(
|
|||
|
||||
with pytest.raises(AssertionError):
|
||||
trio.run(main)
|
||||
|
||||
|
||||
def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
|
||||
start_method,
|
||||
):
|
||||
'''
|
||||
This is a very subtle test which demonstrates how cancellation
|
||||
during process collection can result in non-optimal teardown
|
||||
performance on daemon actors. The fix for this test was to handle
|
||||
``trio.Cancelled`` specially in the spawn task waiting in
|
||||
`proc.wait()` such that ``Portal.cancel_actor()`` is called before
|
||||
executing the "hard reap" sequence (which has an up to 3 second
|
||||
delay currently).
|
||||
|
||||
In other words, if we can cancel the actor using a graceful remote
|
||||
cancellation, and it's faster, we might as well do it.
|
||||
|
||||
'''
|
||||
kbi_delay = 0.2
|
||||
|
||||
async def main():
|
||||
start = time.time()
|
||||
try:
|
||||
async with trio.open_nursery() as nurse:
|
||||
async with tractor.open_nursery() as tn:
|
||||
p = await tn.start_actor(
|
||||
'fast_boi',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
||||
async def delayed_kbi():
|
||||
await trio.sleep(kbi_delay)
|
||||
print(f'RAISING KBI after {kbi_delay} s')
|
||||
raise KeyboardInterrupt
|
||||
|
||||
# start task which raises a kbi **after**
|
||||
# the actor nursery ``__aexit__()`` has
|
||||
# been run.
|
||||
nurse.start_soon(delayed_kbi)
|
||||
|
||||
await p.run(do_nuthin)
|
||||
finally:
|
||||
duration = time.time() - start
|
||||
if duration > 2.9:
|
||||
raise trio.TooSlowError(
|
||||
'daemon cancel was slower then necessary..'
|
||||
)
|
||||
|
||||
with pytest.raises(KeyboardInterrupt):
|
||||
trio.run(main)
|
||||
|
|
|
@ -295,7 +295,17 @@ async def new_proc(
|
|||
# ``trio.Process.__aexit__()`` (it tears down stdio
|
||||
# which will kill any waiting remote pdb trace).
|
||||
# This is a "soft" (cancellable) join/reap.
|
||||
try:
|
||||
await proc.wait()
|
||||
except trio.Cancelled:
|
||||
# if cancelled during a soft wait, cancel the child
|
||||
# actor before entering the hard reap sequence
|
||||
# below. This means we try to do a graceful teardown
|
||||
# via sending a cancel message before getting out
|
||||
# zombie killing tools.
|
||||
with trio.CancelScope(shield=True):
|
||||
await portal.cancel_actor()
|
||||
raise
|
||||
|
||||
# cancel result waiter that may have been spawned in
|
||||
# tandem if not done already
|
||||
|
|
Loading…
Reference in New Issue