forked from goodboy/tractor
				
			
						commit
						d05885d650
					
				| 
						 | 
				
			
			@ -0,0 +1,12 @@
 | 
			
		|||
Fix graceful cancellation of daemon actors
 | 
			
		||||
 | 
			
		||||
Previously, this was a bug where if the soft wait on a sub-process (the
 | 
			
		||||
``await .proc.wait()``) in the reaper task teardown was cancelled we
 | 
			
		||||
would fail over to the hard reaping sequence (meant for culling off any
 | 
			
		||||
potential zombies via system kill signals). The hard reap has a timeout
 | 
			
		||||
of 3s (currently though in theory we could make it shorter?) before
 | 
			
		||||
system signalling kicks in. This means that any daemon actor still
 | 
			
		||||
running during nursery exit would get hard reaped (3s later) instead of
 | 
			
		||||
cancelled via IPC message. Now we catch the ``trio.Cancelled``, call
 | 
			
		||||
``Portal.cancel_actor()`` on the daemon and expect the child to
 | 
			
		||||
self-terminate after the runtime cancels and shuts down the process.
 | 
			
		||||
| 
						 | 
				
			
			@ -501,3 +501,53 @@ def test_cancel_while_childs_child_in_sync_sleep(
 | 
			
		|||
 | 
			
		||||
    with pytest.raises(AssertionError):
 | 
			
		||||
        trio.run(main)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
    start_method,
):
    '''
    This is a very subtle test which demonstrates how cancellation
    during process collection can result in non-optimal teardown
    performance on daemon actors. The fix for this test was to handle
    ``trio.Cancelled`` specially in the spawn task waiting in
    `proc.wait()` such that ``Portal.cancel_actor()`` is called before
    executing the "hard reap" sequence (which has an up to 3 second
    delay currently).

    In other words, if we can cancel the actor using a graceful remote
    cancellation, and it's faster, we might as well do it.

    The test passes when the ``KeyboardInterrupt`` raised by the
    background task propagates out of ``trio.run()`` in less than
    ``max_duration`` seconds; otherwise ``trio.TooSlowError`` replaces
    it and the ``pytest.raises()`` check fails.

    '''
    # delay (in seconds) before the background task raises the kbi
    kbi_delay = 0.2

    # upper bound (in seconds) on the whole run; kept just under the
    # "up to 3 second" hard reap delay mentioned above so that falling
    # through to the hard reap is detected as a failure.
    max_duration = 2.9

    async def main():
        # use a monotonic clock for the elapsed time measurement so
        # that system (wall) clock adjustments can't skew the result.
        start = time.monotonic()
        try:
            async with trio.open_nursery() as nurse:
                async with tractor.open_nursery() as tn:
                    p = await tn.start_actor(
                        'fast_boi',
                        enable_modules=[__name__],
                    )

                    async def delayed_kbi():
                        await trio.sleep(kbi_delay)
                        print(f'RAISING KBI after {kbi_delay} s')
                        raise KeyboardInterrupt

                    # start task which raises a kbi **after**
                    # the actor nursery ``__aexit__()`` has
                    # been run.
                    nurse.start_soon(delayed_kbi)

                    await p.run(do_nuthin)
        finally:
            duration = time.monotonic() - start
            if duration > max_duration:
                # raising here replaces the in-flight kbi so the
                # ``pytest.raises()`` below fails loudly.
                raise trio.TooSlowError(
                    'daemon cancel was slower than necessary..'
                )

    with pytest.raises(KeyboardInterrupt):
        trio.run(main)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -295,7 +295,17 @@ async def new_proc(
 | 
			
		|||
                # ``trio.Process.__aexit__()`` (it tears down stdio
 | 
			
		||||
                # which will kill any waiting remote pdb trace).
 | 
			
		||||
                # This is a "soft" (cancellable) join/reap.
 | 
			
		||||
                try:
 | 
			
		||||
                    await proc.wait()
 | 
			
		||||
                except trio.Cancelled:
 | 
			
		||||
                    # if cancelled during a soft wait, cancel the child
 | 
			
		||||
                    # actor before entering the hard reap sequence
 | 
			
		||||
                    # below. This means we try to do a graceful teardown
 | 
			
		||||
                    # via sending a cancel message before getting out
 | 
			
		||||
                    # zombie killing tools.
 | 
			
		||||
                    with trio.CancelScope(shield=True):
 | 
			
		||||
                        await portal.cancel_actor()
 | 
			
		||||
                    raise
 | 
			
		||||
 | 
			
		||||
                # cancel result waiter that may have been spawned in
 | 
			
		||||
                # tandem if not done already
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue