forked from goodboy/tractor
				
			
						commit
						d05885d650
					
				| 
						 | 
					@ -0,0 +1,12 @@
 | 
				
			||||||
 | 
					Fix graceful cancellation of daemon actors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Previously, his was a bug where if the soft wait on a sub-process (the
 | 
				
			||||||
 | 
					``await .proc.wait()``) in the reaper task teardown was cancelled we
 | 
				
			||||||
 | 
					would fail over to the hard reaping sequence (meant for culling off any
 | 
				
			||||||
 | 
					potential zombies via system kill signals). The hard reap has a timeout
 | 
				
			||||||
 | 
					of 3s (currently though in theory we could make it shorter?) before
 | 
				
			||||||
 | 
					system signalling kicks in. This means that any daemon actor still
 | 
				
			||||||
 | 
					running during nursery exit would get hard reaped (3s later) instead of
 | 
				
			||||||
 | 
					cancelled via IPC message. Now we catch the ``trio.Cancelled``, call
 | 
				
			||||||
 | 
					``Portal.cancel_actor()`` on the daemon and expect the child to
 | 
				
			||||||
 | 
					self-terminate after the runtime cancels and shuts down the process.
 | 
				
			||||||
| 
						 | 
					@ -501,3 +501,53 @@ def test_cancel_while_childs_child_in_sync_sleep(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    with pytest.raises(AssertionError):
 | 
					    with pytest.raises(AssertionError):
 | 
				
			||||||
        trio.run(main)
 | 
					        trio.run(main)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
 | 
				
			||||||
 | 
					    start_method,
 | 
				
			||||||
 | 
					):
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    This is a very subtle test which demonstrates how cancellation
 | 
				
			||||||
 | 
					    during process collection can result in non-optimal teardown
 | 
				
			||||||
 | 
					    performance on daemon actors. The fix for this test was to handle
 | 
				
			||||||
 | 
					    ``trio.Cancelled`` specially in the spawn task waiting in
 | 
				
			||||||
 | 
					    `proc.wait()` such that ``Portal.cancel_actor()`` is called before
 | 
				
			||||||
 | 
					    executing the "hard reap" sequence (which has an up to 3 second
 | 
				
			||||||
 | 
					    delay currently).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    In other words, if we can cancel the actor using a graceful remote
 | 
				
			||||||
 | 
					    cancellation, and it's faster, we might as well do it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    kbi_delay = 0.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def main():
 | 
				
			||||||
 | 
					        start = time.time()
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            async with trio.open_nursery() as nurse:
 | 
				
			||||||
 | 
					                async with tractor.open_nursery() as tn:
 | 
				
			||||||
 | 
					                    p = await tn.start_actor(
 | 
				
			||||||
 | 
					                        'fast_boi',
 | 
				
			||||||
 | 
					                        enable_modules=[__name__],
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    async def delayed_kbi():
 | 
				
			||||||
 | 
					                        await trio.sleep(kbi_delay)
 | 
				
			||||||
 | 
					                        print(f'RAISING KBI after {kbi_delay} s')
 | 
				
			||||||
 | 
					                        raise KeyboardInterrupt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    # start task which raises a kbi **after**
 | 
				
			||||||
 | 
					                    # the actor nursery ``__aexit__()`` has
 | 
				
			||||||
 | 
					                    # been run.
 | 
				
			||||||
 | 
					                    nurse.start_soon(delayed_kbi)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    await p.run(do_nuthin)
 | 
				
			||||||
 | 
					        finally:
 | 
				
			||||||
 | 
					            duration = time.time() - start
 | 
				
			||||||
 | 
					            if duration > 2.9:
 | 
				
			||||||
 | 
					                raise trio.TooSlowError(
 | 
				
			||||||
 | 
					                    'daemon cancel was slower then necessary..'
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    with pytest.raises(KeyboardInterrupt):
 | 
				
			||||||
 | 
					        trio.run(main)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -295,7 +295,17 @@ async def new_proc(
 | 
				
			||||||
                # ``trio.Process.__aexit__()`` (it tears down stdio
 | 
					                # ``trio.Process.__aexit__()`` (it tears down stdio
 | 
				
			||||||
                # which will kill any waiting remote pdb trace).
 | 
					                # which will kill any waiting remote pdb trace).
 | 
				
			||||||
                # This is a "soft" (cancellable) join/reap.
 | 
					                # This is a "soft" (cancellable) join/reap.
 | 
				
			||||||
                await proc.wait()
 | 
					                try:
 | 
				
			||||||
 | 
					                    await proc.wait()
 | 
				
			||||||
 | 
					                except trio.Cancelled:
 | 
				
			||||||
 | 
					                    # if cancelled during a soft wait, cancel the child
 | 
				
			||||||
 | 
					                    # actor before entering the hard reap sequence
 | 
				
			||||||
 | 
					                    # below. This means we try to do a graceful teardown
 | 
				
			||||||
 | 
					                    # via sending a cancel message before getting out
 | 
				
			||||||
 | 
					                    # zombie killing tools.
 | 
				
			||||||
 | 
					                    with trio.CancelScope(shield=True):
 | 
				
			||||||
 | 
					                        await portal.cancel_actor()
 | 
				
			||||||
 | 
					                    raise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                # cancel result waiter that may have been spawned in
 | 
					                # cancel result waiter that may have been spawned in
 | 
				
			||||||
                # tandem if not done already
 | 
					                # tandem if not done already
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue