From f84ef44992d92e7dbd3f59051ed707f9f7810a67 Mon Sep 17 00:00:00 2001 From: goodboy Date: Sun, 8 Mar 2026 15:13:09 -0400 Subject: [PATCH] Repair lifetime-stack suite's flakiness Even on linux i was noticing lotsa false negatives based on sub teardown race conditions, so this tries to both make way for (eventually?) expanding the set of suite cases and ensure the current ones are more reliable on every run. The main change is to change the `error_in_child=False` case to use parent-side-cancellation via a new `trio.move_on_after(timeout)` instead of `actor.cancel_soon()` (which is now toggled by a new `self_cancel: bool` but unused rn), and add better teardown assertions. Low level deats, - add `rent_cancel`/`self_cancel` params to `crash_and_clean_tmpdir()` for different cancel paths; default to `rent_cancel=True` which just sleeps forever letting the parent's timeout do the work. - use `trio.move_on_after()` with longer timeouts per case: 1.6s for error, 1s for cancel. - use the `.move_on_after()` cancel-scope to assert `.cancel_called` only when `error_in_child=False`, indicating we parent-graceful-cancelled the sub. - add `loglevel` fixture, pass to `open_nursery()`. - log caught `RemoteActorError` via console logger. - add `ids=` to parametrize for readable test names. (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- tests/test_runtime.py | 64 +++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/tests/test_runtime.py b/tests/test_runtime.py index 55553dd9..782af81e 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -22,6 +22,10 @@ def unlink_file(): async def crash_and_clean_tmpdir( tmp_file_path: str, error: bool = True, + rent_cancel: bool = True, + + # XXX unused, but do we really need to test these cases? 
+ self_cancel: bool = False, ): global _file_path _file_path = tmp_file_path @@ -32,43 +36,75 @@ async def crash_and_clean_tmpdir( assert os.path.isfile(tmp_file_path) await trio.sleep(0.1) if error: + print('erroring in subactor!') assert 0 - else: + + elif self_cancel: + print('SELF-cancelling subactor!') actor.cancel_soon() + elif rent_cancel: + await trio.sleep_forever() + + print('subactor exiting task!') + @pytest.mark.parametrize( 'error_in_child', [True, False], + ids='error_in_child={}'.format, ) @tractor_test async def test_lifetime_stack_wipes_tmpfile( tmp_path, error_in_child: bool, + loglevel: str, + # log: tractor.log.StackLevelAdapter, + # ^TODO, once landed via macos support! ): child_tmp_file = tmp_path / "child.txt" child_tmp_file.touch() assert child_tmp_file.exists() path = str(child_tmp_file) + # NOTE, this is expected to cancel the sub + # in the `error_in_child=False` case! + timeout: float = ( + 1.6 if error_in_child + else 1 + ) try: - with trio.move_on_after(0.5): - async with tractor.open_nursery() as n: - await ( # inlined portal - await n.run_in_actor( - crash_and_clean_tmpdir, - tmp_file_path=path, - error=error_in_child, - ) - ).result() - + with trio.move_on_after(timeout) as cs: + async with tractor.open_nursery( + loglevel=loglevel, + ) as an: + await ( # inlined `tractor.Portal` + await an.run_in_actor( + crash_and_clean_tmpdir, + tmp_file_path=path, + error=error_in_child, + ) + ).result() except ( tractor.RemoteActorError, - # tractor.BaseExceptionGroup, BaseExceptionGroup, - ): - pass + ) as _exc: + exc = _exc + from tractor.log import get_console_log + log = get_console_log( + level=loglevel, + name=__name__, + ) + log.exception( + f'Subactor failed as expected with {type(exc)!r}\n' + ) # tmp file should have been wiped by # teardown stack. assert not child_tmp_file.exists() + + if error_in_child: + assert not cs.cancel_called + else: + # expect timeout in some cases? + assert cs.cancel_called