Repair lifetime-stack suite's flakiness
Even on linux I was noticing lots of false negatives due to sub teardown race conditions, so this tries to both make way for (eventually?) expanding the set of suite cases and ensure the current ones are more reliable on every run. The main change is to change the `error_in_child=False` case to use parent-side-cancellation via a new `trio.move_on_after(timeout)` instead of `actor.cancel_soon()` (which is now toggled by a new `self_cancel: bool` but unused rn), and add better teardown assertions. Low level deats, - add `rent_cancel`/`self_cancel` params to `crash_and_clean_tmpdir()` for different cancel paths; default to `rent_cancel=True` which just sleeps forever letting the parent's timeout do the work. - use `trio.move_on_after()` with longer timeouts per case: 1.6s for error, 1s for cancel. - use the `.move_on_after()` cancel-scope to assert `.cancel_called` only when `error_in_child=False`, indicating we parent-graceful-cancelled the sub. - add `loglevel` fixture, pass to `open_nursery()`. - log caught `RemoteActorError` via console logger. - add `ids=` to parametrize for readable test names. (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code ns_aware
parent
1e0c57c6c5
commit
f84ef44992
|
|
@ -22,6 +22,10 @@ def unlink_file():
|
|||
async def crash_and_clean_tmpdir(
|
||||
tmp_file_path: str,
|
||||
error: bool = True,
|
||||
rent_cancel: bool = True,
|
||||
|
||||
# XXX unused, but do we really need to test these cases?
|
||||
self_cancel: bool = False,
|
||||
):
|
||||
global _file_path
|
||||
_file_path = tmp_file_path
|
||||
|
|
@ -32,43 +36,75 @@ async def crash_and_clean_tmpdir(
|
|||
assert os.path.isfile(tmp_file_path)
|
||||
await trio.sleep(0.1)
|
||||
if error:
|
||||
print('erroring in subactor!')
|
||||
assert 0
|
||||
else:
|
||||
|
||||
elif self_cancel:
|
||||
print('SELF-cancelling subactor!')
|
||||
actor.cancel_soon()
|
||||
|
||||
elif rent_cancel:
|
||||
await trio.sleep_forever()
|
||||
|
||||
print('subactor exiting task!')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'error_in_child',
|
||||
[True, False],
|
||||
ids='error_in_child={}'.format,
|
||||
)
|
||||
@tractor_test
|
||||
async def test_lifetime_stack_wipes_tmpfile(
|
||||
tmp_path,
|
||||
error_in_child: bool,
|
||||
loglevel: str,
|
||||
# log: tractor.log.StackLevelAdapter,
|
||||
# ^TODO, once landed via macos support!
|
||||
):
|
||||
child_tmp_file = tmp_path / "child.txt"
|
||||
child_tmp_file.touch()
|
||||
assert child_tmp_file.exists()
|
||||
path = str(child_tmp_file)
|
||||
|
||||
# NOTE, this is expected to cancel the sub
|
||||
# in the `error_in_child=False` case!
|
||||
timeout: float = (
|
||||
1.6 if error_in_child
|
||||
else 1
|
||||
)
|
||||
try:
|
||||
with trio.move_on_after(0.5):
|
||||
async with tractor.open_nursery() as n:
|
||||
await ( # inlined portal
|
||||
await n.run_in_actor(
|
||||
with trio.move_on_after(timeout) as cs:
|
||||
async with tractor.open_nursery(
|
||||
loglevel=loglevel,
|
||||
) as an:
|
||||
await ( # inlined `tractor.Portal`
|
||||
await an.run_in_actor(
|
||||
crash_and_clean_tmpdir,
|
||||
tmp_file_path=path,
|
||||
error=error_in_child,
|
||||
)
|
||||
).result()
|
||||
|
||||
except (
|
||||
tractor.RemoteActorError,
|
||||
# tractor.BaseExceptionGroup,
|
||||
BaseExceptionGroup,
|
||||
):
|
||||
pass
|
||||
) as _exc:
|
||||
exc = _exc
|
||||
from tractor.log import get_console_log
|
||||
log = get_console_log(
|
||||
level=loglevel,
|
||||
name=__name__,
|
||||
)
|
||||
log.exception(
|
||||
f'Subactor failed as expected with {type(exc)!r}\n'
|
||||
)
|
||||
|
||||
# tmp file should have been wiped by
|
||||
# teardown stack.
|
||||
assert not child_tmp_file.exists()
|
||||
|
||||
if error_in_child:
|
||||
assert not cs.cancel_called
|
||||
else:
|
||||
# expect timeout in some cases?
|
||||
assert cs.cancel_called
|
||||
|
|
|
|||
Loading…
Reference in New Issue