tractor/tests/test_legacy_one_way_streami...

513 lines
14 KiB
Python
Raw Normal View History

"""
Streaming via the, now legacy, "async-gen API".
"""
import time
2019-03-29 23:10:56 +00:00
from functools import partial
from pathlib import Path
import platform
from typing import (
Callable,
Literal,
)
import trio
import tractor
import pytest
Add (back) a `tractor._testing` sub-pkg Since importing from our top level `conftest.py` is not scaleable or as "future forward thinking" in terms of: - LoC-wise (it's only one file), - prevents "external" (aka non-test) example scripts from importing content easily, - seemingly(?) can't be used via abs-import if using a `[tool.pytest.ini_options]` in a `pyproject.toml` vs. a `pytest.ini`, see: https://docs.pytest.org/en/8.0.x/reference/customize.html#pyproject-toml) => Go back to having an internal "testing" pkg like `trio` (kinda) does. Deats: - move generic top level helpers into pkg-mod including the new `expect_ctxc()` (which i needed in the advanced faults testing script. - move `@tractor_test` into `._testing.pytest` sub-mod. - adjust all the helper imports to be a `from tractor._testing import <..>` Rework `test_ipc_channel_break_during_stream()` and backing script: - make test(s) pull `debug_mode` from new fixture (which is now controlled manually from `--tpdb` flag) and drop the previous parametrized input. - update logic in ^ test for "which-side-fails" cases to better match recently updated/stricter cancel/failure semantics in terms of `ClosedResouruceError` vs. `EndOfChannel` expectations. - handle `ExceptionGroup`s with expected embedded errors in test. - better pendantics around whether to expect a user simulated KBI. - for `examples/advanced_faults/ipc_failure_during_stream.py` script: - generalize ipc breakage in new `break_ipc()` with support for diff internal `trio` methods and a #TODO for future disti frameworks - only make one sub-actor task break and the other just stream. - use new `._testing.expect_ctxc()` around ctx block. - add a bit of exception handling with `print()`s around ctxc (unused except if 'msg' break method is set) and eoc cases. - don't break parent side ipc in loop any more then once after first break, checked via flag var. 
- add a `pre_close: bool` flag to control whether `MsgStreama.aclose()` is called *before* any ipc breakage method. Still TODO: - drop `pytest.ini` and add the alt section to `pyproject.py`. -> currently can't get `--rootdir=` opt to work.. not showing in console header. -> ^ also breaks on 'tests' `enable_modules` imports in subactors during discovery tests?
2024-03-12 19:48:20 +00:00
from tractor._testing import tractor_test
2022-10-09 22:03:17 +00:00
2019-03-29 23:10:56 +00:00
def test_must_define_ctx():
    '''
    Applying `@tractor.stream` to a function which declares no `ctx`
    parameter must raise a `TypeError` whose message names the
    offending function; decorating one which does declare `ctx` is
    expected to go through without raising.

    '''
    with pytest.raises(TypeError) as excinfo:

        @tractor.stream
        async def no_ctx():
            pass

    expected_fragment: str = "no_ctx must be `ctx: tractor.Context"
    reported: str = str(excinfo.value)
    assert expected_fragment in reported

    # a signature which includes `ctx` decorates cleanly.
    @tractor.stream
    async def has_ctx(ctx):
        pass
async def async_gen_stream(sequence):
    '''
    Async-generator flavoured streamer.

    Yields every item of `sequence` in order, sleeping 0.1s after
    each one, then parks inside a fresh cancel scope until cancelled.

    '''
    for item in sequence:
        yield item
        await trio.sleep(0.1)

    # block indefinitely waiting to be cancelled by ``aclose()`` call
    with trio.CancelScope() as scope:
        await trio.sleep_forever()
        # only reachable if the "forever" sleep somehow returns.
        assert 0

    assert scope.cancelled_caught
# TODO: deprecated; either remove entirely or re-implement in terms
# of a one-sided `MsgStream` wrapper, but at the very least remove
# `Portal.open_stream_from()`.
@tractor.stream
async def context_stream(
    ctx: tractor.Context,
    sequence: list[int],
):
    '''
    Context flavoured streamer.

    Sends every item of `sequence` in order through
    `ctx.send_yield()`, sleeping 0.1s after each one, then parks
    inside a fresh cancel scope until cancelled.

    '''
    for item in sequence:
        await ctx.send_yield(item)
        await trio.sleep(0.1)

    # block indefinitely waiting to be cancelled by ``aclose()`` call
    never: float = float('inf')
    with trio.CancelScope() as scope:
        await trio.sleep(never)
        # only reachable if the infinite sleep somehow returns.
        assert 0

    assert scope.cancelled_caught
async def stream_from_single_subactor(
    reg_addr,
    start_method,
    stream_func,
):
    '''
    Verify we can spawn a daemon actor and retrieve streamed data.

    Unless an actor named 'streamerd' is already found, spawn one,
    stream `list(range(10))` back from it via `stream_func` and check
    each received value in order; then close the stream, confirm it
    reports end-of-stream and cancel the subactor.

    '''
    # only one per host address, spawns an actor if None
    async with (
        tractor.open_nursery(
            registry_addrs=[reg_addr],
            start_method=start_method,
        ) as an,
        tractor.find_actor('streamerd') as portals,
    ):
        if portals:
            # an existing 'streamerd' was found: nothing to exercise.
            return

        # no 'streamerd' actor found so spawn our own.
        portal = await an.start_actor(
            'streamerd',
            enable_modules=[__name__],
        )
        seq = range(10)

        with trio.fail_after(5):
            async with portal.open_stream_from(
                stream_func,
                sequence=list(seq),  # has to be msgpack serializable
            ) as stream:

                # it'd sure be nice to have an asyncitertools here...
                remaining = iter(seq)
                expected = next(remaining)

                async for val in stream:
                    assert val == expected
                    try:
                        expected = next(remaining)
                    except StopIteration:
                        # should cancel far end task which will be
                        # caught and no error is raised
                        await stream.aclose()

                await trio.sleep(0.3)

                # ensure EOC signalled-state translates
                # XXX: not really sure this is correct,
                # shouldn't it be a `ClosedResourceError`?
                try:
                    await stream.__anext__()
                except StopAsyncIteration:
                    # stop all spawned subactors
                    await portal.cancel_actor()
2019-03-29 23:10:56 +00:00
@pytest.mark.parametrize(
    'stream_func',
    [
        async_gen_stream,
        context_stream,
    ],
    # NOTE: derive the id from the function's *name*; formatting the
    # function object itself embeds its `repr()` (which includes
    # a memory address) and so yields a different test id per run.
    ids=lambda func: (
        f'stream_func={getattr(func, "__name__", repr(func))}'
    ),
)
def test_stream_from_single_subactor(
    reg_addr: tuple,
    start_method: str,
    stream_func: Callable,
):
    '''
    Verify streaming from a spawned subactor, once per parametrized
    streaming function (async-gen and `ctx`-taking flavours).

    Simply drives `stream_from_single_subactor()` to completion under
    `trio.run()`; any failed check in there fails this test.

    '''
    trio.run(
        partial(
            stream_from_single_subactor,
            reg_addr,
            start_method,
            stream_func=stream_func,
        ),
    )
# this is the first 2 actors, streamer_1 and streamer_2
async def stream_data(seed: int):
    '''
    Yield the ints `0` up to (but excluding) `seed`, in order, with
    a tiny sleep after each one.

    '''
    for value in range(seed):
        yield value

        # trigger scheduler to simulate practical usage
        await trio.sleep(0.0001)
# this is the third actor; the aggregator
async def aggregate(seed: int):
    '''
    Spawn two 'streamer_<i>' subactors which each run
    `stream_data(seed=seed)`, funnel both streams through one local
    memory channel and yield every distinct value exactly once to
    whoever is consuming this async-generator.

    '''
    async with tractor.open_nursery() as an:

        portals = []
        for idx in (1, 2):
            # fork point
            portals.append(
                await an.start_actor(
                    name=f'streamer_{idx}',
                    enable_modules=[__name__],
                )
            )

        send_chan, recv_chan = trio.open_memory_channel(500)

        async def push_to_chan(portal, chan):
            # relay one subactor's stream into the shared channel,
            # closing this task's clone of the send side when done.
            async with chan:
                async with portal.open_stream_from(
                    stream_data,
                    seed=seed,
                ) as stream:
                    async for value in stream:
                        # leverage trio's built-in backpressure
                        await chan.send(value)

            print(
                f'FINISHED ITERATING!\n'
                f'peer: {portal.channel.aid.uid}'
            )

        # spawn 2 trio tasks to collect streams and push to a local queue
        async with trio.open_nursery() as tn:
            for portal in portals:
                tn.start_soon(
                    push_to_chan,
                    portal,
                    send_chan.clone(),
                )

            # close this local task's reference to send side
            await send_chan.aclose()

            seen = set()
            async with recv_chan:
                async for value in recv_chan:
                    if value not in seen:
                        seen.add(value)
                        # yield upwards to the spawning parent actor
                        yield value

                    assert value in seen

            print("FINISHED ITERATING in aggregator")

        await an.cancel()
        print("WAITING on `ActorNursery` to finish")

    print("AGGREGATOR COMPLETE!")
async def a_quadruple_example() -> list[int]:
    '''
    Spawn an 'aggregator' subactor, consume the full
    `aggregate(seed=1000)` stream from it and return the collected
    values after checking they equal `list(range(1000))`.

    Stream-only and stream-plus-spawn wall-clock timings are printed
    along the way.

    '''
    async with tractor.open_nursery() as an:
        seed: int = int(1e3)

        spawn_began: float = time.time()
        portal = await an.start_actor(
            name='aggregator',
            enable_modules=[__name__],
        )
        stream_began: float = time.time()

        # the portal call returns exactly what you'd expect
        # as if the remote "aggregate" function was called locally
        async with portal.open_stream_from(
            aggregate,
            seed=seed,
        ) as stream:
            result_stream = [value async for value in stream]

        print(f"STREAM TIME = {time.time() - stream_began}")
        print(f"STREAM + SPAWN TIME = {time.time() - spawn_began}")

        assert result_stream == list(range(seed))
        await portal.cancel_actor()
        return result_stream
async def cancel_after(
    wait: float,
    reg_addr: tuple,
) -> list[int]|None:
    '''
    Run `a_quadruple_example()` inside a root actor (registered at
    `reg_addr`) but give up on it after `wait` seconds.

    Returns the example's result list when it finishes in time,
    otherwise `None`: once the `trio.move_on_after()` deadline
    expires the scope is exited without a value being returned.

    '''
    async with tractor.open_root_actor(
        registry_addrs=[reg_addr],
    ):
        with trio.move_on_after(wait):
            return await a_quadruple_example()

    # timed out (cancelled) before the example could complete.
    return None
@pytest.fixture(scope='module')
def time_quad_ex(
    reg_addr: tuple,
    ci_env: bool,
    spawn_backend: str,
):
    '''
    Module-scoped fixture which runs the quad example once through
    `cancel_after()` and delivers `(results, elapsed_seconds)`.

    The run is given 4s on Linux and 7s elsewhere. The fixture skips
    when `ci_env` is set on a non-Linux platform and whenever the
    'mp' spawn backend is in use.

    '''
    _sys: str = platform.system()
    non_linux: bool = _sys != 'Linux'
    if ci_env and non_linux:
        pytest.skip(f'Test is too flaky on {_sys!r} in CI')

    # no idea but the mp *nix runs are flaking out here often...
    if spawn_backend == 'mp':
        pytest.skip("Test is too flaky on mp in CI")

    timeout = 4 if not non_linux else 7

    began: float = time.time()
    results: list[int] = trio.run(
        cancel_after,
        timeout,
        reg_addr,
    )
    diff: float = time.time() - began

    assert results
    return results, diff
def get_cpu_state(
icpu: int = 0,
setting: Literal[
'scaling_governor',
'*_pstate_max_freq',
'scaling_max_freq',
# 'scaling_cur_freq',
] = '*_pstate_max_freq',
) -> tuple[
Path,
str|int,
]|None:
'''
Attempt to read the (first) CPU's setting according
to the set `setting` from under the file-sys,
/sys/devices/system/cpu/cpu0/cpufreq/{setting}
Useful to determine latency limits for various perf affected test
suites.
'''
try:
# Read governor for core 0 (usually same for all)
setting_path: Path = list(
Path(f'/sys/devices/system/cpu/cpu{icpu}/cpufreq/')
.glob(f'{setting}')
)[0] # <- XXX must be single match!
with open(
setting_path,
'r',
) as f:
return (
setting_path,
f.read().strip(),
)
except FileNotFoundError:
return None
def test_a_quadruple_example(
    time_quad_ex: tuple[list[int], float],
    ci_env: bool,
    spawn_backend: str,
    test_log: tractor.log.StackLevelAdapter,
):
    '''
    This also serves as a "we'd like to be this fast" smoke test
    given past empirical eval of this suite.

    The run timed by the `time_quad_ex` fixture must have produced
    results and taken less than the limit: 6s off Linux, 3s on Linux
    unless adjusted by the CPU-frequency-scaling compensation below.

    '''
    non_linux: bool = platform.system() != 'Linux'
    this_fast_on_linux: float = 3
    this_fast = (
        6 if non_linux
        else this_fast_on_linux
    )
    # ^ XXX NOTE,
    # i've noticed that tweaking the CPU governor setting
    # to not "always" enable "turbo" mode can result in latency
    # which causes this limit to be too little. Not sure if it'd
    # be worth it to adjust the linux value based on reading the
    # CPU conf from the sys?
    #
    # For ex, see the `auto-cpufreq` docs on such settings,
    # https://github.com/AdnanHodzic/auto-cpufreq?tab=readme-ov-file#example-config-file-contents
    #
    # HENCE this below auxiliary compensation logic..
    if not non_linux:
        max_state = get_cpu_state()
        cur_state = get_cpu_state(
            setting='scaling_max_freq',
        )
        # XXX, either read can come back as `None` (per the helper's
        # return annotation) in which case there is nothing to
        # compensate with; keep the default linux limit instead of
        # crashing on the tuple-unpack.
        if (
            max_state is not None
            and
            cur_state is not None
        ):
            mx_pth, max_freq = max_state
            cur_pth, cur_freq = cur_state
            cpu_scaled: float = (
                int(cur_freq) / int(max_freq)
            )
            if cpu_scaled != 1.:
                # NOTE(review): for `0.5 < cpu_scaled < 1` this
                # formula yields a limit *below* the baseline even
                # though the log msg says "Increasing" - confirm
                # that is intended.
                this_fast = (
                    this_fast_on_linux / (
                        cpu_scaled * 2  # <- bc likely "dual threaded"
                        # ^TODO, calc the thr-per-core val?
                    )
                )
                test_log.warning(
                    f'Increasing time-limit on linux bc CPU scaling,\n'
                    f'\n'
                    f'{mx_pth} = {max_freq}\n'
                    f'{cur_pth} = {cur_freq}\n'
                    f'\n'
                    f'cpu_scaled = {cpu_scaled}\n'
                    f'this_fast_on_linux: {this_fast_on_linux} -> {this_fast}\n'
                )

    results, diff = time_quad_ex
    assert results
    assert diff < this_fast
@pytest.mark.parametrize(
    'cancel_delay',
    [tenths / 10 for tenths in range(3, 9)],
)
def test_not_fast_enough_quad(
    reg_addr: tuple,
    time_quad_ex: tuple[list[int], float],
    cancel_delay: float,
    ci_env: bool,
    spawn_backend: str,
):
    '''
    Verify we can cancel midway through the quad example and all
    actors cancel gracefully.

    The example is re-run with a timeout `cancel_delay` seconds
    shorter (floored at 0) than the duration measured by the
    `time_quad_ex` fixture, so it is expected to come back `None`.

    '''
    _, diff = time_quad_ex
    delay = max(diff - cancel_delay, 0)
    results = trio.run(
        cancel_after,
        delay,
        reg_addr,
    )

    system: str = platform.system()
    finished_anyway: bool = results is not None
    if (
        finished_anyway
        and
        system in ('Windows', 'Darwin')
    ):
        # In CI environments it seems later runs are quicker than the
        # first so just ignore these
        print(f'Woa there {system} caught your breath eh?')
    else:
        # should be cancelled mid-streaming
        assert results is None
@tractor_test(timeout=20)
async def test_respawn_consumer_task(
    reg_addr: tuple,
    spawn_backend: str,
    loglevel: str,
):
    '''
    Verify that the stream's underlying IPC channel is not closed
    when the local consumer task is cancelled and respawned.

    This also serves to verify that all values from the stream can be
    received despite the respawns.

    NOTE, this was originally written against
    ``._portal.ReceiveStream.shield()`` but that call is currently
    commented out below.

    '''
    stream = None

    async with tractor.open_nursery() as an:

        portal = await an.start_actor(
            name='streamer',
            enable_modules=[__name__]
        )
        async with portal.open_stream_from(
            stream_data,
            seed=11,
        ) as stream:

            expect = set(range(11))
            received = []

            # this is the re-spawn task routine
            async def consume(task_status=trio.TASK_STATUS_IGNORED):
                print('starting consume task..')
                nonlocal stream

                with trio.CancelScope() as cs:
                    # hand our cancel scope to the spawner so it can
                    # cancel just this consumer.
                    task_status.started(cs)

                    # shield stream's underlying channel from cancellation
                    # with stream.shield():

                    async for v in stream:
                        print(f'from stream: {v}')
                        expect.remove(v)
                        received.append(v)

                    print('exited consume')

            async with trio.open_nursery() as ln:
                cs = await ln.start(consume)

                while True:

                    await trio.sleep(0.1)

                    # XXX, guard against nothing having arrived yet
                    # (slow subactor startup / first msg) since
                    # indexing an empty list would raise `IndexError`
                    # and spuriously fail the test.
                    if (
                        received
                        and
                        received[-1] % 2 == 0
                    ):
                        print('cancelling consume task..')
                        cs.cancel()

                        # respawn
                        cs = await ln.start(consume)

                    if not expect:
                        print("all values streamed, BREAKING")
                        break

                cs.cancel()

        # TODO: this is justification for a
        # ``ActorNursery.stream_from_actor()`` helper?
        await portal.cancel_actor()