tractor/tests/test_legacy_one_way_streami...

"""
Streaming via the, now legacy, "async-gen API".

"""
import time
from functools import partial
from pathlib import Path
import platform
from typing import (
    Callable,
    Literal,
)

import trio
import tractor
import pytest

from tractor._testing import tractor_test


def test_must_define_ctx():

    with pytest.raises(TypeError) as err:
        @tractor.stream
        async def no_ctx():
            pass

    assert (
        "no_ctx must be `ctx: tractor.Context"
        in
        str(err.value)
    )

    @tractor.stream
    async def has_ctx(ctx):
        pass


async def async_gen_stream(sequence):
    for i in sequence:
        yield i
        await trio.sleep(0.1)

    # block indefinitely waiting to be cancelled by ``aclose()`` call
    with trio.CancelScope() as cs:
        await trio.sleep_forever()
        assert 0
    assert cs.cancelled_caught


# TODO: deprecated either remove entirely
# or re-impl in terms of `MsgStream` one-sides
# wrapper, but at least remove `Portal.open_stream_from()`
@tractor.stream
async def context_stream(
    ctx: tractor.Context,
    sequence: list[int],
):
    for i in sequence:
        await ctx.send_yield(i)
        await trio.sleep(0.1)

    # block indefinitely waiting to be cancelled by ``aclose()`` call
    with trio.CancelScope() as cs:
        await trio.sleep(float('inf'))
        assert 0
    assert cs.cancelled_caught


async def stream_from_single_subactor(
    reg_addr,
    start_method,
    stream_func,
):
    """Verify we can spawn a daemon actor and retrieve streamed data.
    """
    # only one per host address, spawns an actor if None

    async with tractor.open_nursery(
        registry_addrs=[reg_addr],
        start_method=start_method,
    ) as an:

        async with tractor.find_actor('streamerd') as portals:

            if not portals:

                # no brokerd actor found
                portal = await an.start_actor(
                    'streamerd',
                    enable_modules=[__name__],
                )

                seq = range(10)

                with trio.fail_after(5):
                    async with portal.open_stream_from(
                        stream_func,
                        sequence=list(seq),  # has to be msgpack serializable
                    ) as stream:

                        # it'd sure be nice to have an asyncitertools here...
                        iseq = iter(seq)
                        ival = next(iseq)

                        async for val in stream:
                            assert val == ival

                            try:
                                ival = next(iseq)
                            except StopIteration:
                                # should cancel far end task which will be
                                # caught and no error is raised
                                await stream.aclose()

                        await trio.sleep(0.3)

                        # ensure EOC signalled-state translates
                        # XXX: not really sure this is correct,
                        # shouldn't it be a `ClosedResourceError`?
                        try:
                            await stream.__anext__()
                        except StopAsyncIteration:
                            # stop all spawned subactors
                            await portal.cancel_actor()


@pytest.mark.parametrize(
    'stream_func',
    [
        async_gen_stream,
        context_stream,
    ],
    ids='stream_func={}'.format
)
def test_stream_from_single_subactor(
    reg_addr: tuple,
    start_method: str,
    stream_func: Callable,
):
    '''
    Verify streaming from a spawned async generator.

    '''
    trio.run(
        partial(
            stream_from_single_subactor,
            reg_addr,
            start_method,
            stream_func=stream_func,
        ),
    )


# this is the first 2 actors, streamer_1 and streamer_2
async def stream_data(seed: int):

    for i in range(seed):
        yield i

        # trigger scheduler to simulate practical usage
        await trio.sleep(0.0001)


# this is the third actor; the aggregator
async def aggregate(seed: int):
    '''
    Ensure that the two streams we receive match but only stream
    a single set of values to the parent.

    '''
    async with tractor.open_nursery() as an:
        portals = []
        for i in range(1, 3):
            # fork point
            portal = await an.start_actor(
                name=f'streamer_{i}',
                enable_modules=[__name__],
            )

            portals.append(portal)

        send_chan, recv_chan = trio.open_memory_channel(500)

        async def push_to_chan(portal, send_chan):
            async with send_chan:

                async with portal.open_stream_from(
                    stream_data,
                    seed=seed,
                ) as stream:

                    async for value in stream:
                        # leverage trio's built-in backpressure
                        await send_chan.send(value)

            print(
                f'FINISHED ITERATING!\n'
                f'peer: {portal.channel.aid.uid}'
            )

        # spawn 2 trio tasks to collect streams and push to a local queue
        async with trio.open_nursery() as tn:

            for portal in portals:
                tn.start_soon(
                    push_to_chan,
                    portal,
                    send_chan.clone(),
                )

            # close this local task's reference to send side
            await send_chan.aclose()

            unique_vals = set()
            async with recv_chan:
                async for value in recv_chan:
                    if value not in unique_vals:
                        unique_vals.add(value)
                        # yield upwards to the spawning parent actor
                        yield value

                assert value in unique_vals

            print("FINISHED ITERATING in aggregator")

        await an.cancel()
        print("WAITING on `ActorNursery` to finish")
    print("AGGREGATOR COMPLETE!")


async def a_quadruple_example() -> list[int]:
    '''
    Open the root-actor which is also a "registrar".

    '''
    async with tractor.open_nursery() as an:
        seed = int(1e3)
        pre_start = time.time()

        portal = await an.start_actor(
            name='aggregator',
            enable_modules=[__name__],
        )

        start = time.time()
        # the portal call returns exactly what you'd expect
        # as if the remote "aggregate" function was called locally
        result_stream = []

        async with portal.open_stream_from(aggregate, seed=seed) as stream:
            async for value in stream:
                result_stream.append(value)

        print(f"STREAM TIME = {time.time() - start}")
        print(f"STREAM + SPAWN TIME = {time.time() - pre_start}")
        assert result_stream == list(range(seed))
        await portal.cancel_actor()
        return result_stream


async def cancel_after(
    wait: float,
    reg_addr: tuple,
) -> list[int]:

    async with tractor.open_root_actor(
        registry_addrs=[reg_addr],
    ):
        with trio.move_on_after(wait):
            return await a_quadruple_example()


@pytest.fixture(scope='module')
def time_quad_ex(
    reg_addr: tuple,
    ci_env: bool,
    spawn_backend: str,
):
    non_linux: bool = (_sys := platform.system()) != 'Linux'
    if ci_env and non_linux:
        pytest.skip(f'Test is too flaky on {_sys!r} in CI')

    if spawn_backend == 'mp':
        '''
        no idea but the  mp *nix runs are flaking out here often...

        '''
        pytest.skip("Test is too flaky on mp in CI")

    timeout = 7 if non_linux else 4
    start = time.time()
    results: list[int] = trio.run(
        cancel_after,
        timeout,
        reg_addr,
    )
    diff: float = time.time() - start
    assert results
    return results, diff


def get_cpu_state(
    icpu: int = 0,
    setting: Literal[
        'scaling_governor',
        '*_pstate_max_freq',
        'scaling_max_freq',
        # 'scaling_cur_freq',
    ] = '*_pstate_max_freq',
) -> tuple[
    Path,
    str|int,
]|None:
    '''
    Attempt to read the (first) CPU's setting according
    to the set `setting` from under the file-sys,

    /sys/devices/system/cpu/cpu0/cpufreq/{setting}

    Useful to determine latency limits for various perf affected test
    suites.

    '''
    try:
        # Read governor for core 0 (usually same for all)
        setting_path: Path = list(
            Path(f'/sys/devices/system/cpu/cpu{icpu}/cpufreq/')
            .glob(f'{setting}')
        )[0]  # <- XXX must be single match!
        with open(
            setting_path,
            'r',
        ) as f:
            return (
                setting_path,
                f.read().strip(),
            )
    except FileNotFoundError:
        return None


def test_a_quadruple_example(
    time_quad_ex: tuple[list[int], float],
    ci_env: bool,
    spawn_backend: str,
    test_log: tractor.log.StackLevelAdapter,
):
    '''
    This also serves as a "we'd like to be this fast" smoke test
    given past empirical eval of this suite.

    '''
    non_linux: bool = (_sys := platform.system()) != 'Linux'

    this_fast_on_linux: float = 3
    this_fast = (
        6 if non_linux
        else this_fast_on_linux
    )
    # ^ XXX NOTE,
    # i've noticed that tweaking the CPU governor setting
    # to not "always" enable "turbo" mode can result in latency
    # which causes this limit to be too little. Not sure if it'd
    # be worth it to adjust the linux value based on reading the
    # CPU conf from the sys?
    #
    # For ex, see the `auto-cpufreq` docs on such settings,
    # https://github.com/AdnanHodzic/auto-cpufreq?tab=readme-ov-file#example-config-file-contents
    #
    # HENCE this below auxiliary compensation logic..
    if not non_linux:
        mx_pth, max_freq = get_cpu_state()
        cur_pth, cur_freq = get_cpu_state(
            setting='scaling_max_freq',
        )
        cpu_scaled: float = (
            int(cur_freq) / int(max_freq)
        )

        if cpu_scaled != 1.:
            this_fast = (
                this_fast_on_linux / (
                    cpu_scaled * 2  # <- bc likely "dual threaded"
                    # ^TODO, calc the thr-per-core val?
                )
            )
            test_log.warning(
                f'Increasing time-limit on linux bc CPU scaling,\n'
                f'\n'
                f'{mx_pth} = {max_freq}\n'
                f'{cur_pth} = {cur_freq}\n'
                f'\n'
                f'cpu_scaled = {cpu_scaled}\n'
                f'this_fast_on_linux: {this_fast_on_linux} -> {this_fast}\n'
            )

    results, diff = time_quad_ex
    assert results
    assert diff < this_fast


@pytest.mark.parametrize(
    'cancel_delay',
    list(map(lambda i: i/10, range(3, 9)))
)
def test_not_fast_enough_quad(
    reg_addr: tuple,
    time_quad_ex: tuple[list[int], float],
    cancel_delay: float,
    ci_env: bool,
    spawn_backend: str,
):
    '''
    Verify we can cancel midway through the quad example and all
    actors cancel gracefully.

    '''
    results, diff = time_quad_ex
    delay = max(diff - cancel_delay, 0)
    results = trio.run(
        cancel_after,
        delay,
        reg_addr,
    )
    system: str = platform.system()
    if (
        system in ('Windows', 'Darwin')
        and
        results is not None
    ):
        # In CI envoirments it seems later runs are quicker then the first
        # so just ignore these
        print(f'Woa there {system} caught your breath eh?')
    else:
        # should be cancelled mid-streaming
        assert results is None


@tractor_test(timeout=20)
async def test_respawn_consumer_task(
    reg_addr: tuple,
    spawn_backend: str,
    loglevel: str,
):
    '''
    Verify that ``._portal.ReceiveStream.shield()``
    sucessfully protects the underlying IPC channel from being closed
    when cancelling and respawning a consumer task.

    This also serves to verify that all values from the stream can be
    received despite the respawns.

    '''
    stream = None

    async with tractor.open_nursery() as an:

        portal = await an.start_actor(
            name='streamer',
            enable_modules=[__name__]
        )
        async with portal.open_stream_from(
            stream_data,
            seed=11,
        ) as stream:

            expect = set(range(11))
            received = []

            # this is the re-spawn task routine
            async def consume(task_status=trio.TASK_STATUS_IGNORED):
                print('starting consume task..')
                nonlocal stream

                with trio.CancelScope() as cs:
                    task_status.started(cs)

                    # shield stream's underlying channel from cancellation
                    # with stream.shield():

                    async for v in stream:
                        print(f'from stream: {v}')
                        expect.remove(v)
                        received.append(v)

                    print('exited consume')

            async with trio.open_nursery() as ln:
                cs = await ln.start(consume)

                while True:

                    await trio.sleep(0.1)

                    if received[-1] % 2 == 0:

                        print('cancelling consume task..')
                        cs.cancel()

                        # respawn
                        cs = await ln.start(consume)

                    if not expect:
                        print("all values streamed, BREAKING")
                        break

                cs.cancel()

        # TODO: this is justification for a
        # ``ActorNursery.stream_from_actor()`` helper?
        await portal.cancel_actor()