Add common state delegate type for all consumers

For every set of broadcast receivers which pull from the same producer, we need a single shared (singleton) state holding all of:

- the subscriptions
- the sender ready event
- the queue

Add a `BroadcastState` dataclass for this and pass it to all subscriptions. This makes the design much more like the built-in memory channels, which do something very similar with `MemoryChannelState`.

Use a `filter()` over the subs mapping in the sequence update step, and leave a few alternative approaches commented out so we can try them for speed.
2021-08-10 15:32:53 -04:00 · 2021-08-10 15:32:53 -04:00 · ceed96aa3f
parent 6e78bcf898
commit ceed96aa3f
1 changed files with 71 additions and 47 deletions
--- a/tractor/_broadcast.py
+++ b/tractor/_broadcast.py
@ -4,23 +4,42 @@ https://tokio-rs.github.io/tokio/doc/tokio/sync/broadcast/index.html
 '''
 from __future__ import annotations
 from itertools import cycle
 from collections import deque
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
 from functools import partial
 from itertools import cycle
 from operator import ne
 from typing import Optional
 import trio
 import tractor
 from trio.lowlevel import current_task
 from trio.abc import ReceiveChannel
 from trio._core._run import Task
 from trio.abc import ReceiveChannel
 from trio.lowlevel import current_task
 import tractor
 class Lagged(trio.TooSlowError):
    '''Subscribed consumer task was too slow'''
@dataclass
 class BroadcastState:
    '''Common state to all receivers of a broadcast.
    '''
    queue: deque
    # map of underlying clones to receiver wrappers
    # which must be provided as a singleton per broadcaster
    # clone-subscription set.
    subs: dict[trio.ReceiveChannel, BroadcastReceiver]
    # broadcast event to wakeup all sleeping consumer tasks
    # on a newly produced value from the sender.
    sender_ready: Optional[trio.Event] = None
 class BroadcastReceiver(ReceiveChannel):
    '''A memory receive channel broadcaster which is non-lossy for the
    fastest consumer.
@ -33,28 +52,21 @@ class BroadcastReceiver(ReceiveChannel):
        self,
        rx_chan: ReceiveChannel,
-        queue: deque,
+        state: BroadcastState,
        _subs: dict[trio.ReceiveChannel, BroadcastReceiver],
    ) -> None:
-        # map of underlying clones to receiver wrappers
+        # register the original underlying (clone)
-        # which must be provided as a singleton per broadcaster
+        self._state = state
-        # clone-subscription set.
+        state.subs[rx_chan] = -1
        self._subs = _subs
        # underlying for this receiver
        self._rx = rx_chan
        # register the original underlying (clone)
        self._subs[rx_chan] = -1
        self._queue = queue
        self._value_received: Optional[trio.Event] = None
    async def receive(self):
        key = self._rx
        state = self._state
        # TODO: ideally we can make some way to "lock out" the
        # underlying receive channel in some way such that if some task
@ -64,7 +76,7 @@ class BroadcastReceiver(ReceiveChannel):
        # only tasks which have entered ``.subscribe()`` can
        # receive on this broadcaster.
        try:
-            seq = self._subs[key]
+            seq = state.subs[key]
        except KeyError:
            raise RuntimeError(
                f'{self} is not registerd as subscriber')
@ -74,7 +86,7 @@ class BroadcastReceiver(ReceiveChannel):
        if seq > -1:
            # get the oldest value we haven't received immediately
            try:
-                value = self._queue[seq]
+                value = state.queue[seq]
            except IndexError:
                # adhere to ``tokio`` style "lagging":
@ -87,51 +99,61 @@ class BroadcastReceiver(ReceiveChannel):
                # decrement to the last value and expect
                # consumer to either handle the ``Lagged`` and come back
                # or bail out on its own (thus un-subscribing)
-                self._subs[key] = self._queue.maxlen - 1
+                state.subs[key] = state.queue.maxlen - 1
                # this task was overrun by the producer side
                task: Task = current_task()
                raise Lagged(f'Task {task.name} was overrun')
-            self._subs[key] -= 1
+            state.subs[key] -= 1
            return value
        # current task already has the latest value **and** is the
        # first task to begin waiting for a new one
-        if self._value_received is None:
+        if state.sender_ready is None:
-            event = self._value_received = trio.Event()
+            event = state.sender_ready = trio.Event()
            value = await self._rx.receive()
            # items with lower indices are "newer"
-            self._queue.appendleft(value)
+            state.queue.appendleft(value)
            # broadcast new value to all subscribers by increasing
            # all sequence numbers that will point in the queue to
            # their latest available value.
-            subs = self._subs.copy()
+            # don't decrement the sequence for this task since we
            # don't decrement the sequence # for this task since we
            # already retreived the last value
-            subs.pop(key)
+
-            for sub_key, seq in subs.items():
+            # XXX: which of these impls is fastest?
-                self._subs[sub_key] += 1
+
            # subs = state.subs.copy()
            # subs.pop(key)
            for sub_key in filter(
                # lambda k: k != key, state.subs,
                partial(ne, key), state.subs,
            ):
                state.subs[sub_key] += 1
            # reset receiver waiter task event for next blocking condition
            self._value_received = None
            event.set()
            state.sender_ready = None
            return value
        # This task is all caught up and ready to receive the latest
        # value, so queue sched it on the internal event.
        else:
-            await self._value_received.wait()
+            await state.sender_ready.wait()
-            seq = self._subs[key]
+            # TODO: optimization: if this is always true can't we just
-            assert seq > -1, 'Internal error?'
+            # skip iterating these sequence numbers on the fastest
            # task's wakeup and always read from state.queue[0]?
            seq = state.subs[key]
            assert seq == 0, 'Internal error?'
-            self._subs[key] -= 1
+            state.subs[key] -= 1
-            return self._queue[0]
+            return state.queue[seq]
    @asynccontextmanager
    async def subscribe(
@ -145,12 +167,12 @@ class BroadcastReceiver(ReceiveChannel):
        '''
        clone = self._rx.clone()
        state = self._state
        br = BroadcastReceiver(
-            clone,
+            rx_chan=clone,
-            self._queue,
+            state=state,
            _subs=self._subs,
        )
-        assert clone in self._subs
+        assert clone in state.subs
        try:
            yield br
@ -159,7 +181,7 @@ class BroadcastReceiver(ReceiveChannel):
            # ``AsyncResource`` api.
            await clone.aclose()
            # drop from subscribers and close
-            self._subs.pop(clone)
+            state.subs.pop(clone)
    # TODO:
    # - should there be some ._closed flag that causes
@ -186,8 +208,10 @@ def broadcast_receiver(
    return BroadcastReceiver(
        recv_chan,
-        queue=deque(maxlen=max_buffer_size),
+        state=BroadcastState(
-        _subs={},  # this is singleton over all subscriptions
+            queue=deque(maxlen=max_buffer_size),
            subs={},
        ),
    )
@ -210,7 +234,7 @@ if __name__ == '__main__':
            ) -> None:
                task = current_task()
-                count = 0
+                lags = 0
                while True:
                    async with rx.subscribe() as brx:
@ -218,22 +242,22 @@ if __name__ == '__main__':
                            async for value in brx:
                                print(f'{task.name}: {value}')
                                await trio.sleep(delay)
                                count += 1
                        except Lagged:
                            print(
                                f'restarting slow ass {task.name}'
-                                f'that bailed out on {count}:{value}')
+                                f'that bailed out on {lags}:{value}')
-                            if count <= retries:
+                            if lags <= retries:
                                lags += 1
                                continue
                            else:
                                print(
                                    f'{task.name} was too slow and terminated '
-                                    f'on {count}:{value}')
+                                    f'on {lags}:{value}')
                                return
            async with trio.open_nursery() as n:
-                for i in range(1, size):
+                for i in range(1, 10):
                    n.start_soon(
                        partial(
                            sub_and_print,