Multi-`broker` quotes with `Feed.open_multi_stream()`

Adds provider-list-filtered (quote) stream multiplexing support, allowing
merged real-time `tractor.MsgStream`s to be consumed through an `@acm`
interface. Behind the scenes this is just the classic approach of multiple
tasks pushing to a common mem chan.
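
For reference, here is a minimal sketch of that fan-in pattern using bare
`trio` primitives (the provider names and payloads are made up; unlike the
patch, which shares a single send handle and cancels the relay nursery on
teardown, this demo hands each pusher task its own clone so the receive
loop ends by itself):

```python
import trio


async def fan_in_demo() -> None:
    tx, rx = trio.open_memory_channel(100)

    async def pusher(name: str, chan: trio.MemorySendChannel) -> None:
        # each task pushes its msgs into the shared (cloned) send side
        async with chan:
            for i in range(3):
                await chan.send((name, i))

    async with trio.open_nursery() as nursery:
        # one pusher task per (hypothetical) provider
        for name in ('binance', 'kraken'):
            nursery.start_soon(pusher, name, tx.clone())

        # drop our own send handle so `rx` ends once all pushers finish
        await tx.aclose()

        async for msg in rx:
            print(msg)


trio.run(fan_in_demo)
```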

Details to make it work on `Feed`:
- add `Feed.mods: dict[str, ModuleType]` and
  `Feed.portals: dict[ModuleType, tractor.Portal]`, both of which are
  populated during init in `open_feed()`
- drop `Feed.portal` and `Feed.name`

Also fix a final lingering tsdb history loading loop termination bug.
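
A rough usage sketch of the new API (the fqsns, and the `piker.data.feed`
import path, are illustrative assumptions; this also presumes the usual
`piker`/`tractor` runtime is already up, and the exact `open_feed()`
signature is in the diff below):

```python
# hypothetical import path for the module touched in this diff
from piker.data.feed import open_feed


async def print_merged_quotes() -> None:
    async with (
        # one brokerd-backed feed per provider suffix
        open_feed(['btcusdt.binance', 'xbtusd.kraken']) as feed,

        # merge both providers' real-time streams into one channel;
        # pass e.g. `brokers=['binance']` to filter to a subset
        feed.open_multi_stream() as stream,
    ):
        async for msg in stream:
            print(msg)
```
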
agg_feedz
Tyler Goodlet 2022-11-11 17:17:17 -05:00
parent 20a396270e
commit 7b9db86753
1 changed file with 126 additions and 69 deletions


@@ -32,6 +32,7 @@ from typing import (
     Callable,
     Optional,
     Awaitable,
+    Sequence,
     TYPE_CHECKING,
     Union,
 )
@@ -627,26 +628,31 @@ async def tsdb_backfill(
         field_map=marketstore.ohlc_key_map,
     )
 
-    tsdb_last_frame_start = tsdb_history['Epoch'][0]
-    # load as much from storage into shm possible (depends on
-    # user's shm size settings).
     while (
         shm._first.value > 0
     ):
+        # load as much from storage into shm as space will
+        # allow according to user's shm size settings.
+        tsdb_last_frame_start = tsdb_history['Epoch'][0]
+
         tsdb_history = await storage.read_ohlcv(
             fqsn,
             end=tsdb_last_frame_start,
             timeframe=timeframe,
         )
 
+        next_start = tsdb_history['Epoch'][0]
+
         if (
             not len(tsdb_history)  # empty query
 
             # no earlier data detected
-            or tsdb_history['Epoch'][0] >= tsdb_last_frame_start
+            or next_start >= tsdb_last_frame_start
 
         ):
             break
+        else:
+            tsdb_last_frame_start = next_start
 
         prepend_start = shm._first.value
         to_push = tsdb_history[-prepend_start:]
@@ -868,6 +874,9 @@ class Flume(Struct):
     izero_hist: int = 0
     izero_rt: int = 0
     throttle_rate: int | None = None
+
+    # TODO: do we need this really if we can pull the `Portal` from
+    # ``tractor``'s internals?
     feed: Feed | None = None
 
     @property
@@ -905,12 +914,15 @@ class Flume(Struct):
         if not self.feed:
             raise RuntimeError('This flume is not part of any ``Feed``?')
 
+        # TODO: maybe a public (property) API for this in ``tractor``?
+        portal = self.stream._ctx._portal
+
         # XXX: this should be singleton on a host,
         # a lone broker-daemon per provider should be
         # created for all practical purposes
         async with maybe_open_context(
             acm_func=partial(
-                self.feed.portal.open_context,
+                portal.open_context,
                 iter_ohlc_periods,
             ),
             kwargs={'delay_s': delay_s},
@@ -1384,24 +1396,66 @@ class Feed(Struct):
     similarly allocated shm arrays.
 
     '''
-    mod: ModuleType
-    _portal: tractor.Portal
+    mods: dict[str, ModuleType] = {}
+    portals: dict[ModuleType, tractor.Portal] = {}
 
     flumes: dict[str, Flume] = {}
     streams: dict[
         str,
         trio.abc.ReceiveChannel[dict[str, Any]],
     ] = {}
-    status: dict[str, Any]
+
+    # used for UI to show remote state
+    status: dict[str, Any] = {}
+
+    @acm
+    async def open_multi_stream(
+        self,
+        brokers: Sequence[str] | None = None,
+
+    ) -> trio.abc.ReceiveChannel:
+
+        if brokers is None:
+            mods = self.mods
+        else:
+            mods = {name: self.mods[name] for name in brokers}
+
+        if len(mods) == 1:
+            # just pass the brokerd stream directly if only one provider
+            # was detected.
+            stream = self.streams[list(brokers)[0]]
+            async with stream.subscribe() as bstream:
+                yield bstream
+                return
+
+        # start multiplexing task tree
+        tx, rx = trio.open_memory_channel(616)
+
+        async def relay_to_common_memchan(stream: tractor.MsgStream):
+            async with tx:
+                async for msg in stream:
+                    await tx.send(msg)
+
+        async with trio.open_nursery() as nurse:
+            # spawn a relay task for each stream so that they all
+            # multiplex to a common channel.
+            for brokername in mods:
+                stream = self.streams[brokername]
+                nurse.start_soon(relay_to_common_memchan, stream)
+
+            try:
+                yield rx
+            finally:
+                nurse.cancel_scope.cancel()
 
     _max_sample_rate: int = 1
 
-    @property
-    def portal(self) -> tractor.Portal:
-        return self._portal
-
-    @property
-    def name(self) -> str:
-        return self.mod.name
+    # @property
+    # def portal(self) -> tractor.Portal:
+    #     return self._portal
+
+    # @property
+    # def name(self) -> str:
+    #     return self.mod.name
 
     @acm
@@ -1457,6 +1511,7 @@ async def open_feed(
     '''
     providers: dict[ModuleType, list[str]] = {}
+    feed = Feed()
 
     for fqsn in fqsns:
         brokername, key, suffix = unpack_fqsn(fqsn)
 
@@ -1469,6 +1524,7 @@ async def open_feed(
 
         # built a per-provider map to instrument names
         providers.setdefault(mod, []).append(bfqsn)
+        feed.mods[mod.name] = mod
 
     # one actor per brokerd for now
     brokerd_ctxs = []
@@ -1495,18 +1551,15 @@
         (brokermod, bfqsns),
     ) in zip(portals, providers.items()):
 
-        feed = Feed(
-            mod=brokermod,
-            _portal=portal,
-            status={},
-        )
+        feed.portals[brokermod] = portal
 
         # fill out "status info" that the UI can show
-        host, port = feed.portal.channel.raddr
+        host, port = portal.channel.raddr
         if host == '127.0.0.1':
             host = 'localhost'
+
         feed.status.update({
-            'actor_name': feed.portal.channel.uid[0],
+            'actor_name': portal.channel.uid[0],
             'host': host,
             'port': port,
             'hist_shm': 'NA',
@@ -1519,8 +1572,7 @@
         bus_ctxs.append(
             portal.open_context(
                 open_feed_bus,
-                # brokername=brokermod.name,
-                brokername=brokername,
+                brokername=brokermod.name,
                 symbols=bfqsns,
                 loglevel=loglevel,
                 start_stream=start_stream,
@@ -1528,61 +1580,66 @@
             )
         )
 
+    assert len(feed.mods) == len(feed.portals)
+
     async with (
         gather_contexts(bus_ctxs) as ctxs,
     ):
-        remote_scopes = []
+        stream_ctxs = []
+
         for (
             (ctx, flumes_msg_dict),
             (brokermod, bfqsns),
         ) in zip(ctxs, providers.items()):
 
-            stream_ctxs = []
            for fqsn, flume_msg in flumes_msg_dict.items():
                 flume = Flume.from_msg(flume_msg)
                 assert flume.symbol.fqsn == fqsn
                 feed.flumes[fqsn] = flume
+
+                # TODO: do we need this?
                 flume.feed = feed
 
                 # attach and cache shm handles
                 rt_shm = flume.rt_shm
                 assert rt_shm
                 hist_shm = flume.hist_shm
                 assert hist_shm
 
                 feed.status['hist_shm'] = (
                     f'{humanize(hist_shm._shm.size)}'
                 )
                 feed.status['rt_shm'] = f'{humanize(rt_shm._shm.size)}'
 
-            remote_scopes.append(ctx)
-
             stream_ctxs.append(
                 ctx.open_stream(
                     # XXX: be explicit about stream backpressure
                     # since we should **never** overrun on feeds
                     # being too fast, which will pretty much
                     # always happen with HFT XD
                     backpressure=backpressure,
                 )
             )
 
-            async with (
-                gather_contexts(stream_ctxs) as streams,
-            ):
-                for (
-                    stream,
-                    (brokermod, bfqsns),
-                ) in zip(streams, providers.items()):
-
-                    # for bfqsn in bfqsns:
-                    for fqsn in flumes_msg_dict:
-
-                        # apply common rt steam to each flume
-                        # (normally one per broker)
-                        feed.flumes[fqsn].stream = stream
-                        feed.streams[brokermod.name] = stream
-
-                yield feed
+        async with (
+            gather_contexts(stream_ctxs) as streams,
+        ):
+            for (
+                stream,
+                (brokermod, bfqsns),
+            ) in zip(streams, providers.items()):
+
+                feed.streams[brokermod.name] = stream
+
+                # for bfqsn in bfqsns:
+                for fqsn in flumes_msg_dict:
+
+                    # apply common rt steam to each flume
+                    # (normally one per broker)
+                    feed.flumes[fqsn].stream = stream
+
+            assert len(feed.mods) == len(feed.portals) == len(feed.streams)
+
+            yield feed
 
 @acm