Multi-`broker` quotes with `Feed.open_multi_stream()`

Adds provider-list-filtered (quote) stream multiplexing support, allowing
merged real-time `tractor.MsgStream`s to be consumed through an `@acm`
interface. Behind the scenes this is just the classic approach of multiple
tasks pushing to a common mem chan.
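
For reference, here is a minimal sketch of that fan-in pattern using bare
`trio` primitives (the provider names and payloads are made up; unlike the
patch, which shares a single send handle and cancels the relay nursery on
teardown, this demo hands each pusher task its own clone so the receive
loop ends by itself):

```python
import trio


async def fan_in_demo() -> None:
    tx, rx = trio.open_memory_channel(100)

    async def pusher(name: str, chan: trio.MemorySendChannel) -> None:
        # each task pushes its msgs into the shared (cloned) send side
        async with chan:
            for i in range(3):
                await chan.send((name, i))

    async with trio.open_nursery() as nursery:
        # one pusher task per (hypothetical) provider
        for name in ('binance', 'kraken'):
            nursery.start_soon(pusher, name, tx.clone())

        # drop our own send handle so `rx` ends once all pushers finish
        await tx.aclose()

        async for msg in rx:
            print(msg)


trio.run(fan_in_demo)
```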

Details to make it work on `Feed`:
- add `Feed.mods: dict[str, ModuleType]` and
  `Feed.portals: dict[ModuleType, tractor.Portal]`, both of which are
  populated during init in `open_feed()`
- drop `Feed.portal` and `Feed.name`

Also fix a final lingering tsdb history loading loop termination bug.
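
A rough usage sketch of the new API (the fqsns, and the `piker.data.feed`
import path, are illustrative assumptions; this also presumes the usual
`piker`/`tractor` runtime is already up, and the exact `open_feed()`
signature is in the diff below):

```python
# hypothetical import path for the module touched in this diff
from piker.data.feed import open_feed


async def print_merged_quotes() -> None:
    async with (
        # one brokerd-backed feed per provider suffix
        open_feed(['btcusdt.binance', 'xbtusd.kraken']) as feed,

        # merge both providers' real-time streams into one channel;
        # pass e.g. `brokers=['binance']` to filter to a subset
        feed.open_multi_stream() as stream,
    ):
        async for msg in stream:
            print(msg)
```
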
agg_feedz
Tyler Goodlet 2022-11-11 17:17:17 -05:00
parent 20a396270e
commit 7b9db86753
1 changed file with 126 additions and 69 deletions


@@ -32,6 +32,7 @@ from typing import (
     Callable,
     Optional,
     Awaitable,
+    Sequence,
     TYPE_CHECKING,
     Union,
 )
@@ -627,26 +628,31 @@ async def tsdb_backfill(
         field_map=marketstore.ohlc_key_map,
     )
 
-    tsdb_last_frame_start = tsdb_history['Epoch'][0]
-    # load as much from storage into shm possible (depends on
-    # user's shm size settings).
     while (
         shm._first.value > 0
     ):
+        # load as much from storage into shm as space will
+        # allow according to user's shm size settings.
+        tsdb_last_frame_start = tsdb_history['Epoch'][0]
+
         tsdb_history = await storage.read_ohlcv(
             fqsn,
             end=tsdb_last_frame_start,
             timeframe=timeframe,
         )
 
+        next_start = tsdb_history['Epoch'][0]
+
         if (
             not len(tsdb_history)  # empty query
 
             # no earlier data detected
-            or tsdb_history['Epoch'][0] >= tsdb_last_frame_start
+            or next_start >= tsdb_last_frame_start
 
         ):
             break
+        else:
+            tsdb_last_frame_start = next_start
 
         prepend_start = shm._first.value
         to_push = tsdb_history[-prepend_start:]
@@ -868,6 +874,9 @@ class Flume(Struct):
     izero_hist: int = 0
     izero_rt: int = 0
     throttle_rate: int | None = None
+
+    # TODO: do we need this really if we can pull the `Portal` from
+    # ``tractor``'s internals?
     feed: Feed | None = None
 
     @property
@@ -905,12 +914,15 @@ class Flume(Struct):
         if not self.feed:
             raise RuntimeError('This flume is not part of any ``Feed``?')
 
+        # TODO: maybe a public (property) API for this in ``tractor``?
+        portal = self.stream._ctx._portal
+
         # XXX: this should be singleton on a host,
         # a lone broker-daemon per provider should be
         # created for all practical purposes
         async with maybe_open_context(
             acm_func=partial(
-                self.feed.portal.open_context,
+                portal.open_context,
                 iter_ohlc_periods,
             ),
             kwargs={'delay_s': delay_s},
@@ -1384,24 +1396,66 @@ class Feed(Struct):
     similarly allocated shm arrays.
 
     '''
-    mod: ModuleType
-    _portal: tractor.Portal
+    mods: dict[str, ModuleType] = {}
+    portals: dict[ModuleType, tractor.Portal] = {}
 
     flumes: dict[str, Flume] = {}
     streams: dict[
         str,
         trio.abc.ReceiveChannel[dict[str, Any]],
     ] = {}
-    status: dict[str, Any]
+
+    # used for UI to show remote state
+    status: dict[str, Any] = {}
+
+    @acm
+    async def open_multi_stream(
+        self,
+        brokers: Sequence[str] | None = None,
+
+    ) -> trio.abc.ReceiveChannel:
+
+        if brokers is None:
+            mods = self.mods
+        else:
+            mods = {name: self.mods[name] for name in brokers}
+
+        if len(mods) == 1:
+            # just pass the brokerd stream directly if only one provider
+            # was detected.
+            stream = self.streams[list(brokers)[0]]
+            async with stream.subscribe() as bstream:
+                yield bstream
+                return
+
+        # start multiplexing task tree
+        tx, rx = trio.open_memory_channel(616)
+
+        async def relay_to_common_memchan(stream: tractor.MsgStream):
+            async with tx:
+                async for msg in stream:
+                    await tx.send(msg)
+
+        async with trio.open_nursery() as nurse:
+            # spawn a relay task for each stream so that they all
+            # multiplex to a common channel.
+            for brokername in mods:
+                stream = self.streams[brokername]
+                nurse.start_soon(relay_to_common_memchan, stream)
+
+            try:
+                yield rx
+            finally:
+                nurse.cancel_scope.cancel()
 
     _max_sample_rate: int = 1
 
-    @property
-    def portal(self) -> tractor.Portal:
-        return self._portal
-
-    @property
-    def name(self) -> str:
-        return self.mod.name
+    # @property
+    # def portal(self) -> tractor.Portal:
+    #     return self._portal
+
+    # @property
+    # def name(self) -> str:
+    #     return self.mod.name
 
     @acm
@@ -1457,6 +1511,7 @@ async def open_feed(
     '''
     providers: dict[ModuleType, list[str]] = {}
+    feed = Feed()
 
     for fqsn in fqsns:
         brokername, key, suffix = unpack_fqsn(fqsn)
 
@@ -1469,6 +1524,7 @@ async def open_feed(
 
         # built a per-provider map to instrument names
         providers.setdefault(mod, []).append(bfqsn)
+        feed.mods[mod.name] = mod
 
     # one actor per brokerd for now
     brokerd_ctxs = []
@@ -1495,18 +1551,15 @@
         (brokermod, bfqsns),
     ) in zip(portals, providers.items()):
 
-        feed = Feed(
-            mod=brokermod,
-            _portal=portal,
-            status={},
-        )
+        feed.portals[brokermod] = portal
 
         # fill out "status info" that the UI can show
-        host, port = feed.portal.channel.raddr
+        host, port = portal.channel.raddr
         if host == '127.0.0.1':
             host = 'localhost'
+
         feed.status.update({
-            'actor_name': feed.portal.channel.uid[0],
+            'actor_name': portal.channel.uid[0],
             'host': host,
             'port': port,
             'hist_shm': 'NA',
@@ -1519,8 +1572,7 @@
         bus_ctxs.append(
             portal.open_context(
                 open_feed_bus,
-                # brokername=brokermod.name,
-                brokername=brokername,
+                brokername=brokermod.name,
                 symbols=bfqsns,
                 loglevel=loglevel,
                 start_stream=start_stream,
@@ -1528,61 +1580,66 @@
             )
         )
 
+    assert len(feed.mods) == len(feed.portals)
+
     async with (
         gather_contexts(bus_ctxs) as ctxs,
     ):
-        remote_scopes = []
+        stream_ctxs = []
+
         for (
             (ctx, flumes_msg_dict),
             (brokermod, bfqsns),
         ) in zip(ctxs, providers.items()):
 
-            stream_ctxs = []
            for fqsn, flume_msg in flumes_msg_dict.items():
                 flume = Flume.from_msg(flume_msg)
                 assert flume.symbol.fqsn == fqsn
                 feed.flumes[fqsn] = flume
+
+                # TODO: do we need this?
                 flume.feed = feed
 
                 # attach and cache shm handles
                 rt_shm = flume.rt_shm
                 assert rt_shm
                 hist_shm = flume.hist_shm
                 assert hist_shm
 
                 feed.status['hist_shm'] = (
                     f'{humanize(hist_shm._shm.size)}'
                 )
                 feed.status['rt_shm'] = f'{humanize(rt_shm._shm.size)}'
 
-            remote_scopes.append(ctx)
-
             stream_ctxs.append(
                 ctx.open_stream(
                     # XXX: be explicit about stream backpressure
                     # since we should **never** overrun on feeds
                     # being too fast, which will pretty much
                     # always happen with HFT XD
                     backpressure=backpressure,
                 )
             )
 
-            async with (
-                gather_contexts(stream_ctxs) as streams,
-            ):
-                for (
-                    stream,
-                    (brokermod, bfqsns),
-                ) in zip(streams, providers.items()):
-
-                    # for bfqsn in bfqsns:
-                    for fqsn in flumes_msg_dict:
-
-                        # apply common rt steam to each flume
-                        # (normally one per broker)
-                        feed.flumes[fqsn].stream = stream
-                        feed.streams[brokermod.name] = stream
-
-                yield feed
+        async with (
+            gather_contexts(stream_ctxs) as streams,
+        ):
+            for (
+                stream,
+                (brokermod, bfqsns),
+            ) in zip(streams, providers.items()):
+
+                feed.streams[brokermod.name] = stream
+
+                # for bfqsn in bfqsns:
+                for fqsn in flumes_msg_dict:
+
+                    # apply common rt steam to each flume
+                    # (normally one per broker)
+                    feed.flumes[fqsn].stream = stream
+
+            assert len(feed.mods) == len(feed.portals) == len(feed.streams)
+
+            yield feed
 
 @acm