From 40000345a1acb6279d006680125ff6271b5ace14 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 09:17:25 -0500 Subject: [PATCH 01/49] Only log pos size errors for `ib` --- piker/brokers/ib/broker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/piker/brokers/ib/broker.py b/piker/brokers/ib/broker.py index daf9a703..401ac71c 100644 --- a/piker/brokers/ib/broker.py +++ b/piker/brokers/ib/broker.py @@ -371,8 +371,8 @@ async def update_and_audit_msgs( else: entry = f'split_ratio = 1/{int(reverse_split_ratio)}' - raise ValueError( - # log.error( + # raise ValueError( + log.error( f'POSITION MISMATCH ib <-> piker ledger:\n' f'ib: {ibppmsg}\n' f'piker: {msg}\n' @@ -883,7 +883,7 @@ async def deliver_trade_events( # execdict.pop('acctNumber') fill_msg = BrokerdFill( - # should match the value returned from + # NOTE: should match the value returned from # `.submit_limit()` reqid=execu.orderId, time_ns=time.time_ns(), # cuz why not From 508de6182add97e52eabab12df50c2737b1be8c5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 10:19:12 -0500 Subject: [PATCH 02/49] Drop duplicate live gateway from compose file for now --- dockering/ib/docker-compose.yml | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/dockering/ib/docker-compose.yml b/dockering/ib/docker-compose.yml index f3a28d66..f8be5684 100644 --- a/dockering/ib/docker-compose.yml +++ b/dockering/ib/docker-compose.yml @@ -62,39 +62,39 @@ services: # - "127.0.0.1:4002:4002" # - "127.0.0.1:5900:5900" - ib_gw_live: - image: waytrade/ib-gateway:1012.2i - restart: always - network_mode: 'host' + # ib_gw_live: + # image: waytrade/ib-gateway:1012.2i + # restart: always + # network_mode: 'host' - volumes: - - type: bind - source: ./jts_live.ini - target: /root/jts/jts.ini - # don't let ibc clobber this file for - # the main reason of not having a stupid - # timezone set.. 
- read_only: true + # volumes: + # - type: bind + # source: ./jts_live.ini + # target: /root/jts/jts.ini + # # don't let ibc clobber this file for + # # the main reason of not having a stupid + # # timezone set.. + # read_only: true - # force our own ibc config - - type: bind - source: ./ibc.ini - target: /root/ibc/config.ini + # # force our own ibc config + # - type: bind + # source: ./ibc.ini + # target: /root/ibc/config.ini - # force our noop script - socat isn't needed in host mode. - - type: bind - source: ./fork_ports_delayed.sh - target: /root/scripts/fork_ports_delayed.sh + # # force our noop script - socat isn't needed in host mode. + # - type: bind + # source: ./fork_ports_delayed.sh + # target: /root/scripts/fork_ports_delayed.sh - # force our noop script - socat isn't needed in host mode. - - type: bind - source: ./run_x11_vnc.sh - target: /root/scripts/run_x11_vnc.sh - read_only: true + # # force our noop script - socat isn't needed in host mode. + # - type: bind + # source: ./run_x11_vnc.sh + # target: /root/scripts/run_x11_vnc.sh + # read_only: true - # NOTE: to fill these out, define an `.env` file in the same dir as - # this compose file which looks something like: - environment: - TRADING_MODE: 'live' - VNC_SERVER_PASSWORD: 'doggy' - VNC_SERVER_PORT: '3004' + # # NOTE: to fill these out, define an `.env` file in the same dir as + # # this compose file which looks something like: + # environment: + # TRADING_MODE: 'live' + # VNC_SERVER_PASSWORD: 'doggy' + # VNC_SERVER_PORT: '3004' From 344a634cb697e1d26b94f1cb8752b98bf65f0da7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 13:04:58 -0500 Subject: [PATCH 03/49] Always set fqsn in `Feed.symbols: dict` --- piker/data/feed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index ef4b3634..aaedfcc8 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -1410,7 +1410,7 @@ async def open_feed( # 
symbol.broker_info[brokername] = si feed.symbols[fqsn] = symbol - feed.symbols[sym] = symbol + feed.symbols[f'{sym}.{brokername}'] = symbol # cast shm dtype to list... can't member why we need this for shm_key, shm in [ From 2a9042b1b184d63bc0b54e471f86ba8a9c8b9de1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 15:33:52 -0500 Subject: [PATCH 04/49] Make all UI entrypoints accept an fqsn `list` This is to prep for multi-symbol feeds and charts so we accept a sequence of fqsns to the top level entrypoints as well as the `.data.feed.open_feed()` API (though we're not actually supporting true multiplexed feeds nor shm lookups per fqsn yet). --- piker/ui/_app.py | 16 ++++++++++------ piker/ui/_chart.py | 24 ++++++++++++++++-------- piker/ui/_display.py | 35 +++++++++++++++++++---------------- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/piker/ui/_app.py b/piker/ui/_app.py index a31fd2da..38a4db20 100644 --- a/piker/ui/_app.py +++ b/piker/ui/_app.py @@ -66,7 +66,7 @@ async def _async_main( # implicit required argument provided by ``qtractor_run()`` main_widget: GodWidget, - sym: str, + syms: list[str], brokernames: str, loglevel: str, @@ -113,12 +113,16 @@ async def _async_main( # godwidget.hbox.addWidget(search) godwidget.search = search - symbol, _, provider = sym.rpartition('.') + symbols: list[str] = [] + + for sym in syms: + symbol, _, provider = sym.rpartition('.') + symbols.append(symbol) # this internally starts a ``display_symbol_data()`` task above - order_mode_ready = await godwidget.load_symbol( + order_mode_ready = await godwidget.load_symbols( provider, - symbol, + symbols, loglevel ) @@ -166,7 +170,7 @@ async def _async_main( def _main( - sym: str, + syms: list[str], brokernames: [str], piker_loglevel: str, tractor_kwargs, @@ -178,7 +182,7 @@ def _main( ''' run_qtractor( func=_async_main, - args=(sym, brokernames, piker_loglevel), + args=(syms, brokernames, piker_loglevel), main_widget_type=GodWidget, 
tractor_kwargs=tractor_kwargs, ) diff --git a/piker/ui/_chart.py b/piker/ui/_chart.py index bb2d4448..f61ed1d7 100644 --- a/piker/ui/_chart.py +++ b/piker/ui/_chart.py @@ -186,10 +186,10 @@ class GodWidget(QWidget): ) -> tuple[LinkedSplits, LinkedSplits]: # type: ignore return self._chart_cache.get(symbol_key) - async def load_symbol( + async def load_symbols( self, providername: str, - symbol_key: str, + symbol_keys: list[str], loglevel: str, reset: bool = False, @@ -200,12 +200,20 @@ class GodWidget(QWidget): Expects a ``numpy`` structured array containing all the ohlcv fields. ''' - # our symbol key style is always lower case - symbol_key = symbol_key.lower() + fqsns: list[str] = [] - # fully qualified symbol name (SNS i guess is what we're making?) - fqsn = '.'.join([symbol_key, providername]) - all_linked = self.get_chart_symbol(fqsn) + # our symbol key style is always lower case + for key in list(map(str.lower, symbol_keys)): + + # fully qualified symbol name (SNS i guess is what we're making?) + fqsn = '.'.join([key, providername]) + fqsns.append(fqsn) + + # NOTE: for now we use the first symbol in the set as the "key" + # for the overlay of feeds on the chart. 
+ group_key = fqsns[0] + + all_linked = self.get_chart_symbol(group_key) order_mode_started = trio.Event() if not self.vbox.isEmpty(): @@ -238,7 +246,7 @@ class GodWidget(QWidget): display_symbol_data, self, providername, - symbol_key, + fqsns, loglevel, order_mode_started, ) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index 0a98b4a8..af4f8361 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -947,7 +947,7 @@ async def link_views_with_region( async def display_symbol_data( godwidget: GodWidget, provider: str, - sym: str, + fqsns: list[str], loglevel: str, order_mode_started: trio.Event, @@ -961,11 +961,6 @@ async def display_symbol_data( ''' sbar = godwidget.window.status_bar - loading_sym_key = sbar.open_status( - f'loading {sym}.{provider} ->', - group_key=True - ) - # historical data fetch # brokermod = brokers.get_brokermod(provider) @@ -974,10 +969,18 @@ async def display_symbol_data( # clear_on_next=True, # group_key=loading_sym_key, # ) - fqsn = '.'.join((sym, provider)) + + for fqsn in fqsns: + + loading_sym_key = sbar.open_status( + f'loading {fqsn} ->', + group_key=True + ) + + first_fqsn = fqsns[0] async with open_feed( - [fqsn], + fqsns, loglevel=loglevel, # limit to at least display's FPS @@ -988,7 +991,7 @@ async def display_symbol_data( ohlcv: ShmArray = feed.rt_shm hist_ohlcv: ShmArray = feed.hist_shm - symbol = feed.symbols[sym] + symbol = feed.symbols[first_fqsn] fqsn = symbol.front_fqsn() step_size_s = 1 @@ -1025,7 +1028,7 @@ async def display_symbol_data( godwidget.pp_pane = pp_pane # create main OHLC chart - chart = rt_linked.plot_ohlc_main( + ohlc_chart = rt_linked.plot_ohlc_main( symbol, ohlcv, # in the case of history chart we explicitly set `False` @@ -1033,8 +1036,8 @@ async def display_symbol_data( sidepane=pp_pane, ) - chart._feeds[symbol.key] = feed - chart.setFocus() + ohlc_chart._feeds[symbol.key] = feed + ohlc_chart.setFocus() # XXX: FOR SOME REASON THIS IS CAUSING HANGZ!?! 
# plot historical vwap if available @@ -1044,7 +1047,7 @@ async def display_symbol_data( # and 'bar_wap' in bars.dtype.fields # ): # wap_in_history = True - # chart.draw_curve( + # ohlc_chart.draw_curve( # name='bar_wap', # shm=ohlcv, # color='default_light', @@ -1105,7 +1108,7 @@ async def display_symbol_data( await trio.sleep(0) # size view to data prior to order mode init - chart.default_view() + ohlc_chart.default_view() rt_linked.graphics_cycle() await trio.sleep(0) @@ -1119,7 +1122,7 @@ async def display_symbol_data( godwidget.resize_all() await link_views_with_region( - chart, + ohlc_chart, hist_chart, feed, ) @@ -1135,7 +1138,7 @@ async def display_symbol_data( ): if not vlm_chart: # trigger another view reset if no sub-chart - chart.default_view() + ohlc_chart.default_view() rt_linked.mode = mode From 8e85ed92c86b80995028c7e893e9ddda056fbda1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 15:39:28 -0500 Subject: [PATCH 05/49] Use new `GodWidget.load_symbols()` from search --- piker/ui/_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/ui/_search.py b/piker/ui/_search.py index bbe88320..6c7c6fd8 100644 --- a/piker/ui/_search.py +++ b/piker/ui/_search.py @@ -665,9 +665,9 @@ class SearchWidget(QtWidgets.QWidget): log.info(f'Requesting symbol: {symbol}.{provider}') - await godw.load_symbol( + await godw.load_symbols( provider, - symbol, + [symbol], 'info', ) From 051a8729b65a2d45a640d08f8cb79c6b91d1a47e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 15:40:01 -0500 Subject: [PATCH 06/49] EMS: expect fqsn key in `Feed.symbols` --- piker/clearing/_ems.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index 1569429f..2147369b 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -1239,8 +1239,7 @@ async def process_client_order_cmds( pred = mk_check(trigger_price, last, action) spread_slap: float = 5 - sym = 
fqsn.replace(f'.{brokers[0]}', '') - min_tick = feed.symbols[sym].tick_size + min_tick = feed.symbols[fqsn].tick_size if action == 'buy': tickfilter = ('ask', 'last', 'trade') From 1bf1965a8b10fd3a31711dcee6c6f36fadcf34cd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 15:40:41 -0500 Subject: [PATCH 07/49] Drop `tractor.log` level override fixture --- tests/conftest.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index aaa125ce..114812bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,15 +14,6 @@ def pytest_addoption(parser): help="Use a practice API account") -@pytest.fixture(scope='session', autouse=True) -def loglevel(request): - orig = tractor.log._default_loglevel - level = tractor.log._default_loglevel = request.config.option.loglevel - log.get_console_log(level) - yield level - tractor.log._default_loglevel = orig - - @pytest.fixture(scope='session') def test_config(): dirname = os.path.dirname From c7d5db5f907073478254f32a11afbfe2a7811205 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Nov 2022 15:40:52 -0500 Subject: [PATCH 08/49] Start data feed layer test suite Initial test that starts a `binance` feed and reads the quote messages alongside shm buffers for 1s and 1m OHLC; just prints to console for now. Template out parametrization for multi-symbol quote-multiplexed feeds which coming soon B) --- tests/test_feeds.py | 65 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tests/test_feeds.py diff --git a/tests/test_feeds.py b/tests/test_feeds.py new file mode 100644 index 00000000..3c1104a9 --- /dev/null +++ b/tests/test_feeds.py @@ -0,0 +1,65 @@ +''' +Data feed layer APIs, performance, msg throttling. 
+ +''' +from pprint import pprint + +import pytest +import trio +from piker import ( + open_piker_runtime, + open_feed, +) +from piker.data import ShmArray + + +@pytest.mark.parametrize( + 'fqsns', + [ + ['btcusdt.binance'] + ], + ids=lambda param: f'fqsns={param}', +) +def test_basic_rt_feed( + fqsns: list[str], +): + ''' + Start a real-time data feed for provided fqsn and pull + a few quotes then simply shut down. + + ''' + async def main(): + async with ( + open_piker_runtime('test_basic_rt_feed'), + open_feed( + fqsns, + loglevel='info', + + # TODO: ensure throttle rate is applied + # limit to at least display's FPS + # avoiding needless Qt-in-guest-mode context switches + # tick_throttle=_quote_throttle_rate, + + ) as feed + ): + for fqin in fqsns: + assert feed.symbols[fqin] + + ohlcv: ShmArray = feed.rt_shm + hist_ohlcv: ShmArray = feed.hist_shm + + count: int = 0 + async for quotes in feed.stream: + + # print quote msg, rt and history + # buffer values on console. + pprint(quotes) + pprint(ohlcv.array[-1]) + pprint(hist_ohlcv.array[-1]) + + if count >= 100: + break + + count += 1 + + trio.run(main) From 5bf3cb8e4b45d82f62a1b35abdd6120e245f3a0a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 8 Nov 2022 14:50:31 -0500 Subject: [PATCH 09/49] Just warn on `ib` symbol search lags --- piker/brokers/ib/feed.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index e15e8462..442cbdeb 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -1047,7 +1047,13 @@ async def open_symbol_search( stock_results = [] async def stash_results(target: Awaitable[list]): - stock_results.extend(await target) + try: + results = await target + except tractor.trionics.Lagged: + print("IB SYM-SEARCH OVERRUN?!?") + return + + stock_results.extend(results) for i in range(10): with trio.move_on_after(3) as cs: From 18dc8b08e4e80414be9ac2587c8cf9372d595485 Mon Sep 17 00:00:00 2001 From: 
Tyler Goodlet Date: Tue, 8 Nov 2022 16:09:18 -0500 Subject: [PATCH 10/49] First draft aggregate feedz support Orient shm-flow-arrays around the new idea of a `Flume` which provides access, mgmt and basic measure of real-time data flow sets (see water flow management semantics). - We discard the previous idea of a "init message" which contained all the shm attachment info and instead send a startup message full of `Flume.to_msg()`s which are symmetrically loaded on the caller actor side. - Create data-flows "entries" for every passed in fqsn such that the consumer gets back streams and shm for each, now all wrapped in `Flume` types. For now we allocate `brokermod.stream_quotes()` tasks 1-to-1 for each fqsn (instead of expecting each backend to do multi-plexing, though we might want that eventually) as well a `_FeedsBus._subscriber` entry for each. The pause/resume management loop is adjusted to match. Previously `Feed`s were allocated 1-to-1 with each fqsn. - Make `Feed` a `Struct` subtype instead of a `@dataclass` and move all flow specific attrs to the new `Flume`: - move `.index_stream()`, `.get_ds_info()` to `Flume`. - drop `.receive()`: each fqsn entry will now require knowledge of separate streams by feed users. - add multi-fqsn tables: `.flumes`, `.streams` which point to the appropriate per-symbol entries. - Async load all `Flume`s from all contexts and all quote streams using `tractor.trionics.gather_contexts()` on the client `open_feed()` side. - Update feeds test to include streaming 2 symbols on the same (binance) backend. --- piker/data/feed.py | 1182 ++++++++++++++++++++++++++----------------- tests/test_feeds.py | 51 +- 2 files changed, 742 insertions(+), 491 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index aaedfcc8..e87c00be 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -22,19 +22,21 @@ This module is enabled for ``brokerd`` daemons. 
""" from __future__ import annotations from contextlib import asynccontextmanager as acm -from dataclasses import ( - dataclass, - field, -) +# from dataclasses import ( +# dataclass, +# field, +# ) from datetime import datetime from functools import partial from types import ModuleType from typing import ( Any, AsyncIterator, + AsyncContextManager, Callable, Optional, Awaitable, + Sequence, TYPE_CHECKING, Union, ) @@ -43,7 +45,10 @@ import trio from trio.abc import ReceiveChannel from trio_typing import TaskStatus import tractor -from tractor.trionics import maybe_open_context +from tractor.trionics import ( + maybe_open_context, + gather_contexts, +) import pendulum import numpy as np @@ -58,6 +63,7 @@ from ._sharedmem import ( maybe_open_shm_array, attach_shm_array, ShmArray, + _Token, _secs_in_day, ) from .ingest import get_ingestormod @@ -109,11 +115,6 @@ class _FeedsBus(Struct): task_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() - # XXX: so weird but, apparently without this being `._` private - # pydantic will complain about private `tractor.Context` instance - # vars (namely `._portal` and `._cancel_scope`) at import time. - # Reported this bug: - # https://github.com/samuelcolvin/pydantic/issues/2816 _subscribers: dict[ str, list[ @@ -719,10 +720,14 @@ async def manage_history( buffer. ''' + + from tractor._state import _runtime_vars + port = _runtime_vars['_root_mailbox'][1] + # (maybe) allocate shm array for this broker/symbol which will # be used for fast near-term history capture and processing. 
hist_shm, opened = maybe_open_shm_array( - key=f'{fqsn}_hist', + key=f'{fqsn}_hist', #_p{port}', # use any broker defined ohlc dtype: dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), @@ -739,7 +744,7 @@ async def manage_history( ) rt_shm, opened = maybe_open_shm_array( - key=f'{fqsn}_rt', + key=f'{fqsn}_rt', #_p{port}', # use any broker defined ohlc dtype: dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), @@ -836,373 +841,61 @@ async def manage_history( await trio.sleep_forever() -async def allocate_persistent_feed( - bus: _FeedsBus, - - brokername: str, - symbol: str, - - loglevel: str, - start_stream: bool = True, - - task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, - -) -> None: +class Flume(Struct): ''' - Create and maintain a "feed bus" which allocates tasks for real-time - streaming and optional historical data storage per broker/data provider - backend; this normally task runs *in* a `brokerd` actor. + Composite reference type which points to all the addressing handles + and other meta-data necessary for the read, measure and management + of a set of real-time updated data flows. - If none exists, this allocates a ``_FeedsBus`` which manages the - lifetimes of streaming tasks created for each requested symbol. + Can be thought of as a "flow descriptor" or "flow frame" which + describes the high level properties of a set of data flows that can + be used seamlessly across process-memory boundaries. - - 2 tasks are created: - - a real-time streaming task which connec + Each instance's sub-components normally includes: + - a msg oriented quote stream provided via an IPC transport + - history and real-time shm buffers which are both real-time + updated and backfilled. + - associated startup indexing information related to both buffer + real-time-append and historical prepend addresses. + - low level APIs to read and measure the updated data and manage + queuing properties. 
''' - # load backend module - try: - mod = get_brokermod(brokername) - except ImportError: - mod = get_ingestormod(brokername) + symbol: Symbol + first_quote: dict + _hist_shm_token: _Token + _rt_shm_token: _Token - # mem chan handed to broker backend so it can push real-time - # quotes to this task for sampling and history storage (see below). - send, quote_stream = trio.open_memory_channel(616) - - # data sync signals for both history loading and market quotes - some_data_ready = trio.Event() - feed_is_live = trio.Event() - - # establish broker backend quote stream by calling - # ``stream_quotes()``, which is a required broker backend endpoint. - init_msg, first_quote = await bus.nursery.start( - partial( - mod.stream_quotes, - send_chan=send, - feed_is_live=feed_is_live, - symbols=[symbol], - loglevel=loglevel, - ) - ) - # the broker-specific fully qualified symbol name, - # but ensure it is lower-cased for external use. - bfqsn = init_msg[symbol]['fqsn'].lower() - init_msg[symbol]['fqsn'] = bfqsn - - # HISTORY, run 2 tasks: - # - a history loader / maintainer - # - a real-time streamer which consumers and sends new data to any - # consumers as well as writes to storage backends (as configured). - - # XXX: neither of these will raise but will cause an inf hang due to: - # https://github.com/python-trio/trio/issues/2258 - # bus.nursery.start_soon( - # await bus.start_task( - ( - izero_hist, - hist_shm, - izero_rt, - rt_shm, - ) = await bus.nursery.start( - manage_history, - mod, - bus, - '.'.join((bfqsn, brokername)), - some_data_ready, - feed_is_live, - ) - - # we hand an IPC-msg compatible shm token to the caller so it - # can read directly from the memory which will be written by - # this task. 
- msg = init_msg[symbol] - msg['hist_shm_token'] = hist_shm.token - msg['izero_hist'] = izero_hist - msg['izero_rt'] = izero_rt - msg['rt_shm_token'] = rt_shm.token - - # true fqsn - fqsn = '.'.join((bfqsn, brokername)) - # add a fqsn entry that includes the ``.`` suffix - # and an entry that includes the broker-specific fqsn (including - # any new suffixes or elements as injected by the backend). - init_msg[fqsn] = msg - init_msg[bfqsn] = msg - - # TODO: pretty sure we don't need this? why not just leave 1s as - # the fastest "sample period" since we'll probably always want that - # for most purposes. - # pass OHLC sample rate in seconds (be sure to use python int type) - # init_msg[symbol]['sample_rate'] = 1 #int(delay_s) - - # yield back control to starting nursery once we receive either - # some history or a real-time quote. - log.info(f'waiting on history to load: {fqsn}') - await some_data_ready.wait() - - # append ``.`` suffix to each quote symbol - acceptable_not_fqsn_with_broker_suffix = symbol + f'.{brokername}' - - generic_first_quotes = { - acceptable_not_fqsn_with_broker_suffix: first_quote, - fqsn: first_quote, - } - - # for ambiguous names we simply apply the retreived - # feed to that name (for now). - bus.feeds[symbol] = bus.feeds[bfqsn] = ( - init_msg, - generic_first_quotes, - ) - - # insert 1s ohlc into the increment buffer set - # to update and shift every second - sampler.ohlcv_shms.setdefault( - 1, - [] - ).append(rt_shm) - - task_status.started() - - if not start_stream: - await trio.sleep_forever() - - # begin real-time updates of shm and tsb once the feed goes live and - # the backend will indicate when real-time quotes have begun. - await feed_is_live.wait() - - # insert 1m ohlc into the increment buffer set - # to shift every 60s. - sampler.ohlcv_shms.setdefault(60, []).append(hist_shm) - - # create buffer a single incrementer task broker backend - # (aka `brokerd`) using the lowest sampler period. 
- if sampler.incrementers.get(_default_delay_s) is None: - await bus.start_task( - increment_ohlc_buffer, - _default_delay_s, - ) - - sum_tick_vlm: bool = init_msg.get( - 'shm_write_opts', {} - ).get('sum_tick_vlm', True) - - # NOTE: if no high-freq sampled data has (yet) been loaded, - # seed the buffer with a history datum - this is most handy - # for many backends which don't sample @ 1s OHLC but do have - # slower data such as 1m OHLC. - if not len(rt_shm.array): - rt_shm.push(hist_shm.array[-3:-1]) - ohlckeys = ['open', 'high', 'low', 'close'] - rt_shm.array[ohlckeys][-2:] = hist_shm.array['close'][-1] - rt_shm.array['volume'][-2] = 0 - - # start sample loop and shm incrementer task for OHLC style sampling - # at the above registered step periods. - try: - await sample_and_broadcast( - bus, - rt_shm, - hist_shm, - quote_stream, - brokername, - sum_tick_vlm - ) - finally: - log.warning(f'{fqsn} feed task terminated') - - -@tractor.context -async def open_feed_bus( - - ctx: tractor.Context, - brokername: str, - symbol: str, # normally expected to the broker-specific fqsn - loglevel: str, - tick_throttle: Optional[float] = None, - start_stream: bool = True, - -) -> None: - ''' - Open a data feed "bus": an actor-persistent per-broker task-oriented - data feed registry which allows managing real-time quote streams per - symbol. - - ''' - if loglevel is None: - loglevel = tractor.current_actor().loglevel - - # XXX: required to propagate ``tractor`` loglevel to piker logging - get_console_log(loglevel or tractor.current_actor().loglevel) - - # local state sanity checks - # TODO: check for any stale shm entries for this symbol - # (after we also group them in a nice `/dev/shm/piker/` subdir). 
- # ensure we are who we think we are - servicename = tractor.current_actor().name - assert 'brokerd' in servicename - assert brokername in servicename - - bus = get_feed_bus(brokername) - - # if no cached feed for this symbol has been created for this - # brokerd yet, start persistent stream and shm writer task in - # service nursery - entry = bus.feeds.get(symbol) - if entry is None: - # allocate a new actor-local stream bus which - # will persist for this `brokerd`'s service lifetime. - async with bus.task_lock: - await bus.nursery.start( - partial( - allocate_persistent_feed, - - bus=bus, - brokername=brokername, - # here we pass through the selected symbol in native - # "format" (i.e. upper vs. lowercase depending on - # provider). - symbol=symbol, - loglevel=loglevel, - start_stream=start_stream, - ) - ) - # TODO: we can remove this? - assert isinstance(bus.feeds[symbol], tuple) - - # XXX: ``first_quotes`` may be outdated here if this is secondary - # subscriber - init_msg, first_quotes = bus.feeds[symbol] - - msg = init_msg[symbol] - bfqsn = msg['fqsn'].lower() - - # true fqsn - fqsn = '.'.join([bfqsn, brokername]) - assert fqsn in first_quotes - assert bus.feeds[bfqsn] - - # broker-ambiguous symbol (provided on cli - eg. mnq.globex.ib) - bsym = symbol + f'.{brokername}' - assert bsym in first_quotes - - # we use the broker-specific fqsn (bfqsn) for - # the sampler subscription since the backend isn't (yet) - # expected to append it's own name to the fqsn, so we filter - # on keys which *do not* include that name (e.g .ib) . - bus._subscribers.setdefault(bfqsn, []) - - # send this even to subscribers to existing feed? 
- # deliver initial info message a first quote asap - await ctx.started(( - init_msg, - first_quotes, - )) - - if not start_stream: - log.warning(f'Not opening real-time stream for {fqsn}') - await trio.sleep_forever() - - # real-time stream loop - async with ( - ctx.open_stream() as stream, - ): - # re-send to trigger display loop cycle (necessary especially - # when the mkt is closed and no real-time messages are - # expected). - await stream.send({fqsn: first_quotes}) - - # open a bg task which receives quotes over a mem chan - # and only pushes them to the target actor-consumer at - # a max ``tick_throttle`` instantaneous rate. - if tick_throttle: - send, recv = trio.open_memory_channel(2**10) - cs = await bus.start_task( - uniform_rate_send, - tick_throttle, - recv, - stream, - ) - sub = (send, ctx, tick_throttle) - - else: - sub = (stream, ctx, tick_throttle) - - subs = bus._subscribers[bfqsn] - subs.append(sub) - - try: - uid = ctx.chan.uid - - # ctrl protocol for start/stop of quote streams based on UI - # state (eg. don't need a stream when a symbol isn't being - # displayed). - async for msg in stream: - - if msg == 'pause': - if sub in subs: - log.info( - f'Pausing {fqsn} feed for {uid}') - subs.remove(sub) - - elif msg == 'resume': - if sub not in subs: - log.info( - f'Resuming {fqsn} feed for {uid}') - subs.append(sub) - else: - raise ValueError(msg) - finally: - log.info( - f'Stopping {symbol}.{brokername} feed for {ctx.chan.uid}') - - if tick_throttle: - # TODO: a one-cancels-one nursery - # n.cancel_scope.cancel() - cs.cancel() - try: - bus._subscribers[bfqsn].remove(sub) - except ValueError: - log.warning(f'{sub} for {symbol} was already removed?') - - -@dataclass -class Feed: - ''' - A data feed for client-side interaction with far-process real-time - data sources. - - This is an thin abstraction on top of ``tractor``'s portals for - interacting with IPC streams and storage APIs (shm and time-series - db). 
- - ''' - name: str - hist_shm: ShmArray - rt_shm: ShmArray - mod: ModuleType - first_quotes: dict # symbol names to first quote dicts - _portal: tractor.Portal - stream: trio.abc.ReceiveChannel[dict[str, Any]] - status: dict[str, Any] + # private shm refs loaded dynamically from tokens + _hist_shm: ShmArray | None = None + _rt_shm: ShmArray | None = None + stream: tractor.MsgStream | None = None izero_hist: int = 0 izero_rt: int = 0 - - throttle_rate: Optional[int] = None - - _trade_stream: Optional[AsyncIterator[dict[str, Any]]] = None - _max_sample_rate: int = 1 - - # cache of symbol info messages received as first message when - # a stream startsc. - symbols: dict[str, Symbol] = field(default_factory=dict) + throttle_rate: int | None = None @property - def portal(self) -> tractor.Portal: - return self._portal + def rt_shm(self) -> ShmArray: + + if self._rt_shm is None: + self._rt_shm = attach_shm_array( + token=self._rt_shm_token, + readonly=True, + ) + + return self._rt_shm + + @property + def hist_shm(self) -> ShmArray: + + if self._hist_shm is None: + self._hist_shm = attach_shm_array( + token=self._hist_shm_token, + readonly=True, + ) + + return self._hist_shm async def receive(self) -> dict: return await self.stream.receive() @@ -1267,6 +960,489 @@ class Feed: ratio, ) + # TODO: get native msgspec decoding for these workinn + def to_msg(self) -> dict: + msg = self.to_dict() + msg['symbol'] = msg['symbol'].to_dict() + # can't serialize the stream object, it's + # expected you'll have a ref to it since + # this msg should be rxed on a stream on + # whatever far end IPC.. 
+ msg.pop('stream') + return msg + + @classmethod + def from_msg(cls, msg: dict) -> dict: + symbol = Symbol(**msg.pop('symbol')) + return cls( + symbol=symbol, + **msg, + ) + + +async def allocate_persistent_feed( + bus: _FeedsBus, + + brokername: str, + symstr: str, + + loglevel: str, + start_stream: bool = True, + + task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, + +) -> None: + ''' + Create and maintain a "feed bus" which allocates tasks for real-time + streaming and optional historical data storage per broker/data provider + backend; this normally task runs *in* a `brokerd` actor. + + If none exists, this allocates a ``_FeedsBus`` which manages the + lifetimes of streaming tasks created for each requested symbol. + + + 2 tasks are created: + - a real-time streaming task which connec + + ''' + # load backend module + try: + mod = get_brokermod(brokername) + except ImportError: + mod = get_ingestormod(brokername) + + # mem chan handed to broker backend so it can push real-time + # quotes to this task for sampling and history storage (see below). + send, quote_stream = trio.open_memory_channel(616) + + # data sync signals for both history loading and market quotes + some_data_ready = trio.Event() + feed_is_live = trio.Event() + + # establish broker backend quote stream by calling + # ``stream_quotes()``, which is a required broker backend endpoint. + init_msg, first_quote = await bus.nursery.start( + partial( + mod.stream_quotes, + send_chan=send, + feed_is_live=feed_is_live, + symbols=[symstr], + loglevel=loglevel, + ) + ) + # TODO: this is indexed by symbol for now since we've planned (for + # some time) to expect backends to handle single + # ``.stream_quotes()`` calls with multiple symbols inputs to just + # work such that a backend can do its own multiplexing if desired. 
+ # + # Likely this will require some design changes: + # - the .started() should return some config output determining + # whether the backend does indeed multiplex multi-symbol quotes + # internally or whether separate task spawns should be done per + # symbol (as it is right now). + # - information about discovery of non-local host daemons which can + # be contacted in the case where we want to support load disti + # over multi-use clusters; eg. some new feed request is + # re-directed to another daemon cluster because the current one is + # at max capacity. + # - the same ideas ^ but when a local core is maxxed out (like how + # binance does often with hft XD + # - if a brokerd is non-local then we can't just allocate a mem + # channel here and have the brokerd write it, we instead need + # a small streaming machine around the remote feed which can then + # do the normal work of sampling and writing shm buffers + # (depending on if we want sampling done on the far end or not?) + msg = init_msg[symstr] + + # the broker-specific fully qualified symbol name, + # but ensure it is lower-cased for external use. + bfqsn = msg['fqsn'].lower() + + # true fqsn including broker/provider suffix + fqsn = '.'.join((bfqsn, brokername)) + # msg['fqsn'] = bfqsn + + symbol = Symbol.from_fqsn( + fqsn=fqsn, + info=msg, + ) + + # HISTORY storage, run 2 tasks: + # - a history loader / maintainer + # - a real-time streamer which consumers and sends new data to any + # consumers as well as writes to storage backends (as configured). 
+ + # XXX: neither of these will raise but will cause an inf hang due to: + # https://github.com/python-trio/trio/issues/2258 + # bus.nursery.start_soon( + # await bus.start_task( + ( + izero_hist, + hist_shm, + izero_rt, + rt_shm, + ) = await bus.nursery.start( + manage_history, + mod, + bus, + fqsn, + some_data_ready, + feed_is_live, + ) + + # we hand an IPC-msg compatible shm token to the caller so it + # can read directly from the memory which will be written by + # this task. + + # msg['hist_shm_token'] = hist_shm.token + # msg['izero_hist'] = izero_hist + # msg['izero_rt'] = izero_rt + # msg['rt_shm_token'] = rt_shm.token + + # add a fqsn entry that includes the ``.`` suffix + # and an entry that includes the broker-specific fqsn (including + # any new suffixes or elements as injected by the backend). + # init_msg[fqsn] = msg + # init_msg[bfqsn] = msg + + # TODO: pretty sure we don't need this? why not just leave 1s as + # the fastest "sample period" since we'll probably always want that + # for most purposes. + # pass OHLC sample rate in seconds (be sure to use python int type) + # init_msg[symbol]['sample_rate'] = 1 #int(delay_s) + + # yield back control to starting nursery once we receive either + # some history or a real-time quote. + log.info(f'waiting on history to load: {fqsn}') + await some_data_ready.wait() + + # append ``.`` suffix to each quote symbol + # acceptable_not_fqsn_with_broker_suffix = symbol + f'.{brokername}' + + # generic_first_quotes = { + # acceptable_not_fqsn_with_broker_suffix: first_quote, + # fqsn: first_quote, + # } + + flume = Flume( + symbol=symbol, + _hist_shm_token=hist_shm.token, + _rt_shm_token=rt_shm.token, + first_quote=first_quote, + # stream=stream, + izero_hist=izero_hist, + izero_rt=izero_rt, + # throttle_rate=tick_throttle, + ) + + # for ambiguous names we simply apply the retreived + # feed to that name (for now). 
+ bus.feeds[symstr] = bus.feeds[bfqsn] = flume + # init_msg, + # generic_first_quotes, + # ) + + # insert 1s ohlc into the increment buffer set + # to update and shift every second + sampler.ohlcv_shms.setdefault( + 1, + [] + ).append(rt_shm) + + task_status.started() + + if not start_stream: + await trio.sleep_forever() + + # begin real-time updates of shm and tsb once the feed goes live and + # the backend will indicate when real-time quotes have begun. + await feed_is_live.wait() + + # insert 1m ohlc into the increment buffer set + # to shift every 60s. + sampler.ohlcv_shms.setdefault(60, []).append(hist_shm) + + # create buffer a single incrementer task broker backend + # (aka `brokerd`) using the lowest sampler period. + if sampler.incrementers.get(_default_delay_s) is None: + await bus.start_task( + increment_ohlc_buffer, + _default_delay_s, + ) + + sum_tick_vlm: bool = init_msg.get( + 'shm_write_opts', {} + ).get('sum_tick_vlm', True) + + # NOTE: if no high-freq sampled data has (yet) been loaded, + # seed the buffer with a history datum - this is most handy + # for many backends which don't sample @ 1s OHLC but do have + # slower data such as 1m OHLC. + if not len(rt_shm.array): + rt_shm.push(hist_shm.array[-3:-1]) + ohlckeys = ['open', 'high', 'low', 'close'] + rt_shm.array[ohlckeys][-2:] = hist_shm.array['close'][-1] + rt_shm.array['volume'][-2] = 0 + + # start sample loop and shm incrementer task for OHLC style sampling + # at the above registered step periods. 
+ try: + await sample_and_broadcast( + bus, + rt_shm, + hist_shm, + quote_stream, + brokername, + sum_tick_vlm + ) + finally: + log.warning(f'{fqsn} feed task terminated') + + +@tractor.context +async def open_feed_bus( + + ctx: tractor.Context, + brokername: str, + symbols: list[str], # normally expected to the broker-specific fqsn + + loglevel: str = 'error', + tick_throttle: Optional[float] = None, + start_stream: bool = True, + +) -> dict[ + str, # fqsn + tuple[dict, dict] # pair of dicts of the initmsg and first quotes +]: + ''' + Open a data feed "bus": an actor-persistent per-broker task-oriented + data feed registry which allows managing real-time quote streams per + symbol. + + ''' + if loglevel is None: + loglevel = tractor.current_actor().loglevel + + # XXX: required to propagate ``tractor`` loglevel to piker logging + get_console_log(loglevel or tractor.current_actor().loglevel) + + # local state sanity checks + # TODO: check for any stale shm entries for this symbol + # (after we also group them in a nice `/dev/shm/piker/` subdir). + # ensure we are who we think we are + servicename = tractor.current_actor().name + assert 'brokerd' in servicename + assert brokername in servicename + + bus = get_feed_bus(brokername) + + flumes: dict[str, Flume] = {} + for symbol in symbols: + # if no cached feed for this symbol has been created for this + # brokerd yet, start persistent stream and shm writer task in + # service nursery + entry = bus.feeds.get(symbol) + if entry is None: + # allocate a new actor-local stream bus which + # will persist for this `brokerd`'s service lifetime. + async with bus.task_lock: + await bus.nursery.start( + partial( + allocate_persistent_feed, + + bus=bus, + brokername=brokername, + # here we pass through the selected symbol in native + # "format" (i.e. upper vs. lowercase depending on + # provider). + symstr=symbol, + loglevel=loglevel, + start_stream=start_stream, + ) + ) + # TODO: we can remove this? 
+ # assert isinstance(bus.feeds[symbol], tuple) + + # XXX: ``first_quotes`` may be outdated here if this is secondary + # subscriber + # init_msg, first_quotes = bus.feeds[symbol] + flume = bus.feeds[symbol] + # assert bus.feeds[bfqsn] is flume + + # msg = init_msg[symbol] + # bfqsn = msg['fqsn'].lower() + bfqsn = flume.symbol.key + + # true fqsn + fqsn = '.'.join([bfqsn, brokername]) + assert fqsn == flume.symbol.fqsn + # assert fqsn in first_quotes + + # broker-ambiguous symbol (provided on cli - eg. mnq.globex.ib) + # bsym = symbol + f'.{brokername}' + # assert bsym in first_quotes + + # pack for ``.started()`` sync msg + flumes[fqsn] = flume + + # we use the broker-specific fqsn (bfqsn) for + # the sampler subscription since the backend isn't (yet) + # expected to append it's own name to the fqsn, so we filter + # on keys which *do not* include that name (e.g .ib) . + bus._subscribers.setdefault(bfqsn, []) + + # send this even to subscribers to existing feed? + # deliver initial info message a first quote asap + await ctx.started(flumes) + # init_msg, + # first_quotes, + # )) + + if not start_stream: + log.warning(f'Not opening real-time stream for {fqsn}') + await trio.sleep_forever() + + # real-time stream loop + async with ( + ctx.open_stream() as stream, + ): + + local_subs: list = [] + for fqsn, flume in flumes.items(): + # re-send to trigger display loop cycle (necessary especially + # when the mkt is closed and no real-time messages are + # expected). + await stream.send({fqsn: flume.first_quote}) + + # set a common msg stream for all requested symbols + flume.stream = stream + + # Add a real-time quote subscription to feed bus: + # This ``sub`` subscriber entry is added to the feed bus set so + # that the ``sample_and_broadcast()`` task (spawned inside + # ``allocate_persistent_feed()``) will push real-time quote + # (ticks) to this new consumer. 
+ + if tick_throttle: + flume.throttle_rate = tick_throttle + + # open a bg task which receives quotes over a mem chan + # and only pushes them to the target actor-consumer at + # a max ``tick_throttle`` instantaneous rate. + send, recv = trio.open_memory_channel(2**10) + + cs = await bus.start_task( + uniform_rate_send, + tick_throttle, + recv, + stream, + ) + # NOTE: so the ``send`` channel here is actually a swapped + # in trio mem chan which gets pushed by the normal sampler + # task but instead of being sent directly over the IPC msg + # stream it's the throttle task does the work of + # incrementally forwarding to the IPC stream at the throttle + # rate. + sub = (send, ctx, tick_throttle) + + else: + sub = (stream, ctx, tick_throttle) + + # TODO: add an api for this on the bus? + # maybe use the current task-id to key the sub list that's + # added / removed? Or maybe we can add a general + # pause-resume by sub-key api? + bus_subs = bus._subscribers[bfqsn] + bus_subs.append(sub) + local_subs.append(sub) + + try: + uid = ctx.chan.uid + + # ctrl protocol for start/stop of quote streams based on UI + # state (eg. don't need a stream when a symbol isn't being + # displayed). 
+ async for msg in stream: + + if msg == 'pause': + for sub in local_subs: + if sub in bus_subs: + log.info( + f'Pausing {fqsn} feed for {uid}') + bus_subs.remove(sub) + + elif msg == 'resume': + for sub in local_subs: + if sub not in bus_subs: + log.info( + f'Resuming {fqsn} feed for {uid}') + bus_subs.append(sub) + else: + raise ValueError(msg) + finally: + log.info( + f'Stopping {symbol}.{brokername} feed for {ctx.chan.uid}') + + if tick_throttle: + # TODO: a one-cancels-one nursery + # n.cancel_scope.cancel() + cs.cancel() + + # drop all subs for this task from the bus + for sub in local_subs: + try: + bus._subscribers[bfqsn].remove(sub) + except ValueError: + log.warning(f'{sub} for {symbol} was already removed?') + + +# @dataclass +class Feed(Struct): + ''' + A per-provider API for client-side consumption from real-time data + (streaming) sources, normally brokers and data services. + + This is a somewhat thin abstraction on top of + a ``tractor.MsgStream`` plus associate share memory buffers which + can be read in a readers-writer-lock style IPC configuration. + + Furhter, there is direct access to slower sampled historical data through + similarly allocated shm arrays. + + ''' + # name: str + # hist_shm: ShmArray + # rt_shm: ShmArray + mod: ModuleType + _portal: tractor.Portal + # symbol names to first quote dicts + # shms: dict[str, tuple[ShmArray, Shmarray]] + flumes: dict[str, Flume] = {} + # first_quotes: dict[str, dict] = {} + streams: dict[ + str, + trio.abc.ReceiveChannel[dict[str, Any]], + ] = {} + status: dict[str, Any] + + # izero_hist: int = 0 + # izero_rt: int = 0 + # throttle_rate: Optional[int] = None + + _max_sample_rate: int = 1 + + # cache of symbol info messages received as first message when + # a stream startsc. 
+ # symbols: dict[str, Symbol] = {} + + @property + def portal(self) -> tractor.Portal: + return self._portal + + @property + def name(self) -> str: + return self.mod.name + @acm async def install_brokerd_search( @@ -1320,118 +1496,172 @@ async def open_feed( Open a "data feed" which provides streamed real-time quotes. ''' - fqsn = fqsns[0].lower() + # fqsn = fqsns[0].lower() - brokername, key, suffix = unpack_fqsn(fqsn) - bfqsn = fqsn.replace('.' + brokername, '') + providers: dict[ModuleType, list[str]] = {} - try: - mod = get_brokermod(brokername) - except ImportError: - mod = get_ingestormod(brokername) - - # no feed for broker exists so maybe spawn a data brokerd - async with ( - - # if no `brokerd` for this backend exists yet we spawn - # and actor for one. - maybe_spawn_brokerd( - brokername, - loglevel=loglevel - ) as portal, - - # (allocate and) connect to any feed bus for this broker - portal.open_context( - open_feed_bus, - brokername=brokername, - symbol=bfqsn, - loglevel=loglevel, - start_stream=start_stream, - tick_throttle=tick_throttle, - - ) as (ctx, (init_msg, first_quotes)), - - ctx.open_stream( - # XXX: be explicit about stream backpressure since we should - # **never** overrun on feeds being too fast, which will - # pretty much always happen with HFT XD - backpressure=backpressure, - ) as stream, - - ): - init = init_msg[bfqsn] - # we can only read from shm - hist_shm = attach_shm_array( - token=init['hist_shm_token'], - readonly=True, - ) - rt_shm = attach_shm_array( - token=init['rt_shm_token'], - readonly=True, - ) - - assert fqsn in first_quotes - - feed = Feed( - name=brokername, - hist_shm=hist_shm, - rt_shm=rt_shm, - mod=mod, - first_quotes=first_quotes, - stream=stream, - _portal=portal, - status={}, - izero_hist=init['izero_hist'], - izero_rt=init['izero_rt'], - throttle_rate=tick_throttle, - ) - - # fill out "status info" that the UI can show - host, port = feed.portal.channel.raddr - if host == '127.0.0.1': - host = 'localhost' - - 
feed.status.update({ - 'actor_name': feed.portal.channel.uid[0], - 'host': host, - 'port': port, - 'shm': f'{humanize(feed.hist_shm._shm.size)}', - 'throttle_rate': feed.throttle_rate, - }) - feed.status.update(init_msg.pop('status', {})) - - for sym, data in init_msg.items(): - si = data['symbol_info'] - fqsn = data['fqsn'] + f'.{brokername}' - symbol = Symbol.from_fqsn( - fqsn, - info=si, - ) - - # symbol.broker_info[brokername] = si - feed.symbols[fqsn] = symbol - feed.symbols[f'{sym}.{brokername}'] = symbol - - # cast shm dtype to list... can't member why we need this - for shm_key, shm in [ - ('rt_shm_token', rt_shm), - ('hist_shm_token', hist_shm), - ]: - shm_token = data[shm_key] - - # XXX: msgspec won't relay through the tuples XD - shm_token['dtype_descr'] = tuple( - map(tuple, shm_token['dtype_descr'])) - - assert shm_token == shm.token # sanity - - feed._max_sample_rate = 1 + for fqsn in fqsns: + brokername, key, suffix = unpack_fqsn(fqsn) + bfqsn = fqsn.replace('.' + brokername, '') try: - yield feed - finally: - # drop the infinite stream connection - await ctx.cancel() + mod = get_brokermod(brokername) + except ImportError: + mod = get_ingestormod(brokername) + + # built a per-provider map to instrument names + providers.setdefault(mod, []).append(bfqsn) + + # one actor per brokerd for now + brokerd_ctxs = [] + + for brokermod, bfqsns in providers.items(): + + # if no `brokerd` for this backend exists yet we spawn + # a daemon actor for it. 
+ brokerd_ctxs.append( + maybe_spawn_brokerd( + brokermod.name, + loglevel=loglevel + ) + ) + + portals: tuple[tractor.Portal] + async with gather_contexts( + brokerd_ctxs, + ) as portals: + + bus_ctxs = [] + for ( + portal, + (brokermod, bfqsns), + ) in zip(portals, providers.items()): + + feed = Feed( + mod=brokermod, + _portal=portal, + status={}, + ) + # fill out "status info" that the UI can show + host, port = feed.portal.channel.raddr + if host == '127.0.0.1': + host = 'localhost' + + feed.status.update({ + 'actor_name': feed.portal.channel.uid[0], + 'host': host, + 'port': port, + # 'shm': f'{humanize(feed.hist_shm._shm.size)}', + # 'throttle_rate': feed.throttle_rate, + }) + # feed.status.update(init_msg.pop('status', {})) + + # (allocate and) connect to any feed bus for this broker + bus_ctxs.append( + portal.open_context( + open_feed_bus, + brokername=brokername, + symbols=bfqsns, + loglevel=loglevel, + start_stream=start_stream, + tick_throttle=tick_throttle, + ) + ) + + async with ( + gather_contexts(bus_ctxs) as ctxs, + ): + for ( + (ctx, flumes_msg_dict), + (brokermod, bfqsns), + ) in zip(ctxs, providers.items()): + + stream_ctxs = [] + for fqsn, flume_msg in flumes_msg_dict.items(): + flume = Flume.from_msg(flume_msg) + assert flume.symbol.fqsn == fqsn + feed.flumes[fqsn] = flume + + # TODO: this is ugly but eventually we could + # in theory do all this "tabling" of flumes on + # the brokerd-side, in which case we'll likely + # want to make each flume IPC-msg-native? 
+ # bfqsn = list(init_msgs)[0] + # init = init_msg[bfqsn] + + # si = data['symbol_info'] + # fqsn = data['fqsn'] + f'.{brokername}' + # symbol = Symbol.from_fqsn( + # fqsn, + # info=si, + # ) + + # attach and cache shm handles + rt_shm = flume.rt_shm + assert rt_shm + hist_shm = flume.hist_shm + assert hist_shm + + stream_ctxs.append( + ctx.open_stream( + # XXX: be explicit about stream backpressure since we should + # **never** overrun on feeds being too fast, which will + # pretty much always happen with HFT XD + backpressure=backpressure, + ) + ) + + async with ( + gather_contexts(stream_ctxs) as streams, + ): + for ( + stream, + (brokermod, bfqsns), + ) in zip(streams, providers.items()): + + for bfqsn in bfqsns: + fqsn = '.'.join((bfqsn, brokermod.name)) + + # apply common rt steam to each flume + # (normally one per broker) + feed.flumes[fqsn].stream = stream + feed.streams[brokermod.name] = stream + + try: + yield feed + finally: + # drop the infinite stream connection + await ctx.cancel() + + # we can only read from shm + # hist_shm = attach_shm_array( + # token=init['hist_shm_token'], + # readonly=True, + # ) + # rt_shm = attach_shm_array( + # token=init['rt_shm_token'], + # readonly=True, + # ) + + # for sym, data in init_msg.items(): + + # symbol.broker_info[brokername] = si + # feed.symbols[fqsn] = symbol + # feed.symbols[f'{sym}.{brokername}'] = symbol + + # cast shm dtype to list... 
can't member why we need this + # for shm_key, shm in [ + # ('rt_shm_token', rt_shm), + # ('hist_shm_token', hist_shm), + # ]: + # shm_token = flume[shm_key] + + # # XXX: msgspec won't relay through the tuples XD + # shm_token['dtype_descr'] = tuple( + # map(tuple, shm_token['dtype_descr'])) + + # assert shm_token == shm.token # sanity + # assert fqsn in first_quotes @acm diff --git a/tests/test_feeds.py b/tests/test_feeds.py index 3c1104a9..6b91838c 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -5,6 +5,7 @@ Data feed layer APIs, performance, msg throttling. from pprint import pprint import pytest +import tractor import trio from piker import ( open_piker_runtime, @@ -16,7 +17,7 @@ from piker.data import ShmArray @pytest.mark.parametrize( 'fqsns', [ - ['btcusdt.binance'] + ['btcusdt.binance', 'ethusdt.binance'] ], ids=lambda param: f'fqsns={param}', ) @@ -30,7 +31,13 @@ def test_basic_rt_feed( ''' async def main(): async with ( - open_piker_runtime('test_basic_rt_feed'), + open_piker_runtime( + 'test_basic_rt_feed', + # XXX tractor BUG: this doesn't translate through to the + # ``tractor._state._runtimevars``... + registry_addr=('127.0.0.1', 6666), + debug_mode=True, + ), open_feed( fqsns, loglevel='info', @@ -42,24 +49,38 @@ def test_basic_rt_feed( ) as feed ): + # verify shm buffers exist for fqin in fqsns: - assert feed.symbols[fqin] + flume = feed.flumes[fqin] + ohlcv: ShmArray = flume.rt_shm + hist_ohlcv: ShmArray = flume.hist_shm - ohlcv: ShmArray = feed.rt_shm - hist_ohlcv: ShmArray = feed.hist_shm + quote_count: int = 0 + stream = feed.streams['binance'] + async for quotes in stream: + for fqsn, quote in quotes.items(): - count: int = 0 - async for quotes in feed.stream: + # await tractor.breakpoint() + flume = feed.flumes[fqsn] + ohlcv: ShmArray = flume.rt_shm + hist_ohlcv: ShmArray = flume.hist_shm - # print quote msg, rt and history - # buffer values on console. 
- pprint(quotes) - pprint(ohlcv.array[-1]) - pprint(hist_ohlcv.array[-1]) + # print quote msg, rt and history + # buffer values on console. + rt_row = ohlcv.array[-1] + hist_row = hist_ohlcv.array[-1] + # last = quote['last'] - if count >= 100: + # assert last == rt_row['close'] + # assert last == hist_row['close'] + pprint( + f'{fqsn}: {quote}\n' + f'rt_ohlc: {rt_row}\n' + f'hist_ohlc: {hist_row}\n' + ) + quote_count += 1 + + if quote_count >= 100: break - count += 1 - trio.run(main) From e7de5404d3722ab88ab9c106dc339af5b0cabd11 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Nov 2022 12:58:32 -0500 Subject: [PATCH 11/49] Add `Symbol.fqsn: str` property --- piker/data/_source.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/piker/data/_source.py b/piker/data/_source.py index 73c218ca..9f0b35b0 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -217,6 +217,10 @@ class Symbol(Struct): else: return (key, broker) + @property + def fqsn(self) -> str: + return '.'.join(self.tokens()).lower() + def front_fqsn(self) -> str: ''' fqsn = "fully qualified symbol name" From 32b36aa0427541b51b5251ffe147203628154c77 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Nov 2022 13:03:53 -0500 Subject: [PATCH 12/49] Expect init startup quotes from each symbol --- tests/test_feeds.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_feeds.py b/tests/test_feeds.py index 6b91838c..b0b97690 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -5,7 +5,7 @@ Data feed layer APIs, performance, msg throttling. from pprint import pprint import pytest -import tractor +# import tractor import trio from piker import ( open_piker_runtime, @@ -37,6 +37,7 @@ def test_basic_rt_feed( # ``tractor._state._runtimevars``... 
registry_addr=('127.0.0.1', 6666), debug_mode=True, + loglevel='runtime', ), open_feed( fqsns, @@ -55,8 +56,20 @@ def test_basic_rt_feed( ohlcv: ShmArray = flume.rt_shm hist_ohlcv: ShmArray = flume.hist_shm + # stream some ticks and ensure we see data from both symbol + # subscriptions. quote_count: int = 0 stream = feed.streams['binance'] + + # pull the first couple startup quotes and ensure + # they match the history buffer last entries. + for _ in range(1): + first_quotes = await stream.receive() + for fqsn, quote in first_quotes.items(): + assert fqsn in fqsns + flume = feed.flumes[fqsn] + assert quote['last'] == flume.first_quote['last'] + async for quotes in stream: for fqsn, quote in quotes.items(): From 25bfe6f035edd6bb556a5b91d330cbcc837ca2f1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Nov 2022 13:05:51 -0500 Subject: [PATCH 13/49] Use new |-union style type annots in sampling routines --- piker/data/_sampling.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 015de05e..1a410309 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -22,7 +22,10 @@ financial data flows. from __future__ import annotations from collections import Counter import time -from typing import TYPE_CHECKING, Optional, Union +from typing import ( + TYPE_CHECKING, + Union, +) import tractor import trio @@ -147,7 +150,7 @@ async def increment_ohlc_buffer( async def broadcast( delay_s: int, - shm: Optional[ShmArray] = None, + shm: ShmArray | None = None, ) -> None: ''' @@ -241,6 +244,8 @@ async def sample_and_broadcast( # iterate stream delivered by broker async for quotes in quote_stream: + # print(quotes) + # TODO: ``numba`` this! 
for broker_symbol, quote in quotes.items(): # TODO: in theory you can send the IPC msg *before* writing @@ -314,7 +319,7 @@ async def sample_and_broadcast( tuple[ Union[tractor.MsgStream, trio.MemorySendChannel], tractor.Context, - Optional[float], # tick throttle in Hz + float | None, # tick throttle in Hz ] ] = bus._subscribers[broker_symbol.lower()] From bb6452b969b7c8ddb89370bf23b8cbf93d631904 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Nov 2022 18:57:15 -0500 Subject: [PATCH 14/49] Further feed syncing fixes wrt to `Flumes` Sync per-symbol sampler loop start to subscription registers such that the loop can't start until the consumer's stream subscription is added; the task-sync uses a `trio.Event`. This patch also drops a ton of commented cruft. Further adjustments needed to get parity with prior functionality: - pass init msg 'symbol_info' field to the `Symbol.broker_info: dict`. - ensure the `_FeedsBus._subscriptions` table uses the broker specific (without brokername suffix) as keys for lookup so that the sampler loop doesn't have to append in the brokername as a suffix. - ensure the `open_feed_bus()` flumes-table-msg returned sent by `tractor.Context.started()` uses the `.to_msg()` form of all flume structs. - ensure `maybe_open_feed()` uses `tractor.MsgStream.subscribe()` on all `Flume.stream`s on cache hits using the `tractor.trionics.gather_contexts()` helper. --- piker/data/feed.py | 229 +++++++++++++++----------------------------- tests/test_feeds.py | 35 ++++--- 2 files changed, 103 insertions(+), 161 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index e87c00be..1a5eba0c 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -22,10 +22,6 @@ This module is enabled for ``brokerd`` daemons. 
""" from __future__ import annotations from contextlib import asynccontextmanager as acm -# from dataclasses import ( -# dataclass, -# field, -# ) from datetime import datetime from functools import partial from types import ModuleType @@ -36,7 +32,6 @@ from typing import ( Callable, Optional, Awaitable, - Sequence, TYPE_CHECKING, Union, ) @@ -243,7 +238,7 @@ def diff_history( time = array['time'] to_push = array[time >= last_tsdb_dt.timestamp()] - log.info( + log.debug( f'Pushing partial frame {to_push.size} to shm' ) @@ -359,7 +354,7 @@ async def start_backfill( # last retrieved start dt to the next request as # it's end dt. while start_dt > last_tsdb_dt: - log.info( + log.debug( f'Requesting {step_size_s}s frame ending in {start_dt}' ) @@ -721,13 +716,18 @@ async def manage_history( ''' - from tractor._state import _runtime_vars - port = _runtime_vars['_root_mailbox'][1] + # TODO: is there a way to make each shm file key + # actor-tree-discovery-addr unique so we avoid collisions + # when doing tests which also allocate shms for certain instruments + # that may be in use on the system by some other running daemons? + # from tractor._state import _runtime_vars + # port = _runtime_vars['_root_mailbox'][1] # (maybe) allocate shm array for this broker/symbol which will # be used for fast near-term history capture and processing. 
hist_shm, opened = maybe_open_shm_array( - key=f'{fqsn}_hist', #_p{port}', + # key=f'{fqsn}_hist_p{port}', + key=f'{fqsn}_hist', # use any broker defined ohlc dtype: dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), @@ -744,7 +744,8 @@ async def manage_history( ) rt_shm, opened = maybe_open_shm_array( - key=f'{fqsn}_rt', #_p{port}', + # key=f'{fqsn}_rt_p{port}', + key=f'{fqsn}_rt', # use any broker defined ohlc dtype: dtype=getattr(mod, '_ohlc_dtype', base_iohlc_dtype), @@ -874,6 +875,7 @@ class Flume(Struct): izero_hist: int = 0 izero_rt: int = 0 throttle_rate: int | None = None + feed: Feed | None = None @property def rt_shm(self) -> ShmArray: @@ -907,12 +909,15 @@ class Flume(Struct): ) -> AsyncIterator[int]: + if not self.feed: + raise RuntimeError('This flume is not part of any ``Feed``?') + # XXX: this should be singleton on a host, # a lone broker-daemon per provider should be # created for all practical purposes async with maybe_open_context( acm_func=partial( - self.portal.open_context, + self.feed.portal.open_context, iter_ohlc_periods, ), kwargs={'delay_s': delay_s}, @@ -964,11 +969,12 @@ class Flume(Struct): def to_msg(self) -> dict: msg = self.to_dict() msg['symbol'] = msg['symbol'].to_dict() - # can't serialize the stream object, it's - # expected you'll have a ref to it since - # this msg should be rxed on a stream on - # whatever far end IPC.. + + # can't serialize the stream or feed objects, it's expected + # you'll have a ref to it since this msg should be rxed on + # a stream on whatever far end IPC.. 
msg.pop('stream') + msg.pop('feed') return msg @classmethod @@ -982,6 +988,7 @@ class Flume(Struct): async def allocate_persistent_feed( bus: _FeedsBus, + sub_registered: trio.Event, brokername: str, symstr: str, @@ -1064,8 +1071,9 @@ async def allocate_persistent_feed( symbol = Symbol.from_fqsn( fqsn=fqsn, - info=msg, + info=msg['symbol_info'], ) + assert symbol.type_key # HISTORY storage, run 2 tasks: # - a history loader / maintainer @@ -1090,46 +1098,16 @@ async def allocate_persistent_feed( feed_is_live, ) - # we hand an IPC-msg compatible shm token to the caller so it - # can read directly from the memory which will be written by - # this task. - - # msg['hist_shm_token'] = hist_shm.token - # msg['izero_hist'] = izero_hist - # msg['izero_rt'] = izero_rt - # msg['rt_shm_token'] = rt_shm.token - - # add a fqsn entry that includes the ``.`` suffix - # and an entry that includes the broker-specific fqsn (including - # any new suffixes or elements as injected by the backend). - # init_msg[fqsn] = msg - # init_msg[bfqsn] = msg - - # TODO: pretty sure we don't need this? why not just leave 1s as - # the fastest "sample period" since we'll probably always want that - # for most purposes. - # pass OHLC sample rate in seconds (be sure to use python int type) - # init_msg[symbol]['sample_rate'] = 1 #int(delay_s) - # yield back control to starting nursery once we receive either # some history or a real-time quote. 
log.info(f'waiting on history to load: {fqsn}') await some_data_ready.wait() - # append ``.`` suffix to each quote symbol - # acceptable_not_fqsn_with_broker_suffix = symbol + f'.{brokername}' - - # generic_first_quotes = { - # acceptable_not_fqsn_with_broker_suffix: first_quote, - # fqsn: first_quote, - # } - flume = Flume( symbol=symbol, _hist_shm_token=hist_shm.token, _rt_shm_token=rt_shm.token, first_quote=first_quote, - # stream=stream, izero_hist=izero_hist, izero_rt=izero_rt, # throttle_rate=tick_throttle, @@ -1138,9 +1116,6 @@ async def allocate_persistent_feed( # for ambiguous names we simply apply the retreived # feed to that name (for now). bus.feeds[symstr] = bus.feeds[bfqsn] = flume - # init_msg, - # generic_first_quotes, - # ) # insert 1s ohlc into the increment buffer set # to update and shift every second @@ -1184,9 +1159,14 @@ async def allocate_persistent_feed( rt_shm.array[ohlckeys][-2:] = hist_shm.array['close'][-1] rt_shm.array['volume'][-2] = 0 + # wait the spawning parent task to register its subscriber + # send-stream entry before we start the sample loop. + await sub_registered.wait() + # start sample loop and shm incrementer task for OHLC style sampling # at the above registered step periods. try: + log.info(f'Starting sampler task for {fqsn}') await sample_and_broadcast( bus, rt_shm, @@ -1235,14 +1215,16 @@ async def open_feed_bus( assert brokername in servicename bus = get_feed_bus(brokername) + sub_registered = trio.Event() flumes: dict[str, Flume] = {} + for symbol in symbols: # if no cached feed for this symbol has been created for this # brokerd yet, start persistent stream and shm writer task in # service nursery - entry = bus.feeds.get(symbol) - if entry is None: + flume = bus.feeds.get(symbol) + if flume is None: # allocate a new actor-local stream bus which # will persist for this `brokerd`'s service lifetime. 
async with bus.task_lock: @@ -1251,6 +1233,7 @@ async def open_feed_bus( allocate_persistent_feed, bus=bus, + sub_registered=sub_registered, brokername=brokername, # here we pass through the selected symbol in native # "format" (i.e. upper vs. lowercase depending on @@ -1263,24 +1246,17 @@ async def open_feed_bus( # TODO: we can remove this? # assert isinstance(bus.feeds[symbol], tuple) - # XXX: ``first_quotes`` may be outdated here if this is secondary + # XXX: ``.first_quote`` may be outdated here if this is secondary # subscriber - # init_msg, first_quotes = bus.feeds[symbol] flume = bus.feeds[symbol] - # assert bus.feeds[bfqsn] is flume + sym = flume.symbol + bfqsn = sym.key + fqsn = sym.fqsn # true fqsn + assert bfqsn in fqsn and brokername in fqsn - # msg = init_msg[symbol] - # bfqsn = msg['fqsn'].lower() - bfqsn = flume.symbol.key - - # true fqsn - fqsn = '.'.join([bfqsn, brokername]) - assert fqsn == flume.symbol.fqsn - # assert fqsn in first_quotes - - # broker-ambiguous symbol (provided on cli - eg. mnq.globex.ib) - # bsym = symbol + f'.{brokername}' - # assert bsym in first_quotes + if sym.suffix: + bfqsn = fqsn.rstrip(f'.{brokername}') + log.warning(f'{brokername} expanded symbol {symbol} -> {bfqsn}') # pack for ``.started()`` sync msg flumes[fqsn] = flume @@ -1290,13 +1266,12 @@ async def open_feed_bus( # expected to append it's own name to the fqsn, so we filter # on keys which *do not* include that name (e.g .ib) . bus._subscribers.setdefault(bfqsn, []) + # await tractor.breakpoint() - # send this even to subscribers to existing feed? 
- # deliver initial info message a first quote asap - await ctx.started(flumes) - # init_msg, - # first_quotes, - # )) + # sync feed subscribers with flume handles + await ctx.started( + {fqsn: flume.to_msg() for fqsn, flume in flumes.items()} + ) if not start_stream: log.warning(f'Not opening real-time stream for {fqsn}') @@ -1352,10 +1327,13 @@ async def open_feed_bus( # maybe use the current task-id to key the sub list that's # added / removed? Or maybe we can add a general # pause-resume by sub-key api? + bfqsn = fqsn.rstrip(f'.{brokername}') bus_subs = bus._subscribers[bfqsn] bus_subs.append(sub) local_subs.append(sub) + sub_registered.set() + try: uid = ctx.chan.uid @@ -1396,7 +1374,6 @@ async def open_feed_bus( log.warning(f'{sub} for {symbol} was already removed?') -# @dataclass class Feed(Struct): ''' A per-provider API for client-side consumption from real-time data @@ -1410,31 +1387,17 @@ class Feed(Struct): similarly allocated shm arrays. ''' - # name: str - # hist_shm: ShmArray - # rt_shm: ShmArray mod: ModuleType _portal: tractor.Portal - # symbol names to first quote dicts - # shms: dict[str, tuple[ShmArray, Shmarray]] flumes: dict[str, Flume] = {} - # first_quotes: dict[str, dict] = {} streams: dict[ str, trio.abc.ReceiveChannel[dict[str, Any]], ] = {} status: dict[str, Any] - # izero_hist: int = 0 - # izero_rt: int = 0 - # throttle_rate: Optional[int] = None - _max_sample_rate: int = 1 - # cache of symbol info messages received as first message when - # a stream startsc. - # symbols: dict[str, Symbol] = {} - @property def portal(self) -> tractor.Portal: return self._portal @@ -1496,8 +1459,6 @@ async def open_feed( Open a "data feed" which provides streamed real-time quotes. 
''' - # fqsn = fqsns[0].lower() - providers: dict[ModuleType, list[str]] = {} for fqsn in fqsns: @@ -1531,7 +1492,7 @@ async def open_feed( brokerd_ctxs, ) as portals: - bus_ctxs = [] + bus_ctxs: list[AsyncContextManager] = [] for ( portal, (brokermod, bfqsns), @@ -1551,8 +1512,9 @@ async def open_feed( 'actor_name': feed.portal.channel.uid[0], 'host': host, 'port': port, - # 'shm': f'{humanize(feed.hist_shm._shm.size)}', - # 'throttle_rate': feed.throttle_rate, + 'hist_shm': 'NA', + 'rt_shm': 'NA', + 'throttle_rate': tick_throttle, }) # feed.status.update(init_msg.pop('status', {})) @@ -1571,6 +1533,7 @@ async def open_feed( async with ( gather_contexts(bus_ctxs) as ctxs, ): + remote_scopes = [] for ( (ctx, flumes_msg_dict), (brokermod, bfqsns), @@ -1581,20 +1544,7 @@ async def open_feed( flume = Flume.from_msg(flume_msg) assert flume.symbol.fqsn == fqsn feed.flumes[fqsn] = flume - - # TODO: this is ugly but eventually we could - # in theory do all this "tabling" of flumes on - # the brokerd-side, in which case we'll likely - # want to make each flume IPC-msg-native? 
- # bfqsn = list(init_msgs)[0] - # init = init_msg[bfqsn] - - # si = data['symbol_info'] - # fqsn = data['fqsn'] + f'.{brokername}' - # symbol = Symbol.from_fqsn( - # fqsn, - # info=si, - # ) + flume.feed = feed # attach and cache shm handles rt_shm = flume.rt_shm @@ -1602,11 +1552,18 @@ async def open_feed( hist_shm = flume.hist_shm assert hist_shm + feed.status['hist_shm'] = ( + f'{humanize(hist_shm._shm.size)}' + ) + feed.status['rt_shm'] = f'{humanize(rt_shm._shm.size)}' + + remote_scopes.append(ctx) stream_ctxs.append( ctx.open_stream( - # XXX: be explicit about stream backpressure since we should - # **never** overrun on feeds being too fast, which will - # pretty much always happen with HFT XD + # XXX: be explicit about stream backpressure + # since we should **never** overrun on feeds + # being too fast, which will pretty much + # always happen with HFT XD backpressure=backpressure, ) ) @@ -1619,49 +1576,15 @@ async def open_feed( (brokermod, bfqsns), ) in zip(streams, providers.items()): - for bfqsn in bfqsns: - fqsn = '.'.join((bfqsn, brokermod.name)) + # for bfqsn in bfqsns: + for fqsn in flumes_msg_dict: # apply common rt steam to each flume # (normally one per broker) feed.flumes[fqsn].stream = stream feed.streams[brokermod.name] = stream - try: - yield feed - finally: - # drop the infinite stream connection - await ctx.cancel() - - # we can only read from shm - # hist_shm = attach_shm_array( - # token=init['hist_shm_token'], - # readonly=True, - # ) - # rt_shm = attach_shm_array( - # token=init['rt_shm_token'], - # readonly=True, - # ) - - # for sym, data in init_msg.items(): - - # symbol.broker_info[brokername] = si - # feed.symbols[fqsn] = symbol - # feed.symbols[f'{sym}.{brokername}'] = symbol - - # cast shm dtype to list... 
can't member why we need this - # for shm_key, shm in [ - # ('rt_shm_token', rt_shm), - # ('hist_shm_token', hist_shm), - # ]: - # shm_token = flume[shm_key] - - # # XXX: msgspec won't relay through the tuples XD - # shm_token['dtype_descr'] = tuple( - # map(tuple, shm_token['dtype_descr'])) - - # assert shm_token == shm.token # sanity - # assert fqsn in first_quotes + yield feed @acm @@ -1703,7 +1626,13 @@ async def maybe_open_feed( log.info(f'Using cached feed for {fqsn}') # add a new broadcast subscription for the quote stream # if this feed is likely already in use - async with feed.stream.subscribe() as bstream: - yield feed, bstream + + async with gather_contexts( + mngrs=[stream.subscribe() for stream in feed.streams.values()] + ) as bstreams: + for bstream, flume in zip(bstreams, feed.flumes.values()): + flume.stream = bstream + + yield feed else: - yield feed, feed.stream + yield feed diff --git a/tests/test_feeds.py b/tests/test_feeds.py index b0b97690..2fb5f693 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -2,10 +2,11 @@ Data feed layer APIs, performance, msg throttling. ''' +from collections import Counter from pprint import pprint import pytest -# import tractor +import tractor import trio from piker import ( open_piker_runtime, @@ -17,12 +18,12 @@ from piker.data import ShmArray @pytest.mark.parametrize( 'fqsns', [ - ['btcusdt.binance', 'ethusdt.binance'] + {'btcusdt.binance', 'ethusdt.binance'} ], ids=lambda param: f'fqsns={param}', ) def test_basic_rt_feed( - fqsns: list[str], + fqsns: set[str], ): ''' Start a real-time data feed for provided fqsn and pull @@ -33,11 +34,12 @@ def test_basic_rt_feed( async with ( open_piker_runtime( 'test_basic_rt_feed', + # XXX tractor BUG: this doesn't translate through to the # ``tractor._state._runtimevars``... 
registry_addr=('127.0.0.1', 6666), - debug_mode=True, - loglevel='runtime', + + # debug_mode=True, ), open_feed( fqsns, @@ -58,20 +60,29 @@ def test_basic_rt_feed( # stream some ticks and ensure we see data from both symbol # subscriptions. - quote_count: int = 0 stream = feed.streams['binance'] - # pull the first couple startup quotes and ensure - # they match the history buffer last entries. + # pull the first startup quotes, one for each fqsn, and + # ensure they match each flume's startup quote value. + fqsns_copy = fqsns.copy() for _ in range(1): first_quotes = await stream.receive() for fqsn, quote in first_quotes.items(): - assert fqsn in fqsns + + # XXX: TODO: WTF apparently this error will get + # supressed and only show up in the teardown + # excgroup if we don't have the fix from + # + # assert 0 + + fqsns_copy.remove(fqsn) flume = feed.flumes[fqsn] assert quote['last'] == flume.first_quote['last'] + cntr = Counter() async for quotes in stream: for fqsn, quote in quotes.items(): + cntr[fqsn] += 1 # await tractor.breakpoint() flume = feed.flumes[fqsn] @@ -91,9 +102,11 @@ def test_basic_rt_feed( f'rt_ohlc: {rt_row}\n' f'hist_ohlc: {hist_row}\n' ) - quote_count += 1 - if quote_count >= 100: + if cntr.total() >= 100: break + # await tractor.breakpoint() + assert set(cntr.keys()) == fqsns + trio.run(main) From 7daab6329d612007a1e631d92a53b38b0bdc0ccf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Nov 2022 22:10:01 -0500 Subject: [PATCH 15/49] Make `Symbol` derive from internal `.types.Struct` --- piker/data/_sampling.py | 1 - piker/data/_source.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 1a410309..edba3e1f 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -331,7 +331,6 @@ async def sample_and_broadcast( lags: int = 0 for (stream, ctx, tick_throttle) in subs: - try: with trio.move_on_after(0.2) as cs: if tick_throttle: diff --git 
a/piker/data/_source.py b/piker/data/_source.py index 9f0b35b0..87ba74a3 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -23,7 +23,8 @@ import decimal from bidict import bidict import numpy as np -from msgspec import Struct + +from .types import Struct # from numba import from_dtype From 2c4daf08e0f7e60514fc1e3add57d82da34f56af Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Nov 2022 22:30:15 -0500 Subject: [PATCH 16/49] Adjust to per-fqsn-oriented `Flume` lookups throughout --- piker/clearing/_ems.py | 23 ++++++++++------- piker/clearing/_paper_engine.py | 2 +- piker/fsp/_engine.py | 23 ++++++++--------- piker/ui/_chart.py | 6 +++-- piker/ui/_display.py | 44 +++++++++++++++++++-------------- piker/ui/_position.py | 15 ++++++----- piker/ui/order_mode.py | 14 ++++++----- 7 files changed, 73 insertions(+), 54 deletions(-) diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index 2147369b..b478abcc 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -48,6 +48,7 @@ from ..data._source import ( ) from ..data.feed import ( Feed, + Flume, maybe_open_feed, ) from ..ui._notify import notify_from_ems_status_msg @@ -523,13 +524,14 @@ class Router(Struct): maybe_open_feed( [fqsn], loglevel=loglevel, - ) as (feed, quote_stream), + ) as feed, ): brokermod = feed.mod broker = brokermod.name # XXX: this should be initial price quote from target provider - first_quote: dict = feed.first_quotes[fqsn] + flume = feed.flumes[fqsn] + first_quote: dict = flume.first_quote book: DarkBook = self.get_dark_book(broker) book.lasts[fqsn]: float = first_quote['last'] @@ -547,14 +549,16 @@ class Router(Struct): clear_dark_triggers, self, relay.brokerd_stream, - quote_stream, + flume.stream, broker, fqsn, # form: ... 
book ) client_ready = trio.Event() - task_status.started((relay, feed, client_ready)) + task_status.started( + (relay, feed, client_ready) + ) # sync to the client side by waiting for the stream # connection setup before relaying any existing live @@ -1014,7 +1018,7 @@ async def process_client_order_cmds( brokerd_order_stream: tractor.MsgStream, fqsn: str, - feed: Feed, + flume: Flume, dark_book: DarkBook, router: Router, @@ -1212,7 +1216,7 @@ async def process_client_order_cmds( 'size': size, 'exec_mode': exec_mode, 'action': action, - 'brokers': brokers, # list + 'brokers': _, # list } if ( # "DARK" triggers # submit order to local EMS book and scan loop, @@ -1234,12 +1238,12 @@ async def process_client_order_cmds( # sometimes the real-time feed hasn't come up # so just pull from the latest history. if isnan(last): - last = feed.rt_shm.array[-1]['close'] + last = flume.rt_shm.array[-1]['close'] pred = mk_check(trigger_price, last, action) spread_slap: float = 5 - min_tick = feed.symbols[fqsn].tick_size + min_tick = flume.symbol.tick_size if action == 'buy': tickfilter = ('ask', 'last', 'trade') @@ -1452,11 +1456,12 @@ async def _emsd_main( # start inbound (from attached client) order request processing # main entrypoint, run here until cancelled. 
try: + flume = feed.flumes[fqsn] await process_client_order_cmds( client_stream, brokerd_stream, fqsn, - feed, + flume, dark_book, _router, ) diff --git a/piker/clearing/_paper_engine.py b/piker/clearing/_paper_engine.py index 211a29fc..33ca5761 100644 --- a/piker/clearing/_paper_engine.py +++ b/piker/clearing/_paper_engine.py @@ -578,7 +578,7 @@ async def trades_dialogue( ) # paper engine simulator clearing task - await simulate_fills(feed.stream, client) + await simulate_fills(feed.streams[broker], client) @asynccontextmanager diff --git a/piker/fsp/_engine.py b/piker/fsp/_engine.py index 084ff510..eb5eaff4 100644 --- a/piker/fsp/_engine.py +++ b/piker/fsp/_engine.py @@ -26,7 +26,6 @@ from typing import ( ) import numpy as np -import pyqtgraph as pg import trio from trio_typing import TaskStatus import tractor @@ -35,7 +34,9 @@ from tractor.msg import NamespacePath from ..log import get_logger, get_console_log from .. import data from ..data import attach_shm_array -from ..data.feed import Feed +from ..data.feed import ( + Flume, +) from ..data._sharedmem import ShmArray from ..data._sampling import _default_delay_s from ..data._source import Symbol @@ -79,7 +80,7 @@ async def filter_quotes_by_sym( async def fsp_compute( symbol: Symbol, - feed: Feed, + flume: Flume, quote_stream: trio.abc.ReceiveChannel, src: ShmArray, @@ -107,7 +108,7 @@ async def fsp_compute( filter_quotes_by_sym(fqsn, quote_stream), # XXX: currently the ``ohlcv`` arg - feed.rt_shm, + flume.rt_shm, ) # Conduct a single iteration of fsp with historical bars input @@ -310,12 +311,12 @@ async def cascade( # needs to get throttled the ticks we generate. 
# tick_throttle=60, - ) as (feed, quote_stream): - symbol = feed.symbols[fqsn] + ) as feed: + flume = feed.flumes[fqsn] + symbol = flume.symbol + assert src.token == flume.rt_shm.token profiler(f'{func}: feed up') - - assert src.token == feed.rt_shm.token # last_len = new_len = len(src.array) func_name = func.__name__ @@ -327,8 +328,8 @@ async def cascade( fsp_compute, symbol=symbol, - feed=feed, - quote_stream=quote_stream, + flume=flume, + quote_stream=flume.stream, # shm src=src, @@ -430,7 +431,7 @@ async def cascade( # Increment the underlying shared memory buffer on every # "increment" msg received from the underlying data feed. - async with feed.index_stream( + async with flume.index_stream( int(delay_s) ) as istream: diff --git a/piker/ui/_chart.py b/piker/ui/_chart.py index f61ed1d7..93b41095 100644 --- a/piker/ui/_chart.py +++ b/piker/ui/_chart.py @@ -915,14 +915,16 @@ class ChartPlotWidget(pg.PlotWidget): def resume_all_feeds(self): try: for feed in self._feeds.values(): - self.linked.godwidget._root_n.start_soon(feed.resume) + for flume in feed.flumes.values(): + self.linked.godwidget._root_n.start_soon(flume.resume) except RuntimeError: # TODO: cancel the qtractor runtime here? 
raise def pause_all_feeds(self): for feed in self._feeds.values(): - self.linked.godwidget._root_n.start_soon(feed.pause) + for flume in feed.flumes.values(): + self.linked.godwidget._root_n.start_soon(flume.pause) @property def view(self) -> ChartView: diff --git a/piker/ui/_display.py b/piker/ui/_display.py index af4f8361..c7ed9299 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -33,6 +33,7 @@ import pyqtgraph as pg from ..data.feed import ( open_feed, Feed, + Flume, ) from ..data.types import Struct from ._axes import YAxisLabel @@ -228,7 +229,7 @@ async def graphics_update_loop( nurse: trio.Nursery, godwidget: GodWidget, - feed: Feed, + flume: Flume, wap_in_history: bool = False, vlm_chart: Optional[ChartPlotWidget] = None, @@ -255,8 +256,8 @@ async def graphics_update_loop( fast_chart = linked.chart hist_chart = godwidget.hist_linked.chart - ohlcv = feed.rt_shm - hist_ohlcv = feed.hist_shm + ohlcv = flume.rt_shm + hist_ohlcv = flume.hist_shm # update last price sticky last_price_sticky = fast_chart._ysticks[fast_chart.name] @@ -347,9 +348,9 @@ async def graphics_update_loop( 'i_last_append': i_last, 'i_last': i_last, } - _, hist_step_size_s, _ = feed.get_ds_info() + _, hist_step_size_s, _ = flume.get_ds_info() - async with feed.index_stream( + async with flume.index_stream( # int(hist_step_size_s) # TODO: seems this is more reliable at keeping the slow # chart incremented in view more correctly? 
@@ -393,7 +394,7 @@ async def graphics_update_loop( nurse.start_soon(increment_history_view) # main real-time quotes update loop - stream: tractor.MsgStream = feed.stream + stream: tractor.MsgStream = flume.stream async for quotes in stream: ds.quotes = quotes @@ -813,13 +814,13 @@ def graphics_update_cycle( async def link_views_with_region( rt_chart: ChartPlotWidget, hist_chart: ChartPlotWidget, - feed: Feed, + flume: Flume, ) -> None: # these value are be only pulled once during shm init/startup - izero_hist = feed.izero_hist - izero_rt = feed.izero_rt + izero_hist = flume.izero_hist + izero_rt = flume.izero_rt # Add the LinearRegionItem to the ViewBox, but tell the ViewBox # to exclude this item when doing auto-range calculations. @@ -846,7 +847,7 @@ async def link_views_with_region( # poll for datums load and timestep detection for _ in range(100): try: - _, _, ratio = feed.get_ds_info() + _, _, ratio = flume.get_ds_info() break except IndexError: await trio.sleep(0.01) @@ -977,8 +978,7 @@ async def display_symbol_data( group_key=True ) - first_fqsn = fqsns[0] - + feed: Feed async with open_feed( fqsns, loglevel=loglevel, @@ -988,11 +988,17 @@ async def display_symbol_data( tick_throttle=_quote_throttle_rate, ) as feed: - ohlcv: ShmArray = feed.rt_shm - hist_ohlcv: ShmArray = feed.hist_shm - symbol = feed.symbols[first_fqsn] - fqsn = symbol.front_fqsn() + # TODO: right now we only show one symbol on charts, but + # overlays are coming muy pronto guey.. + assert len(feed.flumes) == 1 + flume = list(feed.flumes.values())[0] + + ohlcv: ShmArray = flume.rt_shm + hist_ohlcv: ShmArray = flume.hist_shm + + symbol = flume.symbol + fqsn = symbol.fqsn step_size_s = 1 tf_key = tf_in_1s[step_size_s] @@ -1012,7 +1018,7 @@ async def display_symbol_data( hist_linked._symbol = symbol hist_chart = hist_linked.plot_ohlc_main( symbol, - feed.hist_shm, + hist_ohlcv, # in the case of history chart we explicitly set `False` # to avoid internal pane creation. 
# sidepane=False, @@ -1100,7 +1106,7 @@ async def display_symbol_data( graphics_update_loop, ln, godwidget, - feed, + flume, wap_in_history, vlm_chart, ) @@ -1124,7 +1130,7 @@ async def display_symbol_data( await link_views_with_region( ohlc_chart, hist_chart, - feed, + flume, ) mode: OrderMode diff --git a/piker/ui/_position.py b/piker/ui/_position.py index f2ec1466..98584161 100644 --- a/piker/ui/_position.py +++ b/piker/ui/_position.py @@ -45,7 +45,10 @@ from ..calc import humanize, pnl, puterize from ..clearing._allocate import Allocator from ..pp import Position from ..data._normalize import iterticks -from ..data.feed import Feed +from ..data.feed import ( + Feed, + Flume, +) from ..data.types import Struct from ._label import Label from ._lines import LevelLine, order_line @@ -64,7 +67,7 @@ _pnl_tasks: dict[str, bool] = {} async def update_pnl_from_feed( - feed: Feed, + flume: Flume, order_mode: OrderMode, # noqa tracker: PositionTracker, @@ -95,7 +98,7 @@ async def update_pnl_from_feed( # real-time update pnl on the status pane try: - async with feed.stream.subscribe() as bstream: + async with flume.stream.subscribe() as bstream: # last_tick = time.time() async for quotes in bstream: @@ -390,12 +393,12 @@ class SettingsPane: mode = self.order_mode sym = mode.chart.linked.symbol size = tracker.live_pp.size - feed = mode.quote_feed + flume: Feed = mode.feed.flumes[sym.fqsn] pnl_value = 0 if size: # last historical close price - last = feed.rt_shm.array[-1][['close']][0] + last = flume.rt_shm.array[-1][['close']][0] pnl_value = copysign(1, size) * pnl( tracker.live_pp.ppu, last, @@ -408,7 +411,7 @@ class SettingsPane: _pnl_tasks[fqsn] = True self.order_mode.nursery.start_soon( update_pnl_from_feed, - feed, + flume, mode, tracker, ) diff --git a/piker/ui/order_mode.py b/piker/ui/order_mode.py index fa8ecbce..7e4ae066 100644 --- a/piker/ui/order_mode.py +++ b/piker/ui/order_mode.py @@ -44,7 +44,10 @@ from ..clearing._allocate import ( ) from ._style import _font 
from ..data._source import Symbol -from ..data.feed import Feed +from ..data.feed import ( + Feed, + Flume, +) from ..data.types import Struct from ..log import get_logger from ._editors import LineEditor, ArrowEditor @@ -118,7 +121,6 @@ class OrderMode: chart: ChartPlotWidget # type: ignore # noqa hist_chart: ChartPlotWidget # type: ignore # noqa nursery: trio.Nursery # used by ``ui._position`` code? - quote_feed: Feed book: OrderBook lines: LineEditor arrows: ArrowEditor @@ -514,12 +516,13 @@ class OrderMode: # XXX: seems to fail on certain types of races? # assert len(lines) == 2 if lines: - _, _, ratio = self.feed.get_ds_info() + flume: Flume = self.feed.flumes[self.chart.linked.symbol.fqsn] + _, _, ratio = flume.get_ds_info() for i, chart in [ (arrow_index, self.chart), - (self.feed.izero_hist + (flume.izero_hist + - round((arrow_index - self.feed.izero_rt)/ratio), + round((arrow_index - flume.izero_rt)/ratio), self.hist_chart) ]: self.arrows.add( @@ -801,7 +804,6 @@ async def open_order_mode( chart, hist_chart, tn, - feed, book, lines, arrows, From 8a01c9e42b751c481909f048323100ffd0b3f91e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 10 Nov 2022 01:41:35 -0500 Subject: [PATCH 17/49] Fix broker-tail stripping using `str.removesuffix()` --- piker/data/feed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 1a5eba0c..672974e6 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -1255,7 +1255,7 @@ async def open_feed_bus( assert bfqsn in fqsn and brokername in fqsn if sym.suffix: - bfqsn = fqsn.rstrip(f'.{brokername}') + bfqsn = fqsn.removesuffix(f'.{brokername}') log.warning(f'{brokername} expanded symbol {symbol} -> {bfqsn}') # pack for ``.started()`` sync msg @@ -1327,7 +1327,7 @@ async def open_feed_bus( # maybe use the current task-id to key the sub list that's # added / removed? Or maybe we can add a general # pause-resume by sub-key api? 
- bfqsn = fqsn.rstrip(f'.{brokername}') + bfqsn = fqsn.removesuffix(f'.{brokername}') bus_subs = bus._subscribers[bfqsn] bus_subs.append(sub) local_subs.append(sub) From 29b6b3e54fec4e39bf9842ef33e0d758271dea45 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 10 Nov 2022 01:42:05 -0500 Subject: [PATCH 18/49] Port `storesh` cli-cmd machinery to `Flume` apis --- piker/data/marketstore.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 096c9745..2a088bc5 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -661,7 +661,7 @@ async def tsdb_history_update( [fqsn], start_stream=False, - ) as (feed, stream), + ) as feed, ): profiler(f'opened feed for {fqsn}') @@ -669,12 +669,13 @@ async def tsdb_history_update( # to_prepend = None if fqsn: - symbol = feed.symbols.get(fqsn) + flume = feed.flumes[fqsn] + symbol = flume.symbol if symbol: - fqsn = symbol.front_fqsn() + fqsn = symbol.fqsn # diff db history with shm and only write the missing portions - # ohlcv = feed.hist_shm.array + # ohlcv = flume.hist_shm.array # TODO: use pg profiler # for secs in (1, 60): From 36868bb86e52231d2a1cea7ac00fbc63190c7b5a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 10 Nov 2022 10:38:45 -0500 Subject: [PATCH 19/49] Add `kraken` test, ensure single broker-provider for now --- tests/test_feeds.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/test_feeds.py b/tests/test_feeds.py index 2fb5f693..5bb9a510 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -13,14 +13,18 @@ from piker import ( open_feed, ) from piker.data import ShmArray +from piker.data._source import ( + unpack_fqsn, +) @pytest.mark.parametrize( 'fqsns', [ - {'btcusdt.binance', 'ethusdt.binance'} + (100, {'btcusdt.binance', 'ethusdt.binance'}), + (50, {'xbteur.kraken', 'xbtusd.kraken'}), ], - ids=lambda param: f'fqsns={param}', + ids=lambda param: 
f'quotes={param[0]}@fqsns={param[1]}', ) def test_basic_rt_feed( fqsns: set[str], @@ -30,6 +34,17 @@ def test_basic_rt_feed( a few quotes then simply shut down. ''' + max_quotes, fqsns = fqsns + + brokers = set() + for fqsn in fqsns: + brokername, key, suffix = unpack_fqsn(fqsn) + brokers.add(brokername) + + # NOTE: we only have single broker-backed multi-symbol streams + # currently. + assert len(brokers) == 1 + async def main(): async with ( open_piker_runtime( @@ -60,7 +75,7 @@ def test_basic_rt_feed( # stream some ticks and ensure we see data from both symbol # subscriptions. - stream = feed.streams['binance'] + stream = feed.streams[brokername] # pull the first startup quotes, one for each fqsn, and # ensure they match each flume's startup quote value. From 8476d8d0564f90416e72d183acf1e375f521a4b5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 10 Nov 2022 15:25:05 -0500 Subject: [PATCH 20/49] Fix partial-frame-missing backfill logic This had a bug prior where the end of a frame (a partial) wasn't being sliced correctly and we'd get odd gaps showing up in the backfilled from `brokerd` vs. tsdb end index. Repair this by doing timeframe aware index diffing in `diff_history()` which seems to resolve it. Also, use the frame-result's `end_dt: datetime` for the loop exit condition. --- piker/data/feed.py | 123 +++++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 44 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 672974e6..09dce25e 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -208,39 +208,50 @@ async def _setup_persistent_brokerd( def diff_history( - array, - start_dt, - end_dt, - last_tsdb_dt: Optional[datetime] = None + array: np.ndarray, + timeframe: int, + start_dt: datetime, + end_dt: datetime, + last_tsdb_dt: datetime | None = None ) -> np.ndarray: + # no diffing with tsdb dt index possible.. 
+ if last_tsdb_dt is None: + return array + + time_attr = { + 1: 'seconds', + 60: 'minutes', + }[timeframe] + i_diff = getattr((end_dt - last_tsdb_dt), time_attr) + + # if we detect a partial frame's worth of data + # that is new, slice out only that history and + # write to shm. + if i_diff < 0: + # empty case since tsdb already has this history + return array[:0] + + ln = len(array) to_push = array - if last_tsdb_dt: - s_diff = (start_dt - last_tsdb_dt).seconds + if i_diff < ln: + # slice out missing history from end of frame + to_push = array[ln - i_diff:ln] - # if we detect a partial frame's worth of data - # that is new, slice out only that history and - # write to shm. - if ( - s_diff < 0 - ): - if abs(s_diff) < len(array): - # the + 1 is because ``last_tsdb_dt`` is pulled from - # the last row entry for the ``'time'`` field retreived - # from the tsdb. - to_push = array[abs(s_diff) + 1:] + # XXX: OLD GAP HANDLING..if there's a gap in a partial frame + # worth. we don't need this any more with the timeframe aware + # diffing above right? + # else: + # # pass back only the portion of the array that is + # # greater then the last time stamp in the tsdb. + # time = array['time'] + # to_push = array[time >= last_tsdb_dt.timestamp()] - else: - # pass back only the portion of the array that is - # greater then the last time stamp in the tsdb. 
- time = array['time'] - to_push = array[time >= last_tsdb_dt.timestamp()] - - log.debug( - f'Pushing partial frame {to_push.size} to shm' - ) + log.debug( + f'Pushing partial frame {to_push.size} to shm' + ) return to_push @@ -286,6 +297,7 @@ async def start_backfill( to_push = diff_history( array, + timeframe, start_dt, end_dt, last_tsdb_dt=last_tsdb_dt, @@ -334,12 +346,12 @@ async def start_backfill( 60: {'years': 6}, } - kwargs = periods[step_size_s] + period_duration = periods[step_size_s] # NOTE: manually set the "latest" datetime which we intend to # backfill history "until" so as to adhere to the history # settings above when the tsdb is detected as being empty. - last_tsdb_dt = start_dt.subtract(**kwargs) + last_tsdb_dt = start_dt.subtract(**period_duration) # configure async query throttling # rate = config.get('rate', 1) @@ -353,7 +365,7 @@ async def start_backfill( # inline sequential loop where we simply pass the # last retrieved start dt to the next request as # it's end dt. - while start_dt > last_tsdb_dt: + while end_dt > last_tsdb_dt: log.debug( f'Requesting {step_size_s}s frame ending in {start_dt}' ) @@ -363,6 +375,8 @@ async def start_backfill( timeframe, end_dt=start_dt, ) + # if timeframe == 1: + # await tractor.breakpoint() # broker says there never was or is no more history to pull except DataUnavailable: @@ -404,6 +418,7 @@ async def start_backfill( to_push = diff_history( array, + timeframe, start_dt, end_dt, last_tsdb_dt=last_tsdb_dt, @@ -424,6 +439,9 @@ async def start_backfill( log.info( f'Shm buffer overrun on: {start_dt} -> {end_dt}?' ) + # can't push the entire frame? so + # push only the amount that can fit.. + await tractor.breakpoint() break log.info( @@ -510,6 +528,8 @@ async def tsdb_backfill( # start history anal and load missing new data via backend. for timeframe, shm in shms.items(): + # loads a (large) frame of data from the tsdb depending + # on the db's query size limit. 
tsdb_history, first_tsdb_dt, last_tsdb_dt = await storage.load( fqsn, timeframe=timeframe, @@ -586,29 +606,45 @@ async def tsdb_backfill( if bf_done: await bf_done.wait() - # Load tsdb history into shm buffer (for display). - # TODO: eventually it'd be nice to not require a shm array/buffer # to accomplish this.. maybe we can do some kind of tsdb direct to # graphics format eventually in a child-actor? - # do diff against last start frame of history and only fill - # in from the tsdb an allotment that allows for most recent - # to be loaded into mem *before* tsdb data. - if last_tsdb_dt and latest_start_dt: - dt_diff_s = ( - latest_start_dt - last_tsdb_dt - ).seconds - else: - dt_diff_s = 0 - # TODO: see if there's faster multi-field reads: # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields # re-index with a `time` and index field prepend_start = shm._first.value + shm_last_dt = pendulum.from_timestamp(shm.array[0]['time']) - # sanity check on most-recent-data loading - assert prepend_start > dt_diff_s + if last_tsdb_dt: + assert shm_last_dt >= last_tsdb_dt + + # do diff against start index of last frame of history and only + # fill in an amount of datums from tsdb allows for most recent + # to be loaded into mem *before* tsdb data. + if ( + last_tsdb_dt + and latest_start_dt + ): + backfilled_size_s = ( + latest_start_dt - last_tsdb_dt + ).seconds + else: + backfilled_size_s = ( + latest_start_dt - shm_last_dt + ).seconds + + # Load TSDB history into shm buffer (for display) if there is + # remaining buffer space. + + # if the shm buffer len is not large enough to contain + # all missing data between the most recent backend-queried frame + # and the most recent dt-index in the db we warn that we only + # want to load a portion of the next tsdb query to fill that + # space. 
+ log.info( + f'{backfilled_size_s} seconds worth of {timeframe}s data loaded' + ) if ( len(tsdb_history) @@ -1266,7 +1302,6 @@ async def open_feed_bus( # expected to append it's own name to the fqsn, so we filter # on keys which *do not* include that name (e.g .ib) . bus._subscribers.setdefault(bfqsn, []) - # await tractor.breakpoint() # sync feed subscribers with flume handles await ctx.started( From d6fb6fe3aef0b48670c080d83626be7455c94fdc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 13:19:24 -0500 Subject: [PATCH 21/49] Just drop the pretty repr from our struct for now --- piker/data/types.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/piker/data/types.py b/piker/data/types.py index 4bdb8063..1359526c 100644 --- a/piker/data/types.py +++ b/piker/data/types.py @@ -42,16 +42,17 @@ class Struct( for f in self.__struct_fields__ } - def __repr__(self): - # only turn on pprint when we detect a python REPL - # at runtime B) - if ( - hasattr(sys, 'ps1') - # TODO: check if we're in pdb - ): - return self.pformat() + # Lul, doesn't seem to work that well.. + # def __repr__(self): + # # only turn on pprint when we detect a python REPL + # # at runtime B) + # if ( + # hasattr(sys, 'ps1') + # # TODO: check if we're in pdb + # ): + # return self.pformat() - return super().__repr__() + # return super().__repr__() def pformat(self) -> str: return f'Struct({pformat(self.to_dict())})' From 81516c52049cd2a413a1fa31a99c7f5c76a91c28 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 14:31:53 -0500 Subject: [PATCH 22/49] Finally fix tsdb -> shm backfill loading A slight facepalm but, the main issue was a simple indexing logic error: we need to slice with `tsdb_history[-shm._first.value:]` to push most recent history not oldest.. This allows cleanup of tsdb backfill loop as well. Further, greatly simplify `diff_history()` time slicing by using the classic `numpy` conditional slice on the epoch field.
--- piker/data/feed.py | 109 +++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 73 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 09dce25e..82b7b59b 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -220,40 +220,8 @@ def diff_history( if last_tsdb_dt is None: return array - time_attr = { - 1: 'seconds', - 60: 'minutes', - }[timeframe] - i_diff = getattr((end_dt - last_tsdb_dt), time_attr) - - # if we detect a partial frame's worth of data - # that is new, slice out only that history and - # write to shm. - if i_diff < 0: - # empty case since tsdb already has this history - return array[:0] - - ln = len(array) - to_push = array - - if i_diff < ln: - # slice out missing history from end of frame - to_push = array[ln - i_diff:ln] - - # XXX: OLD GAP HANDLING..if there's a gap in a partial frame - # worth. we don't need this any more with the timeframe aware - # diffing above right? - # else: - # # pass back only the portion of the array that is - # # greater then the last time stamp in the tsdb. - # time = array['time'] - # to_push = array[time >= last_tsdb_dt.timestamp()] - - log.debug( - f'Pushing partial frame {to_push.size} to shm' - ) - - return to_push + time = array['time'] + return array[time > last_tsdb_dt.timestamp()] async def start_backfill( @@ -375,8 +343,6 @@ async def start_backfill( timeframe, end_dt=start_dt, ) - # if timeframe == 1: - # await tractor.breakpoint() # broker says there never was or is no more history to pull except DataUnavailable: @@ -441,7 +407,6 @@ async def start_backfill( ) # can't push the entire frame? so # push only the amount that can fit.. 
- await tractor.breakpoint() break log.info( @@ -614,7 +579,11 @@ async def tsdb_backfill( # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields # re-index with a `time` and index field prepend_start = shm._first.value - shm_last_dt = pendulum.from_timestamp(shm.array[0]['time']) + array = shm.array + if len(array): + shm_last_dt = pendulum.from_timestamp(shm.array[0]['time']) + else: + shm_last_dt = None if last_tsdb_dt: assert shm_last_dt >= last_tsdb_dt @@ -629,27 +598,24 @@ async def tsdb_backfill( backfilled_size_s = ( latest_start_dt - last_tsdb_dt ).seconds - else: - backfilled_size_s = ( - latest_start_dt - shm_last_dt - ).seconds + # if the shm buffer len is not large enough to contain + # all missing data between the most recent backend-queried frame + # and the most recent dt-index in the db we warn that we only + # want to load a portion of the next tsdb query to fill that + # space. + log.info( + f'{backfilled_size_s} seconds worth of {timeframe}s loaded' + ) # Load TSDB history into shm buffer (for display) if there is # remaining buffer space. - - # if the shm buffer len is not large enough to contain - # all missing data between the most recent backend-queried frame - # and the most recent dt-index in the db we warn that we only - # want to load a portion of the next tsdb query to fill that - # space. - log.info( - f'{backfilled_size_s} seconds worth of {timeframe}s data loaded' - ) - if ( len(tsdb_history) ): - to_push = tsdb_history[:prepend_start] + + # load the first (smaller) bit of history originally loaded + # above from ``Storage.load()``. + to_push = tsdb_history[-prepend_start:] shm.push( to_push, @@ -660,37 +626,30 @@ async def tsdb_backfill( # start=prepend_start, field_map=marketstore.ohlc_key_map, ) - prepend_start = shm._first.value - - # load as much from storage into shm as space will - # allow according to user's shm size settings. 
- last_frame_start = tsdb_history['Epoch'][0] while ( shm._first.value > 0 ): + # load as much from storage into shm as space will + # allow according to user's shm size settings. + tsdb_last_frame_start = tsdb_history['Epoch'][0] + tsdb_history = await storage.read_ohlcv( fqsn, - end=last_frame_start, + end=tsdb_last_frame_start, timeframe=timeframe, ) if ( - not len(tsdb_history) + not len(tsdb_history) # empty query + + # no earlier data detected + or tsdb_history['Epoch'][0] >= tsdb_last_frame_start + ): - # on empty db history - break - - time = tsdb_history['Epoch'] - frame_start = time[0] - frame_end = time[0] - print(f"LOADING MKTS HISTORY: {frame_start} - {frame_end}") - - if frame_start >= last_frame_start: - # no new data loaded was from tsdb, so we can exit. break prepend_start = shm._first.value - to_push = tsdb_history[:prepend_start] + to_push = tsdb_history[-prepend_start:] # insert the history pre a "days worth" of samples # to leave some real-time buffer space at the end. @@ -699,8 +658,6 @@ async def tsdb_backfill( prepend=True, field_map=marketstore.ohlc_key_map, ) - last_frame_start = frame_start - log.info(f'Loaded {to_push.shape} datums from storage') # manually trigger step update to update charts/fsps @@ -1248,6 +1205,11 @@ async def open_feed_bus( # ensure we are who we think we are servicename = tractor.current_actor().name assert 'brokerd' in servicename + + # XXX: figure this not crashing into debug! 
+ # await tractor.breakpoint() + # assert 0 + assert brokername in servicename bus = get_feed_bus(brokername) @@ -1557,6 +1519,7 @@ async def open_feed( bus_ctxs.append( portal.open_context( open_feed_bus, + # brokername=brokermod.name, brokername=brokername, symbols=bfqsns, loglevel=loglevel, From 20a396270e53bcc2ffffd302e6789f8b7385bba8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 14:47:39 -0500 Subject: [PATCH 23/49] `Storage.read_ohlcv()` now returns a `numpy` array --- piker/data/marketstore.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 2a088bc5..d354f9b0 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -38,7 +38,7 @@ from math import isnan from bidict import bidict from msgspec.msgpack import encode, decode -import pyqtgraph as pg +# import pyqtgraph as pg import numpy as np import tractor from trio_websocket import open_websocket_url @@ -429,10 +429,7 @@ class Storage: end: Optional[int] = None, limit: int = int(800e3), - ) -> dict[ - int, - Union[dict, np.ndarray], - ]: + ) -> np.ndarray: client = self.client syms = await client.list_symbols() From 7b9db86753dbfcabae78055e3b3184d63cc63ac2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:17:17 -0500 Subject: [PATCH 24/49] Multi-`broker` quotes with `Feed.open_multi_stream()` Adds provider-list-filtered (quote) stream multiplexing support allowing for merged real-time `tractor.MsgStream`s using an `@acm` interface. Behind the scenes we are just doing a classic multi-task push to common mem chan approach. Details to make it work on `Feed`: - add `Feed.mods: dict[str, Moduletype]` and `Feed.portals[ModuleType, tractor.Portal]` which are both populated during init in `open_feed()` - drop `Feed.portal` and `Feed.name` Also fix a final lingering tsdb history loading loop termination bug. 
--- piker/data/feed.py | 195 +++++++++++++++++++++++++++++---------------- 1 file changed, 126 insertions(+), 69 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 82b7b59b..d26c7f37 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -32,6 +32,7 @@ from typing import ( Callable, Optional, Awaitable, + Sequence, TYPE_CHECKING, Union, ) @@ -627,26 +628,31 @@ async def tsdb_backfill( field_map=marketstore.ohlc_key_map, ) + tsdb_last_frame_start = tsdb_history['Epoch'][0] + + # load as much from storage into shm possible (depends on + # user's shm size settings). while ( shm._first.value > 0 ): - # load as much from storage into shm as space will - # allow according to user's shm size settings. - tsdb_last_frame_start = tsdb_history['Epoch'][0] tsdb_history = await storage.read_ohlcv( fqsn, end=tsdb_last_frame_start, timeframe=timeframe, ) + + next_start = tsdb_history['Epoch'][0] if ( not len(tsdb_history) # empty query # no earlier data detected - or tsdb_history['Epoch'][0] >= tsdb_last_frame_start + or next_start >= tsdb_last_frame_start ): break + else: + tsdb_last_frame_start = next_start prepend_start = shm._first.value to_push = tsdb_history[-prepend_start:] @@ -868,6 +874,9 @@ class Flume(Struct): izero_hist: int = 0 izero_rt: int = 0 throttle_rate: int | None = None + + # TODO: do we need this really if we can pull the `Portal` from + # ``tractor``'s internals? feed: Feed | None = None @property @@ -905,12 +914,15 @@ class Flume(Struct): if not self.feed: raise RuntimeError('This flume is not part of any ``Feed``?') + # TODO: maybe a public (property) API for this in ``tractor``? 
+ portal = self.stream._ctx._portal + # XXX: this should be singleton on a host, # a lone broker-daemon per provider should be # created for all practical purposes async with maybe_open_context( acm_func=partial( - self.feed.portal.open_context, + portal.open_context, iter_ohlc_periods, ), kwargs={'delay_s': delay_s}, @@ -1384,24 +1396,66 @@ class Feed(Struct): similarly allocated shm arrays. ''' - mod: ModuleType - _portal: tractor.Portal + mods: dict[str, ModuleType] = {} + portals: dict[ModuleType, tractor.Portal] = {} flumes: dict[str, Flume] = {} streams: dict[ str, trio.abc.ReceiveChannel[dict[str, Any]], ] = {} - status: dict[str, Any] + + # used for UI to show remote state + status: dict[str, Any] = {} + + @acm + async def open_multi_stream( + self, + brokers: Sequence[str] | None = None, + + ) -> trio.abc.ReceiveChannel: + + if brokers is None: + mods = self.mods + else: + mods = {name: self.mods[name] for name in brokers} + + if len(mods) == 1: + # just pass the brokerd stream directly if only one provider + # was detected. + stream = self.streams[list(brokers)[0]] + async with stream.subscribe() as bstream: + yield bstream + return + + # start multiplexing task tree + tx, rx = trio.open_memory_channel(616) + + async def relay_to_common_memchan(stream: tractor.MsgStream): + async with tx: + async for msg in stream: + await tx.send(msg) + + async with trio.open_nursery() as nurse: + # spawn a relay task for each stream so that they all + # multiplex to a common channel. 
+ for brokername in mods: + stream = self.streams[brokername] + nurse.start_soon(relay_to_common_memchan, stream) + + try: + yield rx + finally: + nurse.cancel_scope.cancel() _max_sample_rate: int = 1 - @property - def portal(self) -> tractor.Portal: - return self._portal + # @property + # def portal(self) -> tractor.Portal: + # return self._portal - @property - def name(self) -> str: - return self.mod.name + # @property + # def name(self) -> str: + # return self.mod.name @acm @@ -1457,6 +1511,7 @@ async def open_feed( ''' providers: dict[ModuleType, list[str]] = {} + feed = Feed() for fqsn in fqsns: brokername, key, suffix = unpack_fqsn(fqsn) @@ -1469,6 +1524,7 @@ async def open_feed( # built a per-provider map to instrument names providers.setdefault(mod, []).append(bfqsn) + feed.mods[mod.name] = mod # one actor per brokerd for now brokerd_ctxs = [] @@ -1495,18 +1551,15 @@ async def open_feed( (brokermod, bfqsns), ) in zip(portals, providers.items()): - feed = Feed( - mod=brokermod, - _portal=portal, - status={}, - ) + feed.portals[brokermod] = portal + # fill out "status info" that the UI can show - host, port = feed.portal.channel.raddr + host, port = portal.channel.raddr if host == '127.0.0.1': host = 'localhost' feed.status.update({ - 'actor_name': feed.portal.channel.uid[0], + 'actor_name': portal.channel.uid[0], 'host': host, 'port': port, 'hist_shm': 'NA', @@ -1519,8 +1572,7 @@ async def open_feed( bus_ctxs.append( portal.open_context( open_feed_bus, - # brokername=brokermod.name, - brokername=brokername, + brokername=brokermod.name, symbols=bfqsns, loglevel=loglevel, start_stream=start_stream, @@ -1528,61 +1580,66 @@ async def open_feed( ) ) - async with ( - gather_contexts(bus_ctxs) as ctxs, - ): - remote_scopes = [] - for ( - (ctx, flumes_msg_dict), - (brokermod, bfqsns), - ) in zip(ctxs, providers.items()): + assert len(feed.mods) == len(feed.portals) - stream_ctxs = [] - for fqsn, flume_msg in flumes_msg_dict.items(): - flume = 
Flume.from_msg(flume_msg) - assert flume.symbol.fqsn == fqsn - feed.flumes[fqsn] = flume - flume.feed = feed + async with ( + gather_contexts(bus_ctxs) as ctxs, + ): + stream_ctxs = [] + for ( + (ctx, flumes_msg_dict), + (brokermod, bfqsns), + ) in zip(ctxs, providers.items()): - # attach and cache shm handles - rt_shm = flume.rt_shm - assert rt_shm - hist_shm = flume.hist_shm - assert hist_shm + for fqsn, flume_msg in flumes_msg_dict.items(): + flume = Flume.from_msg(flume_msg) + assert flume.symbol.fqsn == fqsn + feed.flumes[fqsn] = flume - feed.status['hist_shm'] = ( - f'{humanize(hist_shm._shm.size)}' - ) - feed.status['rt_shm'] = f'{humanize(rt_shm._shm.size)}' + # TODO: do we need this? + flume.feed = feed - remote_scopes.append(ctx) - stream_ctxs.append( - ctx.open_stream( - # XXX: be explicit about stream backpressure - # since we should **never** overrun on feeds - # being too fast, which will pretty much - # always happen with HFT XD - backpressure=backpressure, - ) + # attach and cache shm handles + rt_shm = flume.rt_shm + assert rt_shm + hist_shm = flume.hist_shm + assert hist_shm + + feed.status['hist_shm'] = ( + f'{humanize(hist_shm._shm.size)}' ) + feed.status['rt_shm'] = f'{humanize(rt_shm._shm.size)}' - async with ( - gather_contexts(stream_ctxs) as streams, - ): - for ( - stream, - (brokermod, bfqsns), - ) in zip(streams, providers.items()): + stream_ctxs.append( + ctx.open_stream( + # XXX: be explicit about stream backpressure + # since we should **never** overrun on feeds + # being too fast, which will pretty much + # always happen with HFT XD + backpressure=backpressure, + ) + ) - # for bfqsn in bfqsns: - for fqsn in flumes_msg_dict: + async with ( + gather_contexts(stream_ctxs) as streams, + ): + for ( + stream, + (brokermod, bfqsns), + ) in zip(streams, providers.items()): - # apply common rt steam to each flume - # (normally one per broker) - feed.flumes[fqsn].stream = stream - feed.streams[brokermod.name] = stream + 
feed.streams[brokermod.name] = stream - yield feed + # for bfqsn in bfqsns: + for fqsn in flumes_msg_dict: + + # apply common rt steam to each flume + # (normally one per broker) + feed.flumes[fqsn].stream = stream + + assert len(feed.mods) == len(feed.portals) == len(feed.streams) + + yield feed @acm From 7bbe86d6fb0d2f3098c30fe71a73e09ec4a3a2e3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:26:32 -0500 Subject: [PATCH 25/49] Unpack broker mod and portal from fqsn for brokerd-trade-dialogs --- piker/clearing/_ems.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/piker/clearing/_ems.py b/piker/clearing/_ems.py index b478abcc..ad512c08 100644 --- a/piker/clearing/_ems.py +++ b/piker/clearing/_ems.py @@ -27,6 +27,7 @@ from contextlib import asynccontextmanager as acm from math import isnan from pprint import pformat import time +from types import ModuleType from typing import ( AsyncIterator, Any, @@ -381,14 +382,15 @@ class Router(Struct): @acm async def maybe_open_brokerd_dialog( self, - feed: Feed, + brokermod: ModuleType, + portal: tractor.Portal, exec_mode: str, symbol: str, loglevel: str, ) -> None: - brokermod = feed.mod broker = brokermod.name + relay: TradesRelay = self.relays.get(broker) if ( relay @@ -427,7 +429,7 @@ class Router(Struct): else: # open live brokerd trades endpoint - open_trades_endpoint = feed.portal.open_context( + open_trades_endpoint = portal.open_context( trades_endpoint, loglevel=loglevel, ) @@ -526,8 +528,10 @@ class Router(Struct): loglevel=loglevel, ) as feed, ): - brokermod = feed.mod + brokername, _, _ = unpack_fqsn(fqsn) + brokermod = feed.mods[brokername] broker = brokermod.name + portal = feed.portals[brokermod] # XXX: this should be initial price quote from target provider flume = feed.flumes[fqsn] @@ -536,7 +540,8 @@ class Router(Struct): book.lasts[fqsn]: float = first_quote['last'] async with self.maybe_open_brokerd_dialog( - feed=feed, + brokermod=brokermod, + 
portal=portal, exec_mode=exec_mode, symbol=symbol, loglevel=loglevel, From e348968113a6b517c62afba8aaac521341f74c5d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:27:02 -0500 Subject: [PATCH 26/49] Add multi-broker streaming test using both `binance` and `kraken` --- tests/test_feeds.py | 99 +++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/tests/test_feeds.py b/tests/test_feeds.py index 5bb9a510..b471c319 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -6,7 +6,7 @@ from collections import Counter from pprint import pprint import pytest -import tractor +# import tractor import trio from piker import ( open_piker_runtime, @@ -21,12 +21,18 @@ from piker.data._source import ( @pytest.mark.parametrize( 'fqsns', [ + # binance (100, {'btcusdt.binance', 'ethusdt.binance'}), - (50, {'xbteur.kraken', 'xbtusd.kraken'}), + + # kraken + (20, {'xbteur.kraken', 'xbtusd.kraken'}), + + # binance + kraken + (200, {'btcusdt.binance', 'xbtusd.kraken'}), ], ids=lambda param: f'quotes={param[0]}@fqsns={param[1]}', ) -def test_basic_rt_feed( +def test_multi_fqsn_feed( fqsns: set[str], ): ''' @@ -41,10 +47,6 @@ def test_basic_rt_feed( brokername, key, suffix = unpack_fqsn(fqsn) brokers.add(brokername) - # NOTE: we only have single broker-backed multi-symbol streams - # currently. - assert len(brokers) == 1 - async def main(): async with ( open_piker_runtime( @@ -52,9 +54,9 @@ def test_basic_rt_feed( # XXX tractor BUG: this doesn't translate through to the # ``tractor._state._runtimevars``... - registry_addr=('127.0.0.1', 6666), + # registry_addr=('127.0.0.1', 6666), - # debug_mode=True, + debug_mode=True, ), open_feed( fqsns, @@ -73,55 +75,54 @@ def test_basic_rt_feed( ohlcv: ShmArray = flume.rt_shm hist_ohlcv: ShmArray = flume.hist_shm - # stream some ticks and ensure we see data from both symbol - # subscriptions. 
- stream = feed.streams[brokername] + async with feed.open_multi_stream(brokers) as stream: - # pull the first startup quotes, one for each fqsn, and - # ensure they match each flume's startup quote value. - fqsns_copy = fqsns.copy() - for _ in range(1): - first_quotes = await stream.receive() - for fqsn, quote in first_quotes.items(): + # pull the first startup quotes, one for each fqsn, and + # ensure they match each flume's startup quote value. + fqsns_copy = fqsns.copy() + with trio.fail_after(0.5): + for _ in range(1): + first_quotes = await stream.receive() + for fqsn, quote in first_quotes.items(): - # XXX: TODO: WTF apparently this error will get - # supressed and only show up in the teardown - # excgroup if we don't have the fix from - # - # assert 0 + # XXX: TODO: WTF apparently this error will get + # supressed and only show up in the teardown + # excgroup if we don't have the fix from + # + # assert 0 - fqsns_copy.remove(fqsn) - flume = feed.flumes[fqsn] - assert quote['last'] == flume.first_quote['last'] + fqsns_copy.remove(fqsn) + flume = feed.flumes[fqsn] + assert quote['last'] == flume.first_quote['last'] - cntr = Counter() - async for quotes in stream: - for fqsn, quote in quotes.items(): - cntr[fqsn] += 1 + cntr = Counter() + with trio.fail_after(3): + async for quotes in stream: + for fqsn, quote in quotes.items(): + cntr[fqsn] += 1 - # await tractor.breakpoint() - flume = feed.flumes[fqsn] - ohlcv: ShmArray = flume.rt_shm - hist_ohlcv: ShmArray = flume.hist_shm + # await tractor.breakpoint() + flume = feed.flumes[fqsn] + ohlcv: ShmArray = flume.rt_shm + hist_ohlcv: ShmArray = flume.hist_shm - # print quote msg, rt and history - # buffer values on console. - rt_row = ohlcv.array[-1] - hist_row = hist_ohlcv.array[-1] - # last = quote['last'] + # print quote msg, rt and history + # buffer values on console. 
+ rt_row = ohlcv.array[-1] + hist_row = hist_ohlcv.array[-1] + # last = quote['last'] - # assert last == rt_row['close'] - # assert last == hist_row['close'] - pprint( - f'{fqsn}: {quote}\n' - f'rt_ohlc: {rt_row}\n' - f'hist_ohlc: {hist_row}\n' - ) + # assert last == rt_row['close'] + # assert last == hist_row['close'] + pprint( + f'{fqsn}: {quote}\n' + f'rt_ohlc: {rt_row}\n' + f'hist_ohlc: {hist_row}\n' + ) - if cntr.total() >= 100: - break + if cntr.total() >= max_quotes: + break - # await tractor.breakpoint() assert set(cntr.keys()) == fqsns trio.run(main) From 0a959c1c74d09e7f3710ddb2609344b99a96d845 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:27:49 -0500 Subject: [PATCH 27/49] Not all accounts will have API trade transactions this session.. --- piker/brokers/ib/broker.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/piker/brokers/ib/broker.py b/piker/brokers/ib/broker.py index 401ac71c..52b26970 100644 --- a/piker/brokers/ib/broker.py +++ b/piker/brokers/ib/broker.py @@ -575,17 +575,18 @@ async def trades_dialogue( # if new trades are detected from the API, prepare # them for the ledger file and update the pptable. if api_to_ledger_entries: - trade_entries = api_to_ledger_entries[acctid] + trade_entries = api_to_ledger_entries.get(acctid) - # write ledger with all new trades **AFTER** - # we've updated the `pps.toml` from the - # original ledger state! (i.e. this is - # currently done on exit) - ledger.update(trade_entries) + if trade_entries: + # write ledger with all new trades **AFTER** + # we've updated the `pps.toml` from the + # original ledger state! (i.e. this is + # currently done on exit) + ledger.update(trade_entries) - trans = trans_by_acct.get(acctid) - if trans: - table.update_from_trans(trans) + trans = trans_by_acct.get(acctid) + if trans: + table.update_from_trans(trans) # XXX: not sure exactly why it wouldn't be in # the updated output (maybe this is a bug?) 
but From ddbba760959505ef48d2fc53ee1357feaaa28baf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:57:48 -0500 Subject: [PATCH 28/49] Use (a new) `piker_pin` branch in `tractor` (again) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8f573625..0f13d891 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # we require a pinned dev branch to get some edge features that # are often untested in tractor's CI and/or being tested by us # first before committing as core features in tractor's base. --e git+https://github.com/goodboy/tractor.git@master#egg=tractor +-e git+https://github.com/goodboy/tractor.git@piker_pin#egg=tractor # `pyqtgraph` peeps keep breaking, fixing, improving so might as well # pin this to a dev branch that we have more control over especially From 79fcbcc2813a84c9ae801c6ab72ede63422c2976 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:58:09 -0500 Subject: [PATCH 29/49] Add an sdist job to CI --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bec62534..6af351ec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,27 @@ on: jobs: + # test that we can generate a software distribution and install it + # thus avoid missing file issues after packaging. 
+ sdist-linux: + name: 'sdist' + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - name: Build sdist + run: python setup.py sdist --formats=zip + + - name: Install sdist from .zips + run: python -m pip install dist/*.zip + testing: name: 'install + test-suite' runs-on: ubuntu-latest From c088963cf29069f72d51553f9bbd95a92501ec3e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Nov 2022 17:39:46 -0500 Subject: [PATCH 30/49] Always touch config file dir if dne --- piker/config.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/piker/config.py b/piker/config.py index c7a7acc9..cb250386 100644 --- a/piker/config.py +++ b/piker/config.py @@ -197,6 +197,9 @@ def load( ''' path = path or get_conf_path(conf_name) + if not os.path.isdir(_config_dir): + os.mkdir(_config_dir) + if not os.path.isfile(path): fn = _conf_fn_w_ext(conf_name) @@ -209,9 +212,9 @@ def load( # if one exists. 
if os.path.isfile(template): shutil.copyfile(template, path) - else: - with open(path, 'w'): - pass # touch + else: + with open(path, 'r'): + pass # touch it config = toml.load(path, **tomlkws) log.debug(f"Read config file {path}") From 1e96ca32df8e929d789c217fd40bf3e13ef93ffd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 12 Nov 2022 13:43:09 -0500 Subject: [PATCH 31/49] Move `maybe_open_feed()` above for readability --- piker/data/feed.py | 102 ++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index d26c7f37..4dc63da6 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -1495,6 +1495,57 @@ async def install_brokerd_search( yield +@acm +async def maybe_open_feed( + + fqsns: list[str], + loglevel: Optional[str] = None, + + **kwargs, + +) -> ( + Feed, + ReceiveChannel[dict[str, Any]], +): + ''' + Maybe open a data to a ``brokerd`` daemon only if there is no + local one for the broker-symbol pair, if one is cached use it wrapped + in a tractor broadcast receiver. 
+ + ''' + fqsn = fqsns[0] + + async with maybe_open_context( + acm_func=open_feed, + kwargs={ + 'fqsns': fqsns, + 'loglevel': loglevel, + 'tick_throttle': kwargs.get('tick_throttle'), + + # XXX: super critical to have bool defaults here XD + 'backpressure': kwargs.get('backpressure', True), + 'start_stream': kwargs.get('start_stream', True), + }, + key=fqsn, + + ) as (cache_hit, feed): + + if cache_hit: + log.info(f'Using cached feed for {fqsn}') + # add a new broadcast subscription for the quote stream + # if this feed is likely already in use + + async with gather_contexts( + mngrs=[stream.subscribe() for stream in feed.streams.values()] + ) as bstreams: + for bstream, flume in zip(bstreams, feed.flumes.values()): + flume.stream = bstream + + yield feed + else: + yield feed + + @acm async def open_feed( @@ -1640,54 +1691,3 @@ async def open_feed( assert len(feed.mods) == len(feed.portals) == len(feed.streams) yield feed - - -@acm -async def maybe_open_feed( - - fqsns: list[str], - loglevel: Optional[str] = None, - - **kwargs, - -) -> ( - Feed, - ReceiveChannel[dict[str, Any]], -): - ''' - Maybe open a data to a ``brokerd`` daemon only if there is no - local one for the broker-symbol pair, if one is cached use it wrapped - in a tractor broadcast receiver. 
- - ''' - fqsn = fqsns[0] - - async with maybe_open_context( - acm_func=open_feed, - kwargs={ - 'fqsns': fqsns, - 'loglevel': loglevel, - 'tick_throttle': kwargs.get('tick_throttle'), - - # XXX: super critical to have bool defaults here XD - 'backpressure': kwargs.get('backpressure', True), - 'start_stream': kwargs.get('start_stream', True), - }, - key=fqsn, - - ) as (cache_hit, feed): - - if cache_hit: - log.info(f'Using cached feed for {fqsn}') - # add a new broadcast subscription for the quote stream - # if this feed is likely already in use - - async with gather_contexts( - mngrs=[stream.subscribe() for stream in feed.streams.values()] - ) as bstreams: - for bstream, flume in zip(bstreams, feed.flumes.values()): - flume.stream = bstream - - yield feed - else: - yield feed From f5cd63ad355be3ebdf4ae1d217bdcf9c0d028d6c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 14 Nov 2022 14:33:04 -0500 Subject: [PATCH 32/49] Ensure correct stream is set on each `Flume` Set each quote-stream by matching the provider for each `Flume` and thus results in some flumes mapping to the same (multiplexed) stream. Monkey-patch the equivalent `tractor.MsgStream._ctx: tractor.Context` on each broadcast-receiver subscription to allow use by feed bus methods as well as other internals which need to reference IPC channel/portal info. Start a `_FeedsBus` subscription management API: - add `.get_subs()` which returns the list of tuples registered for the given key (normally the fqsn). - add `.remove_sub()` which allows removing by key and tuple value and provides encapsulation for sampler task(s) which deal with dropped connections/subscribers. 
--- piker/data/feed.py | 49 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 4dc63da6..b243be8d 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -147,6 +147,38 @@ class _FeedsBus(Struct): # task: trio.lowlevel.Task, # ) -> bool: # ... + def get_subs( + self, + key: str, + ) -> list[ + tuple[ + Union[tractor.MsgStream, trio.MemorySendChannel], + tractor.Context, + float | None, # tick throttle in Hz + ] + ]: + return self._subscribers[key] + + def remove_sub( + self, + key: str, + sub: tuple, + ) -> bool: + ''' + Remove a consumer's subscription entry for the given key. + + ''' + stream, ctx, tick_throttle = sub + subs = self.get_subs(key) + try: + subs.remove(sub) + except ValueError: + chan = ctx.chan + log.error( + f'Stream was already removed from subs!?\n' + f'{key}:' + f'{ctx.cid}@{chan.uid}' + ) _bus: _FeedsBus = None @@ -916,6 +948,7 @@ class Flume(Struct): # TODO: maybe a public (property) API for this in ``tractor``? portal = self.stream._ctx._portal + assert portal # XXX: this should be singleton on a host, # a lone broker-daemon per provider should be @@ -1299,6 +1332,7 @@ async def open_feed_bus( await stream.send({fqsn: flume.first_quote}) # set a common msg stream for all requested symbols + assert stream flume.stream = stream # Add a real-time quote subscription to feed bus: @@ -1539,6 +1573,9 @@ async def maybe_open_feed( mngrs=[stream.subscribe() for stream in feed.streams.values()] ) as bstreams: for bstream, flume in zip(bstreams, feed.flumes.values()): + # XXX: TODO: horrible hackery that needs fixing.. + # i guess we have to create context proxies? 
+ bstream._ctx = flume.stream._ctx flume.stream = bstream yield feed @@ -1679,14 +1716,14 @@ async def open_feed( (brokermod, bfqsns), ) in zip(streams, providers.items()): + assert stream feed.streams[brokermod.name] = stream - # for bfqsn in bfqsns: - for fqsn in flumes_msg_dict: - - # apply common rt steam to each flume - # (normally one per broker) - feed.flumes[fqsn].stream = stream + # apply `brokerd`-common steam to each flume + # tracking a symbol from that provider. + for fqsn, flume in feed.flumes.items(): + if brokermod.name in flume.symbol.brokers: + flume.stream = stream assert len(feed.mods) == len(feed.portals) == len(feed.streams) From 326f153a47b37e4ce63593466f4440b3583295a9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 14 Nov 2022 14:42:22 -0500 Subject: [PATCH 33/49] Catch overruns on throttled feed subs too Previously we would only detect overruns and drop subscriptions on non-throttled feed subs, however you can get the same issue with a wrapping throttler task: - the intermediate mem chan can be blocked either by the throttler task being too slow, in which case we still want to warn about it - the stream's IPC channel actually breaks and we still want to drop the connection and subscription so it doesn't be come a source of stale backpressure. --- piker/data/_sampling.py | 79 ++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index edba3e1f..61b2bd2f 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -309,25 +309,27 @@ async def sample_and_broadcast( volume, ) + # TODO: PUT THIS IN A ``_FeedsBus.broadcast()`` method! # XXX: we need to be very cautious here that no # context-channel is left lingering which doesn't have # a far end receiver actor-task. In such a case you can # end up triggering backpressure which which will # eventually block this producer end of the feed and # thus other consumers still attached. 
+ sub_key: str = broker_symbol.lower() subs: list[ tuple[ Union[tractor.MsgStream, trio.MemorySendChannel], tractor.Context, float | None, # tick throttle in Hz ] - ] = bus._subscribers[broker_symbol.lower()] + ] = bus._subscribers[sub_key] # NOTE: by default the broker backend doesn't append # it's own "name" into the fqsn schema (but maybe it # should?) so we have to manually generate the correct # key here. - bsym = f'{broker_symbol}.{brokername}' + fqsn = f'{broker_symbol}.{brokername}' lags: int = 0 for (stream, ctx, tick_throttle) in subs: @@ -338,47 +340,38 @@ async def sample_and_broadcast( # pushes to the ``uniform_rate_send()`` below. try: stream.send_nowait( - (bsym, quote) + (fqsn, quote) ) except trio.WouldBlock: + overruns[sub_key] += 1 chan = ctx.chan - if ctx: - log.warning( - f'Feed overrun {bus.brokername} ->' - f'{chan.uid} !!!' - ) - else: - key = id(stream) - overruns[key] += 1 - log.warning( - f'Feed overrun {broker_symbol}' - '@{bus.brokername} -> ' - f'feed @ {tick_throttle} Hz' - ) - if overruns[key] > 6: - # TODO: should we check for the - # context being cancelled? this - # could happen but the - # channel-ipc-pipe is still up. - if not chan.connected(): - log.warning( - 'Dropping broken consumer:\n' - f'{broker_symbol}:' - f'{ctx.cid}@{chan.uid}' - ) - await stream.aclose() - raise trio.BrokenResourceError - else: - log.warning( - 'Feed getting overrun bro!\n' - f'{broker_symbol}:' - f'{ctx.cid}@{chan.uid}' - ) - continue + log.warning( + f'Feed OVERRUN {sub_key}' + '@{bus.brokername} -> \n' + f'feed @ {chan.uid}\n' + f'throttle = {tick_throttle} Hz' + ) + + if overruns[sub_key] > 6: + # TODO: should we check for the + # context being cancelled? this + # could happen but the + # channel-ipc-pipe is still up. 
+ if ( + not chan.connected() + or ctx._cancel_called + ): + log.warning( + 'Dropping broken consumer:\n' + f'{sub_key}:' + f'{ctx.cid}@{chan.uid}' + ) + await stream.aclose() + raise trio.BrokenResourceError else: await stream.send( - {bsym: quote} + {fqsn: quote} ) if cs.cancelled_caught: @@ -406,14 +399,10 @@ async def sample_and_broadcast( # so far seems like no since this should all # be single-threaded. Doing it anyway though # since there seems to be some kinda race.. - try: - subs.remove((stream, tick_throttle)) - except ValueError: - log.error( - f'Stream was already removed from subs!?\n' - f'{broker_symbol}:' - f'{ctx.cid}@{chan.uid}' - ) + bus.remove_sub( + sub_key, + (stream, ctx, tick_throttle), + ) # TODO: a less naive throttler, here's some snippets: From 88870fdda76c7b8fdb3f56d0154814059b396347 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 15 Nov 2022 15:44:52 -0500 Subject: [PATCH 34/49] Set `brokers: list[st]` from mods when not provided.. --- piker/data/feed.py | 1 + piker/ui/_overlay.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index b243be8d..6cb25bdc 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -1450,6 +1450,7 @@ class Feed(Struct): if brokers is None: mods = self.mods + brokers = list(self.mods) else: mods = {name: self.mods[name] for name in brokers} diff --git a/piker/ui/_overlay.py b/piker/ui/_overlay.py index af66a735..ac15a9dc 100644 --- a/piker/ui/_overlay.py +++ b/piker/ui/_overlay.py @@ -304,7 +304,7 @@ class PlotItemOverlay: # NOTE: required for scene layering/relaying; this guarantees # the "root" plot receives priority for interaction # events/signals. 
- root_plotitem.vb.setZValue(1000) + root_plotitem.vb.setZValue(10) self.overlays: list[PlotItem] = [] self.layout = ComposedGridLayout(root_plotitem) @@ -494,6 +494,8 @@ class PlotItemOverlay: root.vb.setFocus() assert root.vb.focusWidget() + vb.setZValue(100) + def get_axis( self, plot: PlotItem, From 2a158aea2cc55f2c3457f9aae7229226f9178446 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 16 Nov 2022 13:32:26 -0500 Subject: [PATCH 35/49] Rework `_FeedsBus` subscriptions mgmt using `set` Allows using `set` ops for subscription management and guarantees no duplicates per `brokerd` actor. New API is simpler for dynamic pause/resume changes per `Feed`: - `_FeedsBus.add_subs()`, `.get_subs()`, `.remove_subs()` all accept multi-sub `set` inputs. - `Feed.pause()` / `.resume()` encapsulates management of *only* sending a msg on each unique underlying IPC msg stream. Use new api in sampler task. --- piker/data/_sampling.py | 13 ++--- piker/data/feed.py | 120 ++++++++++++++++++++++------------------ 2 files changed, 73 insertions(+), 60 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 61b2bd2f..f8230bd7 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -24,7 +24,6 @@ from collections import Counter import time from typing import ( TYPE_CHECKING, - Union, ) import tractor @@ -319,11 +318,10 @@ async def sample_and_broadcast( sub_key: str = broker_symbol.lower() subs: list[ tuple[ - Union[tractor.MsgStream, trio.MemorySendChannel], - tractor.Context, + tractor.MsgStream | trio.MemorySendChannel, float | None, # tick throttle in Hz ] - ] = bus._subscribers[sub_key] + ] = bus.get_subs(sub_key) # NOTE: by default the broker backend doesn't append # it's own "name" into the fqsn schema (but maybe it @@ -332,7 +330,7 @@ async def sample_and_broadcast( fqsn = f'{broker_symbol}.{brokername}' lags: int = 0 - for (stream, ctx, tick_throttle) in subs: + for (stream, tick_throttle) in subs.copy(): try: with 
trio.move_on_after(0.2) as cs: if tick_throttle: @@ -344,6 +342,7 @@ async def sample_and_broadcast( ) except trio.WouldBlock: overruns[sub_key] += 1 + ctx = stream._ctx chan = ctx.chan log.warning( @@ -399,9 +398,9 @@ async def sample_and_broadcast( # so far seems like no since this should all # be single-threaded. Doing it anyway though # since there seems to be some kinda race.. - bus.remove_sub( + bus.remove_subs( sub_key, - (stream, ctx, tick_throttle), + {(stream, tick_throttle)}, ) diff --git a/piker/data/feed.py b/piker/data/feed.py index 6cb25bdc..93630a13 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -21,6 +21,7 @@ This module is enabled for ``brokerd`` daemons. """ from __future__ import annotations +from collections import defaultdict from contextlib import asynccontextmanager as acm from datetime import datetime from functools import partial @@ -111,16 +112,16 @@ class _FeedsBus(Struct): task_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() - _subscribers: dict[ + _subscribers: defaultdict[ str, - list[ + set[ tuple[ - Union[tractor.MsgStream, trio.MemorySendChannel], - tractor.Context, - Optional[float], # tick throttle in Hz + tractor.MsgStream | trio.MemorySendChannel, + # tractor.Context, + float | None, # tick throttle in Hz ] ] - ] = {} + ] = defaultdict(set) async def start_task( self, @@ -147,38 +148,53 @@ class _FeedsBus(Struct): # task: trio.lowlevel.Task, # ) -> bool: # ... + def get_subs( self, key: str, - ) -> list[ + ) -> set[ tuple[ Union[tractor.MsgStream, trio.MemorySendChannel], - tractor.Context, + # tractor.Context, float | None, # tick throttle in Hz ] ]: + ''' + Get the ``set`` of consumer subscription entries for the given key. 
+ + ''' return self._subscribers[key] - def remove_sub( + def add_subs( self, key: str, - sub: tuple, - ) -> bool: + subs: set[tuple[ + tractor.MsgStream | trio.MemorySendChannel, + # tractor.Context, + float | None, # tick throttle in Hz + ]], + ) -> set[tuple]: ''' - Remove a consumer's subscription entry for the given key. + Add a ``set`` of consumer subscription entries for the given key. ''' - stream, ctx, tick_throttle = sub - subs = self.get_subs(key) - try: - subs.remove(sub) - except ValueError: - chan = ctx.chan - log.error( - f'Stream was already removed from subs!?\n' - f'{key}:' - f'{ctx.cid}@{chan.uid}' - ) + _subs = self._subscribers[key] + _subs.update(subs) + return _subs + + def remove_subs( + self, + key: str, + subs: set[tuple], + + ) -> set[tuple]: + ''' + Remove a ``set`` of consumer subscription entries for key. + + ''' + _subs = self.get_subs(key) + _subs.difference_update(subs) + return _subs _bus: _FeedsBus = None @@ -969,12 +985,6 @@ class Flume(Struct): else: yield istream - async def pause(self) -> None: - await self.stream.send('pause') - - async def resume(self) -> None: - await self.stream.send('resume') - def get_ds_info( self, ) -> tuple[float, float, float]: @@ -1308,7 +1318,7 @@ async def open_feed_bus( # the sampler subscription since the backend isn't (yet) # expected to append it's own name to the fqsn, so we filter # on keys which *do not* include that name (e.g .ib) . 
- bus._subscribers.setdefault(bfqsn, []) + bus._subscribers.setdefault(bfqsn, set()) # sync feed subscribers with flume handles await ctx.started( @@ -1324,7 +1334,7 @@ async def open_feed_bus( ctx.open_stream() as stream, ): - local_subs: list = [] + local_subs: dict[str, set[tuple]] = {} for fqsn, flume in flumes.items(): # re-send to trigger display loop cycle (necessary especially # when the mkt is closed and no real-time messages are @@ -1361,43 +1371,42 @@ async def open_feed_bus( # stream it's the throttle task does the work of # incrementally forwarding to the IPC stream at the throttle # rate. - sub = (send, ctx, tick_throttle) + send._ctx = ctx # mock internal ``tractor.MsgStream`` ref + sub = (send, tick_throttle) else: - sub = (stream, ctx, tick_throttle) + sub = (stream, tick_throttle) # TODO: add an api for this on the bus? # maybe use the current task-id to key the sub list that's # added / removed? Or maybe we can add a general # pause-resume by sub-key api? bfqsn = fqsn.removesuffix(f'.{brokername}') - bus_subs = bus._subscribers[bfqsn] - bus_subs.append(sub) - local_subs.append(sub) + local_subs.setdefault(bfqsn, set()).add(sub) + bus.add_subs(bfqsn, {sub}) + # sync caller with all subs registered state sub_registered.set() + uid = ctx.chan.uid try: - uid = ctx.chan.uid - # ctrl protocol for start/stop of quote streams based on UI # state (eg. don't need a stream when a symbol isn't being # displayed). 
async for msg in stream: if msg == 'pause': - for sub in local_subs: - if sub in bus_subs: - log.info( - f'Pausing {fqsn} feed for {uid}') - bus_subs.remove(sub) + for bfqsn, subs in local_subs.items(): + log.info( + f'Pausing {bfqsn} feed for {uid}') + bus.remove_subs(bfqsn, subs) elif msg == 'resume': - for sub in local_subs: - if sub not in bus_subs: - log.info( - f'Resuming {fqsn} feed for {uid}') - bus_subs.append(sub) + for bfqsn, subs in local_subs.items(): + log.info( + f'Resuming {bfqsn} feed for {uid}') + bus.add_subs(bfqsn, subs) + else: raise ValueError(msg) finally: @@ -1410,11 +1419,8 @@ async def open_feed_bus( cs.cancel() # drop all subs for this task from the bus - for sub in local_subs: - try: - bus._subscribers[bfqsn].remove(sub) - except ValueError: - log.warning(f'{sub} for {symbol} was already removed?') + for bfqsn, subs in local_subs.items(): + bus.remove_subs(bfqsn, subs) class Feed(Struct): @@ -1492,6 +1498,14 @@ class Feed(Struct): # def name(self) -> str: # return self.mod.name + async def pause(self) -> None: + for stream in set(self.streams.values()): + await stream.send('pause') + + async def resume(self) -> None: + for stream in set(self.streams.values()): + await stream.send('resume') + @acm async def install_brokerd_search( From 967e28b7aceb91e1d83272fae7db8914f1d2aeb3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Nov 2022 13:34:50 -0500 Subject: [PATCH 36/49] Adjust built-in backend list to known working --- piker/brokers/__init__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/piker/brokers/__init__.py b/piker/brokers/__init__.py index eb4e735a..95eb6f08 100644 --- a/piker/brokers/__init__.py +++ b/piker/brokers/__init__.py @@ -26,10 +26,21 @@ asks.init('trio') __brokers__ = [ 'binance', - 'questrade', - 'robinhood', 'ib', 'kraken', + + # broken but used to work + # 'questrade', + # 'robinhood', + + # TODO: we should get on these stat! 
+ # alpaca + # wstrade + # iex + + # deribit + # kucoin + # bitso ] From c944db5f02c9b44c42c9e655244f4185c2050257 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Nov 2022 15:24:13 -0500 Subject: [PATCH 37/49] Revert "Fix `_main()` arg back to `sym: str`" This reverts commit 02fbc0a0ed6d5a74239e1788a6fd8d44147b516f. --- piker/ui/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/ui/cli.py b/piker/ui/cli.py index cc571eb6..6716607a 100644 --- a/piker/ui/cli.py +++ b/piker/ui/cli.py @@ -174,7 +174,7 @@ def chart( _main( - sym=symbols[0], + syms=symbols, brokernames=brokernames, piker_loglevel=pikerloglevel, tractor_kwargs={ From 28fd795280abb2817ab101f9e69998e25f5a729f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 15 Nov 2022 18:07:02 -0500 Subject: [PATCH 38/49] Only require `-b ` for filtering Instead of requiring any `-b` try to import all built-in broker backend python modules by default and only load those detected from the input symbol list's fqsn values. In other words the `piker chart` cmd can be run sin `-b` now and that flag is only required if you only want to load a subset of the built-ins or are trying to load a specific not-yet-builtin backend. --- piker/cli/__init__.py | 14 ++++++++------ piker/ui/_app.py | 37 +++++++++++++++++++++++++------------ piker/ui/cli.py | 6 +++--- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 1386bc83..c1b8aab0 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -36,7 +36,6 @@ from .. 
import config log = get_logger('cli') -DEFAULT_BROKER = 'questrade' @click.command() @@ -118,7 +117,7 @@ def pikerd( @click.group(context_settings=config._context_defaults) @click.option( '--brokers', '-b', - default=[DEFAULT_BROKER], + default=None, multiple=True, help='Broker backend to use' ) @@ -144,10 +143,13 @@ def cli( ctx.ensure_object(dict) - if len(brokers) == 1: - brokermods = [get_brokermod(brokers[0])] - else: - brokermods = [get_brokermod(broker) for broker in brokers] + if not brokers: + # (try to) load all (supposedly) supported data/broker backends + from piker.brokers import __brokers__ + brokers = __brokers__ + + brokermods = [get_brokermod(broker) for broker in brokers] + assert brokermods reg_addr: None | tuple[str, int] = None if host or port: diff --git a/piker/ui/_app.py b/piker/ui/_app.py index 38a4db20..23a9d2ed 100644 --- a/piker/ui/_app.py +++ b/piker/ui/_app.py @@ -19,15 +19,16 @@ Main app startup and run. ''' from functools import partial +from types import ModuleType from PyQt5.QtCore import QEvent import trio from .._daemon import maybe_spawn_brokerd -from ..brokers import get_brokermod from . import _event from ._exec import run_qtractor from ..data.feed import install_brokerd_search +from ..data._source import unpack_fqsn from . 
import _search from ._chart import GodWidget from ..log import get_logger @@ -36,27 +37,26 @@ log = get_logger(__name__) async def load_provider_search( - - broker: str, + brokermod: str, loglevel: str, ) -> None: - log.info(f'loading brokerd for {broker}..') + name = brokermod.name + log.info(f'loading brokerd for {name}..') async with ( maybe_spawn_brokerd( - broker, + name, loglevel=loglevel ) as portal, install_brokerd_search( portal, - get_brokermod(broker), + brokermod, ), ): - # keep search engine stream up until cancelled await trio.sleep_forever() @@ -67,7 +67,7 @@ async def _async_main( main_widget: GodWidget, syms: list[str], - brokernames: str, + brokers: dict[str, ModuleType], loglevel: str, ) -> None: @@ -99,6 +99,11 @@ async def _async_main( sbar = godwidget.window.status_bar starting_done = sbar.open_status('starting ze sexy chartz') + needed_brokermods: dict[str, ModuleType] = {} + for fqsn in syms: + brokername, *_ = unpack_fqsn(fqsn) + needed_brokermods[brokername] = brokers[brokername] + async with ( trio.open_nursery() as root_n, ): @@ -140,8 +145,12 @@ async def _async_main( ): # load other providers into search **after** # the chart's select cache - for broker in brokernames: - root_n.start_soon(load_provider_search, broker, loglevel) + for brokername, mod in needed_brokermods.items(): + root_n.start_soon( + load_provider_search, + mod, + loglevel, + ) await order_mode_ready.wait() @@ -171,7 +180,7 @@ async def _async_main( def _main( syms: list[str], - brokernames: [str], + brokermods: list[ModuleType], piker_loglevel: str, tractor_kwargs, ) -> None: @@ -182,7 +191,11 @@ def _main( ''' run_qtractor( func=_async_main, - args=(syms, brokernames, piker_loglevel), + args=( + syms, + {mod.name: mod for mod in brokermods}, + piker_loglevel, + ), main_widget_type=GodWidget, tractor_kwargs=tractor_kwargs, ) diff --git a/piker/ui/cli.py b/piker/ui/cli.py index 6716607a..a72c2f5c 100644 --- a/piker/ui/cli.py +++ b/piker/ui/cli.py @@ -166,16 +166,16 @@ 
def chart( )) return - # global opts brokernames = config['brokers'] + brokermods = config['brokermods'] + assert brokermods tractorloglevel = config['tractorloglevel'] pikerloglevel = config['loglevel'] - _main( syms=symbols, - brokernames=brokernames, + brokermods=brokermods, piker_loglevel=pikerloglevel, tractor_kwargs={ 'debug_mode': pdb, From 5d021ffb854f4c01c1cd416ab1880a6a1d6fa038 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Nov 2022 18:23:34 -0500 Subject: [PATCH 39/49] Bump up timeout on multi-feed test for CI --- tests/test_feeds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_feeds.py b/tests/test_feeds.py index b471c319..ec25b11b 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -96,7 +96,7 @@ def test_multi_fqsn_feed( assert quote['last'] == flume.first_quote['last'] cntr = Counter() - with trio.fail_after(3): + with trio.fail_after(5): async for quotes in stream: for fqsn, quote in quotes.items(): cntr[fqsn] += 1 From b7e1443618b867e1fdb79d8cf347b50bd2909237 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 10 Dec 2022 15:32:09 -0500 Subject: [PATCH 40/49] Use ETH on kraken to ensure enough quotes --- tests/test_feeds.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_feeds.py b/tests/test_feeds.py index ec25b11b..b357d615 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -25,10 +25,10 @@ from piker.data._source import ( (100, {'btcusdt.binance', 'ethusdt.binance'}), # kraken - (20, {'xbteur.kraken', 'xbtusd.kraken'}), + (20, {'ethusdt.kraken', 'xbtusd.kraken'}), # binance + kraken - (200, {'btcusdt.binance', 'xbtusd.kraken'}), + (100, {'btcusdt.binance', 'xbtusd.kraken'}), ], ids=lambda param: f'quotes={param[0]}@fqsns={param[1]}', ) @@ -96,7 +96,7 @@ def test_multi_fqsn_feed( assert quote['last'] == flume.first_quote['last'] cntr = Counter() - with trio.fail_after(5): + with trio.fail_after(6): async for quotes in stream: for fqsn, quote in 
quotes.items(): cntr[fqsn] += 1 From f232d6d4ee56757c1cb16c827c7c93d8ac0a0acc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 15:30:26 -0500 Subject: [PATCH 41/49] Add `ci_env` detector fixture --- tests/conftest.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 114812bd..2ad6f624 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -83,6 +83,15 @@ def travis(confdir): trio.run(ensure_config) +_ci_env: bool = os.environ.get('CI', False) + + +@pytest.fixture(scope='session') +def ci_env() -> bool: + """Detect CI envoirment. + """ + return _ci_env + @pytest.fixture def us_symbols(): return ['TSLA', 'AAPL', 'CGC', 'CRON'] @@ -96,3 +105,4 @@ def tmx_symbols(): @pytest.fixture def cse_symbols(): return ['TRUL.CN', 'CWEB.CN', 'SNN.CN'] + From 76f920a16bfad6f890c17a83bc1fcf607be25d1f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 20 Dec 2022 16:58:51 -0500 Subject: [PATCH 42/49] Always force lowercase on `binance` symbol keys Hopefully helps resolve #435 --- piker/brokers/binance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 9d2742be..be3f35cf 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -181,7 +181,7 @@ class Client: params = {} if sym is not None: - sym = sym.upper() + sym = sym.lower() params = {'symbol': sym} resp = await self._api( @@ -465,7 +465,7 @@ async def stream_quotes( si = sym_infos[sym] = syminfo.to_dict() filters = {} for entry in syminfo.filters: - ftype = entry.pop('filterType') + ftype = entry['filterType'] filters[ftype] = entry # XXX: after manually inspecting the response format we From f6b7057b0d753ee47d93f60688ec3e67574ac9f3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 5 Jan 2023 12:44:58 -0500 Subject: [PATCH 43/49] `binance`: always request an extra 1min OHLC bar Seems that by default their history indexing rounds down/back to the 
previous time step, so make sure we add a minute inside `Client.bars()` when the `end_dt=None`, indicating "get the latest bar". Add a breakpoint block that should trigger whenever the latest bar vs. the latest epoch time is mismatched; we'll remove this after some testing verifying the history bars issue is resolved. Further this drops the legacy `backfill_bars()` endpoint which has been deprecated and unused for a while. --- piker/brokers/binance.py | 48 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index be3f35cf..5ea7860a 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -41,10 +41,15 @@ from ._util import ( SymbolNotFound, DataUnavailable, ) -from ..log import get_logger, get_console_log -from ..data import ShmArray +from ..log import ( + get_logger, + get_console_log, +) from ..data.types import Struct -from ..data._web_bs import open_autorecon_ws, NoBsWs +from ..data._web_bs import ( + open_autorecon_ws, + NoBsWs, +) log = get_logger(__name__) @@ -142,7 +147,9 @@ class OHLC(Struct): # convert datetime obj timestamp to unixtime in milliseconds -def binance_timestamp(when): +def binance_timestamp( + when: datetime +) -> int: return int((when.timestamp() * 1000) + (when.microsecond / 1000)) @@ -238,7 +245,7 @@ class Client: ) -> dict: if end_dt is None: - end_dt = pendulum.now('UTC') + end_dt = pendulum.now('UTC').add(minutes=1) if start_dt is None: start_dt = end_dt.start_of( @@ -396,8 +403,8 @@ async def open_history_client( async def get_ohlc( timeframe: float, - end_dt: Optional[datetime] = None, - start_dt: Optional[datetime] = None, + end_dt: datetime | None = None, + start_dt: datetime | None = None, ) -> tuple[ np.ndarray, @@ -412,27 +419,22 @@ async def open_history_client( start_dt=start_dt, end_dt=end_dt, ) - start_dt = pendulum.from_timestamp(array[0]['time']) - end_dt = pendulum.from_timestamp(array[-1]['time']) + 
times = array['time'] + if ( + end_dt is None + ): + inow = round(time.time()) + if (inow - times[-1]) > 60: + await tractor.breakpoint() + + start_dt = pendulum.from_timestamp(times[0]) + end_dt = pendulum.from_timestamp(times[-1]) + return array, start_dt, end_dt yield get_ohlc, {'erlangs': 3, 'rate': 3} -async def backfill_bars( - sym: str, - shm: ShmArray, # type: ignore # noqa - task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, -) -> None: - """Fill historical bars into shared mem / storage afap. - """ - with trio.CancelScope() as cs: - async with open_cached_client('binance') as client: - bars = await client.bars(symbol=sym) - shm.push(bars) - task_status.started(cs) - - async def stream_quotes( send_chan: trio.abc.SendChannel, From 81585d9e6e0427c018c17cf0184a47a02e030e71 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 17:27:27 -0500 Subject: [PATCH 44/49] Set global registry addr after first entry point spawns `pikerd` --- piker/_daemon.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/piker/_daemon.py b/piker/_daemon.py index 6609338b..c40b63df 100644 --- a/piker/_daemon.py +++ b/piker/_daemon.py @@ -37,10 +37,15 @@ _root_dname = 'pikerd' _registry_host: str = '127.0.0.1' _registry_port: int = 6116 -_registry_addr = ( +_default_reg_addr: tuple[str, int] = ( _registry_host, _registry_port, ) + +# NOTE: this value is set as an actor-global once the first endpoint +# who is capable, spawns a `pikerd` service tree. 
+_registry_addr: tuple[str, int] | None = None + _tractor_kwargs: dict[str, Any] = { # use a different registry addr then tractor's default 'arbiter_addr': _registry_addr @@ -152,13 +157,17 @@ async def open_pikerd( ''' global _services + global _registry_addr + + if _registry_addr is None: + _registry_addr = registry_addr or _default_reg_addr # XXX: this may open a root actor as well async with ( tractor.open_root_actor( # passed through to ``open_root_actor`` - arbiter_addr=registry_addr or _registry_addr, + arbiter_addr=_registry_addr, name=_root_dname, loglevel=loglevel, debug_mode=debug_mode, @@ -197,7 +206,7 @@ async def open_piker_runtime( # XXX: you should pretty much never want debug mode # for data daemons when running in production. debug_mode: bool = False, - registry_addr: None | tuple[str, int] = _registry_addr, + registry_addr: None | tuple[str, int] = None, ) -> tractor.Actor: ''' @@ -206,13 +215,17 @@ async def open_piker_runtime( ''' global _services + global _registry_addr + + if _registry_addr is None: + _registry_addr = registry_addr or _default_reg_addr # XXX: this may open a root actor as well async with ( tractor.open_root_actor( # passed through to ``open_root_actor`` - arbiter_addr=registry_addr, + arbiter_addr=_registry_addr, name=name, loglevel=loglevel, debug_mode=debug_mode, From 008ae47e14ebf9de88276b46a30aa9b4dbcd1c09 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 18:15:23 -0500 Subject: [PATCH 45/49] Reset `._registry_addr` to any passed in value from caller --- piker/_daemon.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/piker/_daemon.py b/piker/_daemon.py index c40b63df..d4ca7f21 100644 --- a/piker/_daemon.py +++ b/piker/_daemon.py @@ -35,11 +35,11 @@ log = get_logger(__name__) _root_dname = 'pikerd' -_registry_host: str = '127.0.0.1' -_registry_port: int = 6116 +_default_registry_host: str = '127.0.0.1' +_default_registry_port: int = 6116 _default_reg_addr: tuple[str, 
int] = ( - _registry_host, - _registry_port, + _default_registry_host, + _default_registry_port, ) # NOTE: this value is set as an actor-global once the first endpoint @@ -159,7 +159,10 @@ async def open_pikerd( global _services global _registry_addr - if _registry_addr is None: + if ( + _registry_addr is None + or registry_addr + ): _registry_addr = registry_addr or _default_reg_addr # XXX: this may open a root actor as well @@ -217,7 +220,10 @@ async def open_piker_runtime( global _services global _registry_addr - if _registry_addr is None: + if ( + _registry_addr is None + or registry_addr + ): _registry_addr = registry_addr or _default_reg_addr # XXX: this may open a root actor as well From 06622105cdc1470e3907f70b17ab55c0bc30aa66 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 18:16:09 -0500 Subject: [PATCH 46/49] Add a `open_test_pikerd()` acm fixture for easy booting of the service stack --- tests/conftest.py | 151 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 105 insertions(+), 46 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2ad6f624..1bd1d86e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,12 @@ +from contextlib import asynccontextmanager as acm import os import pytest import tractor -import trio -from piker import log, config -from piker.brokers import questrade +from piker import ( + # log, + config, +) def pytest_addoption(parser): @@ -28,9 +30,11 @@ def test_config(): @pytest.fixture(scope='session', autouse=True) def confdir(request, test_config): - """If the `--confdir` flag is not passed use the + ''' + If the `--confdir` flag is not passed use the broker config file found in that dir. 
- """ + + ''' confdir = request.config.option.confdir if confdir is not None: config._override_config_dir(confdir) @@ -38,49 +42,49 @@ def confdir(request, test_config): return confdir -@pytest.fixture(scope='session', autouse=True) -def travis(confdir): - is_travis = os.environ.get('TRAVIS', False) - if is_travis: - # this directory is cached, see .travis.yaml - conf_file = config.get_broker_conf_path() - refresh_token = os.environ['QT_REFRESH_TOKEN'] +# @pytest.fixture(scope='session', autouse=True) +# def travis(confdir): +# is_travis = os.environ.get('TRAVIS', False) +# if is_travis: +# # this directory is cached, see .travis.yaml +# conf_file = config.get_broker_conf_path() +# refresh_token = os.environ['QT_REFRESH_TOKEN'] - def write_with_token(token): - # XXX don't pass the dir path here since may be - # written behind the scenes in the `confdir fixture` - if not os.path.isfile(conf_file): - open(conf_file, 'w').close() - conf, path = config.load() - conf.setdefault('questrade', {}).update( - {'refresh_token': token, - 'is_practice': 'True'} - ) - config.write(conf, path) +# def write_with_token(token): +# # XXX don't pass the dir path here since may be +# # written behind the scenes in the `confdir fixture` +# if not os.path.isfile(conf_file): +# open(conf_file, 'w').close() +# conf, path = config.load() +# conf.setdefault('questrade', {}).update( +# {'refresh_token': token, +# 'is_practice': 'True'} +# ) +# config.write(conf, path) - async def ensure_config(): - # try to refresh current token using cached brokers config - # if it fails fail try using the refresh token provided by the - # env var and if that fails stop the test run here. 
- try: - async with questrade.get_client(ask_user=False): - pass - except ( - FileNotFoundError, ValueError, - questrade.BrokerError, questrade.QuestradeError, - trio.MultiError, - ): - # 3 cases: - # - config doesn't have a ``refresh_token`` k/v - # - cache dir does not exist yet - # - current token is expired; take it form env var - write_with_token(refresh_token) +# async def ensure_config(): +# # try to refresh current token using cached brokers config +# # if it fails fail try using the refresh token provided by the +# # env var and if that fails stop the test run here. +# try: +# async with questrade.get_client(ask_user=False): +# pass +# except ( +# FileNotFoundError, ValueError, +# questrade.BrokerError, questrade.QuestradeError, +# trio.MultiError, +# ): +# # 3 cases: +# # - config doesn't have a ``refresh_token`` k/v +# # - cache dir does not exist yet +# # - current token is expired; take it form env var +# write_with_token(refresh_token) - async with questrade.get_client(ask_user=False): - pass +# async with questrade.get_client(ask_user=False): +# pass - # XXX ``pytest_trio`` doesn't support scope or autouse - trio.run(ensure_config) +# # XXX ``pytest_trio`` doesn't support scope or autouse +# trio.run(ensure_config) _ci_env: bool = os.environ.get('CI', False) @@ -88,10 +92,13 @@ _ci_env: bool = os.environ.get('CI', False) @pytest.fixture(scope='session') def ci_env() -> bool: - """Detect CI envoirment. - """ + ''' + Detect CI envoirment. + + ''' return _ci_env + @pytest.fixture def us_symbols(): return ['TSLA', 'AAPL', 'CGC', 'CRON'] @@ -106,3 +113,55 @@ def tmx_symbols(): def cse_symbols(): return ['TRUL.CN', 'CWEB.CN', 'SNN.CN'] + +@acm +async def _open_test_pikerd( + reg_addr: tuple[str, int] | None = None, + **kwargs, + +) -> tuple[ + str, + int, + tractor.Portal +]: + ''' + Testing helper to startup the service tree and runtime on + a different port then the default to allow testing alongside + a running stack. 
+ + ''' + import random + from piker._daemon import maybe_open_pikerd + + if reg_addr is None: + port = random.randint(6e3, 7e3) + reg_addr = ('127.0.0.1', port) + + async with ( + maybe_open_pikerd( + registry_addr=reg_addr, + **kwargs, + ), + ): + async with tractor.wait_for_actor( + 'pikerd', + arbiter_sockaddr=reg_addr, + ) as portal: + raddr = portal.channel.raddr + assert raddr == reg_addr + yield ( + raddr[0], + raddr[1], + portal, + ) + + +@pytest.fixture +def open_test_pikerd(): + + yield _open_test_pikerd + + # TODO: teardown checks such as, + # - no leaked subprocs or shm buffers + # - all requested container service are torn down + # - certain ``tractor`` runtime state? From 593db0ed0dfd47f9ec0b9e4d38e0f1ee7f0051e3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 18:16:55 -0500 Subject: [PATCH 47/49] Only run `kraken` feed tests in CI, use `open_test_pikerd()` --- tests/test_feeds.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test_feeds.py b/tests/test_feeds.py index b357d615..2b85301f 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -4,15 +4,15 @@ Data feed layer APIs, performance, msg throttling. 
''' from collections import Counter from pprint import pprint +from typing import AsyncContextManager import pytest # import tractor import trio -from piker import ( - open_piker_runtime, +from piker.data import ( + ShmArray, open_feed, ) -from piker.data import ShmArray from piker.data._source import ( unpack_fqsn, ) @@ -22,25 +22,33 @@ from piker.data._source import ( 'fqsns', [ # binance - (100, {'btcusdt.binance', 'ethusdt.binance'}), + (100, {'btcusdt.binance', 'ethusdt.binance'}, False), # kraken - (20, {'ethusdt.kraken', 'xbtusd.kraken'}), + (20, {'ethusdt.kraken', 'xbtusd.kraken'}, True), # binance + kraken - (100, {'btcusdt.binance', 'xbtusd.kraken'}), + (100, {'btcusdt.binance', 'xbtusd.kraken'}, False), ], ids=lambda param: f'quotes={param[0]}@fqsns={param[1]}', ) def test_multi_fqsn_feed( + open_test_pikerd: AsyncContextManager, fqsns: set[str], + ci_env: bool ): ''' Start a real-time data feed for provided fqsn and pull a few quotes then simply shut down. ''' - max_quotes, fqsns = fqsns + max_quotes, fqsns, run_in_ci = fqsns + + if ( + ci_env + and not run_in_ci + ): + pytest.skip('Skipping CI disabled test due to feed restrictions') brokers = set() for fqsn in fqsns: @@ -49,15 +57,7 @@ def test_multi_fqsn_feed( async def main(): async with ( - open_piker_runtime( - 'test_basic_rt_feed', - - # XXX tractor BUG: this doesn't translate through to the - # ``tractor._state._runtimevars``... 
- # registry_addr=('127.0.0.1', 6666), - - debug_mode=True, - ), + open_test_pikerd(), open_feed( fqsns, loglevel='info', From 55de9abc410b6e99e1a648b16bc1e1a8362132e6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 18:22:10 -0500 Subject: [PATCH 48/49] Adjust cli mod imports of daemon sockaddr vars --- piker/cli/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index c1b8aab0..67647a83 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -29,8 +29,8 @@ from ..log import get_console_log, get_logger, colorize_json from ..brokers import get_brokermod from .._daemon import ( _tractor_kwargs, - _registry_host, - _registry_port, + _default_registry_host, + _default_registry_port, ) from .. import config @@ -76,8 +76,8 @@ def pikerd( reg_addr: None | tuple[str, int] = None if host or port: reg_addr = ( - host or _registry_host, - int(port) or _registry_port, + host or _default_registry_host, + int(port) or _default_registry_port, ) async def main(): @@ -154,8 +154,8 @@ def cli( reg_addr: None | tuple[str, int] = None if host or port: reg_addr = ( - host or _registry_host, - int(port) or _registry_port, + host or _default_registry_host, + int(port) or _default_registry_port, ) ctx.obj.update({ From 963e5bdd62d5090657a14630a817c092e688120d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Jan 2023 18:41:12 -0500 Subject: [PATCH 49/49] Go back to `Feed.pause/resume()`, new flume APIs coming later --- piker/ui/_chart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piker/ui/_chart.py b/piker/ui/_chart.py index 93b41095..bad82544 100644 --- a/piker/ui/_chart.py +++ b/piker/ui/_chart.py @@ -916,7 +916,7 @@ class ChartPlotWidget(pg.PlotWidget): try: for feed in self._feeds.values(): for flume in feed.flumes.values(): - self.linked.godwidget._root_n.start_soon(flume.resume) + self.linked.godwidget._root_n.start_soon(feed.resume) 
except RuntimeError: # TODO: cancel the qtractor runtime here? raise @@ -924,7 +924,7 @@ class ChartPlotWidget(pg.PlotWidget): def pause_all_feeds(self): for feed in self._feeds.values(): for flume in feed.flumes.values(): - self.linked.godwidget._root_n.start_soon(flume.pause) + self.linked.godwidget._root_n.start_soon(feed.pause) @property def view(self) -> ChartView: