From 3c09bfba57c0fb132c25a3b191278d5a51d78917 Mon Sep 17 00:00:00 2001 From: Guillermo Rodriguez Date: Sat, 15 Jan 2022 23:33:23 -0300 Subject: [PATCH 001/105] Add multi ingestor support and update to new feed API --- piker/data/cli.py | 36 +++++++++----- piker/data/marketstore.py | 102 ++++++++++++++++++++++---------------- 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 7a774fb5..2206bd6a 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -136,7 +136,7 @@ def ingest(config, name, test_file, tl, url): """Ingest real-time broker quotes and ticks to a marketstore instance. """ # global opts - brokermod = config['brokermod'] + brokermods = config['brokermods'] loglevel = config['loglevel'] tractorloglevel = config['tractorloglevel'] # log = config['log'] @@ -145,15 +145,25 @@ def ingest(config, name, test_file, tl, url): watchlists = wl.merge_watchlist(watchlist_from_file, wl._builtins) symbols = watchlists[name] - tractor.run( - partial( - ingest_quote_stream, - symbols, - brokermod.name, - tries=1, - loglevel=loglevel, - ), - name='ingest_marketstore', - loglevel=tractorloglevel, - debug_mode=True, - ) + grouped_syms = {} + for sym in symbols: + symbol, _, provider = sym.rpartition('.') + if provider not in grouped_syms: + grouped_syms[provider] = [] + + grouped_syms[provider].append(symbol) + + async def entry_point(): + async with tractor.open_nursery() as n: + for provider, symbols in grouped_syms.items(): + await n.run_in_actor( + ingest_quote_stream, + name='ingest_marketstore', + symbols=symbols, + brokername=provider, + tries=1, + actorloglevel=loglevel, + loglevel=tractorloglevel + ) + + tractor.run(entry_point) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 27bcda70..8fd737cf 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -24,7 +24,7 @@ - todo: docker container management automation """ from contextlib import asynccontextmanager -from typing import Dict, Any, List, Callable, Tuple +from typing import Dict, Any, List, Callable, Tuple, Optional import time from math import isnan @@ -49,25 +49,16 @@ _quote_dt = [ ('Epoch', 'i8'), ('Nanoseconds', 'i4'), - ('Tick', 'i4'), # (-1, 0, 1) = (on bid, same, on ask) - # ('fill_time', 'f4'), + ('Tick', 'i4'), ('Last', 'f4'), ('Bid', 'f4'), - ('Bsize', 'i8'), - ('Asize', 'i8'), + ('Bsize', 'f4'), + ('Asize', 'f4'), ('Ask', 'f4'), ('Size', 'i8'), - ('Volume', 'i8'), - # ('brokerd_ts', 'i64'), - # ('VWAP', 'f4') + ('Volume', 'f4'), ] _quote_tmp = {}.fromkeys(dict(_quote_dt).keys(), np.nan) -_tick_map = { - 'Up': 1, - 'Equal': 0, - 'Down': -1, - None: np.nan, -} class MarketStoreError(Exception): @@ -87,18 +78,20 @@ def err_on_resp(response: dict) -> None: def quote_to_marketstore_structarray( quote: Dict[str, Any], - last_fill: str, + last_fill: Optional[float], + ) -> np.array: """Return marketstore writeable structarray from quote ``dict``. """ if last_fill: # new fill bby - now = timestamp(last_fill) + now = timestamp(last_fill, unit='s') + else: # this should get inserted upstream by the broker-client to # subtract from IPC latency now = time.time_ns() - + secs, ns = now / 10**9, now % 10**9 # pack into List[Tuple[str, Any]] @@ -123,11 +116,11 @@ def quote_to_marketstore_structarray( return np.array([tuple(array_input)], dtype=_quote_dt) -def timestamp(datestr: str) -> int: +def timestamp(date, **kwargs) -> int: """Return marketstore compatible 'Epoch' integer in nanoseconds from a date formatted str. 
""" - return int(pd.Timestamp(datestr).value) + return int(pd.Timestamp(date, **kwargs).value) def mk_tbk(keys: Tuple[str, str, str]) -> str: @@ -206,46 +199,71 @@ async def ingest_quote_stream( symbols: List[str], brokername: str, tries: int = 1, - loglevel: str = None, + actorloglevel: str = None, ) -> None: """Ingest a broker quote stream into marketstore in (sampled) tick format. """ async with open_feed( brokername, symbols, - loglevel=loglevel, - ) as (first_quotes, qstream): - - quote_cache = first_quotes.copy() - + loglevel=actorloglevel, + ) as feed: async with get_client() as ms_client: + # _quote_dt = [ + # # these two are required for as a "primary key" + # ('Epoch', 'i8'), + # ('Nanoseconds', 'i4'), + # ('Tick', 'i4'), + # + # ('Last', 'f4'), + # ('Bid', 'f4'), + # ('Bsize', 'f4'), + # ('Asize', 'f4'), + # ('Ask', 'f4'), + # ('Size', 'i8'), + # ('Volume', 'f4'), + # ] + + quote_cache = { + 'size': 0, + 'tick': 0 + } # start ingest to marketstore - async for quotes in qstream: + async for quotes in feed.stream: log.info(quotes) for symbol, quote in quotes.items(): - # remap tick strs to ints - quote['tick'] = _tick_map[quote.get('tick', 'Equal')] + for tick in quote.get('ticks', ()): + ticktype = tick.get('type') + price = tick.get('price') + size = tick.get('size') - # check for volume update (i.e. did trades happen - # since last quote) - new_vol = quote.get('volume', None) - if new_vol is None: - log.debug(f"No fills for {symbol}") - if new_vol == quote_cache.get('volume'): - # should never happen due to field diffing - # on sender side - log.error( - f"{symbol}: got same volume as last quote?") + if ticktype == 'n/a' or price == -1: + # okkk.. + continue - quote_cache.update(quote) + # clearing price event + if ticktype == 'trade': + quote_cache['volume'] = quote['volume'] + quote_cache['last'] = price + # quote_cache['broker_ts'] = quote['broker_ts'] + + # l1 book events + elif ticktype in ('ask', 'asize'): + quote_cache['ask'] = price + quote_cache['asize'] = size + + elif ticktype in ('bid', 'bsize'): + quote_cache['bid'] = price + quote_cache['bsize'] = size a = quote_to_marketstore_structarray( - quote, - # TODO: check this closer to the broker query api - last_fill=quote.get('fill_time', '') + quote_cache, + last_fill=quote.get('broker_ts', None) ) + log.info(a) + # breakpoint() await ms_client.write(symbol, a) From 897a5cf2f646414a9466d257794f1ee446c0e045 Mon Sep 17 00:00:00 2001 From: Guillermo Rodriguez Date: Sun, 16 Jan 2022 00:20:57 -0300 Subject: [PATCH 002/105] Simplify and optimize tick format, similar to techtonicdb's --- piker/data/marketstore.py | 90 ++++++++++----------------------------- 1 file changed, 23 insertions(+), 67 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 8fd737cf..1d271452 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -49,14 +49,10 @@ _quote_dt = [ ('Epoch', 'i8'), ('Nanoseconds', 'i4'), - ('Tick', 'i4'), - ('Last', 'f4'), - ('Bid', 'f4'), - ('Bsize', 'f4'), - ('Asize', 'f4'), - ('Ask', 'f4'), - ('Size', 'i8'), - ('Volume', 'f4'), + ('IsTrade', 'i1'), + ('IsBid', 'i1'), + ('Price', 'f8'), + ('Size', 'f8') ] _quote_tmp = {}.fromkeys(dict(_quote_dt).keys(), np.nan) @@ -109,8 +105,7 @@ def quote_to_marketstore_structarray( # for ``np.int`` we use 0 as a null value none = 0 - # casefold? 
see https://github.com/alpacahq/marketstore/issues/324 - val = quote.get(name.casefold(), none) + val = quote.get(name, none) array_input.append(val) return np.array([tuple(array_input)], dtype=_quote_dt) @@ -203,67 +198,28 @@ async def ingest_quote_stream( ) -> None: """Ingest a broker quote stream into marketstore in (sampled) tick format. """ - async with open_feed( - brokername, - symbols, - loglevel=actorloglevel, - ) as feed: - async with get_client() as ms_client: + async with ( + open_feed(brokername, symbols, loglevel=actorloglevel) as feed, + get_client() as ms_client + ): + async for quotes in feed.stream: + log.info(quotes) + for symbol, quote in quotes.items(): + for tick in quote.get('ticks', ()): + ticktype = tick.get('type', 'n/a') - # _quote_dt = [ - # # these two are required for as a "primary key" - # ('Epoch', 'i8'), - # ('Nanoseconds', 'i4'), - # ('Tick', 'i4'), - # - # ('Last', 'f4'), - # ('Bid', 'f4'), - # ('Bsize', 'f4'), - # ('Asize', 'f4'), - # ('Ask', 'f4'), - # ('Size', 'i8'), - # ('Volume', 'f4'), - # ] + if ticktype == 'n/a': + # okkk.. + continue - quote_cache = { - 'size': 0, - 'tick': 0 - } - # start ingest to marketstore - async for quotes in feed.stream: - log.info(quotes) - for symbol, quote in quotes.items(): + a = quote_to_marketstore_structarray({ + 'IsTrade': 1 if ticktype == 'trade' else 0, + 'IsBid': 1 if ticktype in ('bid', 'bsize') else 0, + 'Price': tick.get('price'), + 'Size': tick.get('size') + }, last_fill=quote.get('broker_ts', None)) - for tick in quote.get('ticks', ()): - ticktype = tick.get('type') - price = tick.get('price') - size = tick.get('size') - - if ticktype == 'n/a' or price == -1: - # okkk.. - continue - - # clearing price event - if ticktype == 'trade': - quote_cache['volume'] = quote['volume'] - quote_cache['last'] = price - # quote_cache['broker_ts'] = quote['broker_ts'] - - # l1 book events - elif ticktype in ('ask', 'asize'): - quote_cache['ask'] = price - quote_cache['asize'] = size - - elif ticktype in ('bid', 'bsize'): - quote_cache['bid'] = price - quote_cache['bsize'] = size - - a = quote_to_marketstore_structarray( - quote_cache, - last_fill=quote.get('broker_ts', None) - ) log.info(a) - # breakpoint() await ms_client.write(symbol, a) From 943b02573da44919ea762098910bc3c64ae55e7c Mon Sep 17 00:00:00 2001 From: Guillermo Rodriguez Date: Mon, 17 Jan 2022 17:47:20 -0300 Subject: [PATCH 003/105] Still WIP, switch to using new marketstore client, missing streaming from marketstore --- piker/data/cli.py | 101 ++++++------ piker/data/marketstore.py | 315 ++++++++++++++++---------------------- 2 files changed, 183 insertions(+), 233 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 2206bd6a..21416a80 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -25,11 +25,13 @@ import trio import tractor import click +from anyio_marketstore import open_marketstore_client + from .marketstore import ( get_client, - stream_quotes, + # stream_quotes, ingest_quote_stream, - _url, + # _url, _tick_tbk_ids, mk_tbk, ) @@ -54,46 +56,47 @@ def ms_stream(config: dict, names: List[str], url: str): and print to console. """ async def main(): - async for quote in stream_quotes(symbols=names): - log.info(f"Received quote:\n{quote}") + # async for quote in stream_quotes(symbols=names): + # log.info(f"Received quote:\n{quote}") + ... 
trio.run(main) -@cli.command() -@click.option( - '--url', - default=_url, - help='HTTP URL of marketstore instance' -) -@click.argument('names', nargs=-1) -@click.pass_obj -def ms_destroy(config: dict, names: List[str], url: str) -> None: - """Destroy symbol entries in the local marketstore instance. - """ - async def main(): - nonlocal names - async with get_client(url) as client: - - if not names: - names = await client.list_symbols() - - # default is to wipe db entirely. - answer = input( - "This will entirely wipe you local marketstore db @ " - f"{url} of the following symbols:\n {pformat(names)}" - "\n\nDelete [N/y]?\n") - - if answer == 'y': - for sym in names: - # tbk = _tick_tbk.format(sym) - tbk = tuple(sym, *_tick_tbk_ids) - print(f"Destroying {tbk}..") - await client.destroy(mk_tbk(tbk)) - else: - print("Nothing deleted.") - - tractor.run(main) +# @cli.command() +# @click.option( +# '--url', +# default=_url, +# help='HTTP URL of marketstore instance' +# ) +# @click.argument('names', nargs=-1) +# @click.pass_obj +# def ms_destroy(config: dict, names: List[str], url: str) -> None: +# """Destroy symbol entries in the local marketstore instance. +# """ +# async def main(): +# nonlocal names +# async with get_client(url) as client: +# +# if not names: +# names = await client.list_symbols() +# +# # default is to wipe db entirely. +# answer = input( +# "This will entirely wipe you local marketstore db @ " +# f"{url} of the following symbols:\n {pformat(names)}" +# "\n\nDelete [N/y]?\n") +# +# if answer == 'y': +# for sym in names: +# # tbk = _tick_tbk.format(sym) +# tbk = tuple(sym, *_tick_tbk_ids) +# print(f"Destroying {tbk}..") +# await client.destroy(mk_tbk(tbk)) +# else: +# print("Nothing deleted.") +# +# tractor.run(main) @cli.command() @@ -102,17 +105,19 @@ def ms_destroy(config: dict, names: List[str], url: str) -> None: is_flag=True, help='Enable tractor logging') @click.option( - '--url', - default=_url, - help='HTTP URL of marketstore instance' + '--host', + default='localhost' +) +@click.option( + '--port', + default=5995 ) -@click.argument('name', nargs=1, required=True) @click.pass_obj -def ms_shell(config, name, tl, url): +def ms_shell(config, tl, host, port): """Start an IPython shell ready to query the local marketstore db. """ async def main(): - async with get_client(url) as client: + async with open_marketstore_client(host, port) as client: query = client.query # noqa # TODO: write magics to query marketstore from IPython import embed @@ -124,15 +129,9 @@ def ms_shell(config, name, tl, url): @cli.command() @click.option('--test-file', '-t', help='Test quote stream file') @click.option('--tl', is_flag=True, help='Enable tractor logging') -@click.option('--tl', is_flag=True, help='Enable tractor logging') -@click.option( - '--url', - default=_url, - help='HTTP URL of marketstore instance' -) @click.argument('name', nargs=1, required=True) @click.pass_obj -def ingest(config, name, test_file, tl, url): +def ingest(config, name, test_file, tl): """Ingest real-time broker quotes and ticks to a marketstore instance. """ # global opts diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 1d271452..3f75c994 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -""" +''' ``marketstore`` integration. 
- client management routines @@ -22,7 +22,7 @@ - websocket client for subscribing to write triggers - todo: tick sequence stream-cloning for testing - todo: docker container management automation -""" +''' from contextlib import asynccontextmanager from typing import Dict, Any, List, Callable, Tuple, Optional import time @@ -31,9 +31,9 @@ from math import isnan import msgpack import numpy as np import pandas as pd -import pymarketstore as pymkts import tractor from trio_websocket import open_websocket_url +from anyio_marketstore import open_marketstore_client, MarketstoreClient from ..log import get_logger, get_console_log from ..data import open_feed @@ -43,7 +43,7 @@ log = get_logger(__name__) _tick_tbk_ids: Tuple[str, str] = ('1Sec', 'TICK') _tick_tbk: str = '{}/' + '/'.join(_tick_tbk_ids) -_url: str = 'http://localhost:5993/rpc' + _quote_dt = [ # these two are required for as a "primary key" ('Epoch', 'i8'), @@ -51,34 +51,27 @@ _quote_dt = [ ('IsTrade', 'i1'), ('IsBid', 'i1'), - ('Price', 'f8'), - ('Size', 'f8') + ('Price', 'f4'), + ('Size', 'f4') ] -_quote_tmp = {}.fromkeys(dict(_quote_dt).keys(), np.nan) -class MarketStoreError(Exception): - "Generic marketstore client error" - - -def err_on_resp(response: dict) -> None: - """Raise any errors found in responses from client request. +def mk_tbk(keys: Tuple[str, str, str]) -> str: + """Generate a marketstore table key from a tuple. + Converts, + ``('SPY', '1Sec', 'TICK')`` -> ``"SPY/1Sec/TICK"``` """ - responses = response['responses'] - if responses is not None: - for r in responses: - err = r['error'] - if err: - raise MarketStoreError(err) + return '{}/' + '/'.join(keys) def quote_to_marketstore_structarray( quote: Dict[str, Any], - last_fill: Optional[float], - + last_fill: Optional[float] ) -> np.array: - """Return marketstore writeable structarray from quote ``dict``. - """ + ''' + Return marketstore writeable structarray from quote ``dict``. + ''' + if last_fill: # new fill bby now = timestamp(last_fill, unit='s') @@ -112,82 +105,21 @@ def quote_to_marketstore_structarray( def timestamp(date, **kwargs) -> int: - """Return marketstore compatible 'Epoch' integer in nanoseconds + ''' + Return marketstore compatible 'Epoch' integer in nanoseconds from a date formatted str. - """ + ''' + return int(pd.Timestamp(date, **kwargs).value) -def mk_tbk(keys: Tuple[str, str, str]) -> str: - """Generate a marketstore table key from a tuple. - - Converts, - ``('SPY', '1Sec', 'TICK')`` -> ``"SPY/1Sec/TICK"``` - """ - return '{}/' + '/'.join(keys) - - -class Client: - """Async wrapper around the alpaca ``pymarketstore`` sync client. - - This will server as the shell for building out a proper async client - that isn't horribly documented and un-tested.. 
- """ - def __init__(self, url: str): - self._client = pymkts.Client(url) - - async def _invoke( - self, - meth: Callable, - *args, - **kwargs, - ) -> Any: - return err_on_resp(meth(*args, **kwargs)) - - async def destroy( - self, - tbk: Tuple[str, str, str], - ) -> None: - return await self._invoke(self._client.destroy, mk_tbk(tbk)) - - async def list_symbols( - self, - tbk: str, - ) -> List[str]: - return await self._invoke(self._client.list_symbols, mk_tbk(tbk)) - - async def write( - self, - symbol: str, - array: np.ndarray, - ) -> None: - start = time.time() - await self._invoke( - self._client.write, - array, - _tick_tbk.format(symbol), - isvariablelength=True - ) - log.debug(f"{symbol} write time (s): {time.time() - start}") - - def query( - self, - symbol, - tbk: Tuple[str, str] = _tick_tbk_ids, - ) -> pd.DataFrame: - # XXX: causes crash - # client.query(pymkts.Params(symbol, '*', 'OHCLV' - result = self._client.query( - pymkts.Params(symbol, *tbk), - ) - return result.first().df() - - @asynccontextmanager async def get_client( - url: str = _url, -) -> Client: - yield Client(url) + host: str = 'localhost', + port: int = 5995 +) -> MarketstoreClient: + async with open_marketstore_client(host, port) as client: + yield client async def ingest_quote_stream( @@ -196,8 +128,9 @@ async def ingest_quote_stream( tries: int = 1, actorloglevel: str = None, ) -> None: - """Ingest a broker quote stream into marketstore in (sampled) tick format. - """ + ''' + Ingest a broker quote stream into marketstore. + ''' async with ( open_feed(brokername, symbols, loglevel=actorloglevel) as feed, get_client() as ms_client @@ -212,107 +145,125 @@ async def ingest_quote_stream( # okkk.. continue - a = quote_to_marketstore_structarray({ + array = quote_to_marketstore_structarray({ 'IsTrade': 1 if ticktype == 'trade' else 0, 'IsBid': 1 if ticktype in ('bid', 'bsize') else 0, 'Price': tick.get('price'), 'Size': tick.get('size') }, last_fill=quote.get('broker_ts', None)) - log.info(a) - await ms_client.write(symbol, a) - + await ms_client.write( + array, _tick_tbk) + async def stream_quotes( symbols: List[str], + timeframe: str = '1Min', + attr_group: str = 'TICK', host: str = 'localhost', port: int = 5993, - diff_cached: bool = True, - loglevel: str = None, + loglevel: str = None ) -> None: - """Open a symbol stream from a running instance of marketstore and + ''' + Open a symbol stream from a running instance of marketstore and log to console. 
- """ - # XXX: required to propagate ``tractor`` loglevel to piker logging - get_console_log(loglevel or tractor.current_actor().loglevel) + ''' - tbks: Dict[str, str] = {sym: f"{sym}/*/*" for sym in symbols} + tbks: Dict[str, str] = { + sym: f'{sym}/{timeframe}/{attr_group}' for sym in symbols} - async with open_websocket_url(f'ws://{host}:{port}/ws') as ws: - # send subs topics to server - resp = await ws.send_message( - msgpack.dumps({'streams': list(tbks.values())}) - ) - log.info(resp) - async def recv() -> Dict[str, Any]: - return msgpack.loads((await ws.get_message()), encoding='utf-8') - streams = (await recv())['streams'] - log.info(f"Subscribed to {streams}") - - _cache = {} - - while True: - msg = await recv() - - # unpack symbol and quote data - # key is in format ``//`` - symbol = msg['key'].split('/')[0] - data = msg['data'] - - # calc time stamp(s) - s, ns = data.pop('Epoch'), data.pop('Nanoseconds') - ts = s * 10**9 + ns - data['broker_fill_time_ns'] = ts - - quote = {} - for k, v in data.items(): - if isnan(v): - continue - - quote[k.lower()] = v - - quote['symbol'] = symbol - - quotes = {} - - if diff_cached: - last = _cache.setdefault(symbol, {}) - new = set(quote.items()) - set(last.items()) - if new: - log.info(f"New quote {quote['symbol']}:\n{new}") - - # only ship diff updates and other required fields - payload = {k: quote[k] for k, v in new} - payload['symbol'] = symbol - - # if there was volume likely the last size of - # shares traded is useful info and it's possible - # that the set difference from above will disregard - # a "size" value since the same # of shares were traded - size = quote.get('size') - volume = quote.get('volume') - if size and volume: - new_volume_since_last = max( - volume - last.get('volume', 0), 0) - log.warning( - f"NEW VOLUME {symbol}:{new_volume_since_last}") - payload['size'] = size - payload['last'] = quote.get('last') - - # XXX: we append to a list for the options case where the - # subscription topic (key) is the same for all - # expiries even though this is uncessary for the - # stock case (different topic [i.e. symbol] for each - # quote). - quotes.setdefault(symbol, []).append(payload) - - # update cache - _cache[symbol].update(quote) - else: - quotes = { - symbol: [{key.lower(): val for key, val in quote.items()}]} - - if quotes: - yield quotes +# async def stream_quotes( +# symbols: List[str], +# host: str = 'localhost', +# port: int = 5993, +# diff_cached: bool = True, +# loglevel: str = None, +# ) -> None: +# """Open a symbol stream from a running instance of marketstore and +# log to console. 
+# """ +# # XXX: required to propagate ``tractor`` loglevel to piker logging +# get_console_log(loglevel or tractor.current_actor().loglevel) +# +# tbks: Dict[str, str] = {sym: f"{sym}/*/*" for sym in symbols} +# +# async with open_websocket_url(f'ws://{host}:{port}/ws') as ws: +# # send subs topics to server +# resp = await ws.send_message( +# msgpack.dumps({'streams': list(tbks.values())}) +# ) +# log.info(resp) +# +# async def recv() -> Dict[str, Any]: +# return msgpack.loads((await ws.get_message()), encoding='utf-8') +# +# streams = (await recv())['streams'] +# log.info(f"Subscribed to {streams}") +# +# _cache = {} +# +# while True: +# msg = await recv() +# +# # unpack symbol and quote data +# # key is in format ``//`` +# symbol = msg['key'].split('/')[0] +# data = msg['data'] +# +# # calc time stamp(s) +# s, ns = data.pop('Epoch'), data.pop('Nanoseconds') +# ts = s * 10**9 + ns +# data['broker_fill_time_ns'] = ts +# +# quote = {} +# for k, v in data.items(): +# if isnan(v): +# continue +# +# quote[k.lower()] = v +# +# quote['symbol'] = symbol +# +# quotes = {} +# +# if diff_cached: +# last = _cache.setdefault(symbol, {}) +# new = set(quote.items()) - set(last.items()) +# if new: +# log.info(f"New quote {quote['symbol']}:\n{new}") +# +# # only ship diff updates and other required fields +# payload = {k: quote[k] for k, v in new} +# payload['symbol'] = symbol +# +# # if there was volume likely the last size of +# # shares traded is useful info and it's possible +# # that the set difference from above will disregard +# # a "size" value since the same # of shares were traded +# size = quote.get('size') +# volume = quote.get('volume') +# if size and volume: +# new_volume_since_last = max( +# volume - last.get('volume', 0), 0) +# log.warning( +# f"NEW VOLUME {symbol}:{new_volume_since_last}") +# payload['size'] = size +# payload['last'] = quote.get('last') +# +# # XXX: we append to a list for the options case where the +# # subscription topic (key) is the same for all +# # expiries even though this is uncessary for the +# # stock case (different topic [i.e. symbol] for each +# # quote). +# quotes.setdefault(symbol, []).append(payload) +# +# # update cache +# _cache[symbol].update(quote) +# else: +# quotes = { +# symbol: [{key.lower(): val for key, val in quote.items()}]} +# +# if quotes: +# yield quotes From aca3ca8aa65ffc106f99ad2a256d75be351c1df9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 15 Feb 2022 13:03:11 -0500 Subject: [PATCH 004/105] Basic module-script for spawning `marketstore`, needs correct bind mount usage --- piker/data/_ahab.py | 151 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 piker/data/_ahab.py diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py new file mode 100644 index 00000000..591c19a9 --- /dev/null +++ b/piker/data/_ahab.py @@ -0,0 +1,151 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+ +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Supervisor for docker with included specific-image service helpers. + +''' +from typing import Optional +from contextlib import contextmanager as cm +# import time + +import trio +import tractor +import docker +import json +# from docker.containers import Container +from requests import ConnectionError + +from ..log import get_logger, get_console_log + +log = get_logger(__name__) + + +_config = ''' +# mount this config using: +# sudo docker run --mount type=bind,source="$HOME/.config/piker/",target="/etc" -i -p 5993:5993 alpacamarkets/marketstore:latest +root_directory: data +listen_port: 5993 +grpc_listen_port: 5995 +log_level: info +queryable: true +stop_grace_period: 0 +wal_rotate_interval: 5 +stale_threshold: 5 +enable_add: true +enable_remove: false + +triggers: + - module: ondiskagg.so + on: "*/1Sec/OHLCV" + config: + # filter: "nasdaq" + destinations: + - 1Min + - 5Min + - 15Min + - 1H + - 1D + + - module: stream.so + on: '*/*/*' + # config: + # filter: "nasdaq" + +''' + + +@cm +def open_docker( + url: Optional[str] = None, + **kwargs, + +) -> docker.DockerClient: + + # yield docker.Client( + # base_url=url, + # **kwargs + # ) if url else + yield docker.from_env(**kwargs) + + +@tractor.context +async def open_marketstore_container( + ctx: tractor.Context, + **kwargs, + +) -> None: + log = get_console_log('info', name=__name__) + # this cli should "just work" + # sudo docker run --mount + # type=bind,source="$HOME/.config/piker/",target="/etc" -i -p + # 5993:5993 alpacamarkets/marketstore:latest + client = docker.from_env(**kwargs) + + # with open_docker() as client: + ctnr = client.containers.run( + 'alpacamarkets/marketstore:latest', + [ + '--mount', + 'type=bind,source="$HOME/.config/piker/",target="/etc"', + '-i', + '-p 5993:5993', + ], + detach=True, + ) + started: bool = False + logs = ctnr.logs(stream=True) + + with trio.move_on_after(0.5): + for entry in logs: + entry = entry.decode() + try: + record = json.loads(entry.strip()) + except json.JSONDecodeError: + if 'Error' in entry: + raise RuntimeError(entry) + # await tractor.breakpoint() + msg = record['msg'] + + if "launching tcp listener for all services..." in msg: + started = True + break + + await trio.sleep(0) + + if not started and ctnr not in client.containers.list(): + raise RuntimeError( + 'Failed to start `marketstore` check logs output for deats' + ) + + await ctx.started() + await tractor.breakpoint() + + +async def main(): + async with tractor.open_nursery( + loglevel='info', + ) as tn: + portal = await tn.start_actor('ahab', enable_modules=[__name__]) + + async with portal.open_context( + open_marketstore_container + + ) as (first, ctx): + await trio.sleep_forever() + +if __name__ == '__main__': + trio.run(main) From 1cdb94374c961bd9a395a7ca41ccea2de878d2c5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 15 Feb 2022 14:03:24 -0500 Subject: [PATCH 005/105] Extract non-sudo user for config dir path --- piker/cli/__init__.py | 20 +++----------------- piker/config.py | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 7eb7b5d1..3dc946b8 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -16,20 +16,6 @@ from .. 
import config log = get_logger('cli') DEFAULT_BROKER = 'questrade' -_config_dir = click.get_app_dir('piker') -_watchlists_data_path = os.path.join(_config_dir, 'watchlists.json') -_context_defaults = dict( - default_map={ - # Questrade specific quote poll rates - 'monitor': { - 'rate': 3, - }, - 'optschain': { - 'rate': 1, - }, - } -) - @click.command() @click.option('--loglevel', '-l', default='warning', help='Logging level') @@ -58,7 +44,7 @@ def pikerd(loglevel, host, tl, pdb): trio.run(main) -@click.group(context_settings=_context_defaults) +@click.group(context_settings=config._context_defaults) @click.option( '--brokers', '-b', default=[DEFAULT_BROKER], @@ -87,8 +73,8 @@ def cli(ctx, brokers, loglevel, tl, configdir): 'loglevel': loglevel, 'tractorloglevel': None, 'log': get_console_log(loglevel), - 'confdir': _config_dir, - 'wl_path': _watchlists_data_path, + 'confdir': config._config_dir, + 'wl_path': config._watchlists_data_path, }) # allow enabling same loglevel in ``tractor`` machinery diff --git a/piker/config.py b/piker/config.py index 93a47378..836da432 100644 --- a/piker/config.py +++ b/piker/config.py @@ -30,8 +30,34 @@ from .log import get_logger log = get_logger('broker-config') -_config_dir = click.get_app_dir('piker') +_config_dir = _click_config_dir = click.get_app_dir('piker') +_parent_user = os.environ.get('SUDO_USER') + +if _parent_user: + non_root_user_dir = os.path.expanduser( + f'~{_parent_user}' + ) + root = 'root' + _config_dir = ( + non_root_user_dir + + _click_config_dir[ + _click_config_dir.rfind(root) + len(root): + ] + ) + _file_name = 'brokers.toml' +_watchlists_data_path = os.path.join(_config_dir, 'watchlists.json') +_context_defaults = dict( + default_map={ + # Questrade specific quote poll rates + 'monitor': { + 'rate': 3, + }, + 'optschain': { + 'rate': 1, + }, + } +) def _override_config_dir( From fbd3d1e308f6cd244bdc041640234bf25e975afc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 15 Feb 2022 22:07:50 -0500 Subject: [PATCH 006/105] Add a super simple `marketstore` container supervisor --- piker/data/_ahab.py | 190 ++++++++++++++++++++++++++++++-------------- 1 file changed, 131 insertions(+), 59 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 591c19a9..08c0bcd1 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -18,25 +18,33 @@ Supervisor for docker with included specific-image service helpers. ''' -from typing import Optional -from contextlib import contextmanager as cm +from typing import ( + Optional, + # Any, +) +from contextlib import asynccontextmanager as acm # import time import trio import tractor import docker import json -# from docker.containers import Container -from requests import ConnectionError +from docker.models.containers import Container -from ..log import get_logger, get_console_log +from ..log import get_logger # , get_console_log +from ..config import _config_dir log = get_logger(__name__) _config = ''' +# piker's ``marketstore`` config. 
+ # mount this config using: -# sudo docker run --mount type=bind,source="$HOME/.config/piker/",target="/etc" -i -p 5993:5993 alpacamarkets/marketstore:latest +# sudo docker run --mount \ +# type=bind,source="$HOME/.config/piker/",target="/etc" -i -p \ +# 5993:5993 alpacamarkets/marketstore:latest + root_directory: data listen_port: 5993 grpc_listen_port: 5995 @@ -68,18 +76,58 @@ triggers: ''' -@cm -def open_docker( +@acm +async def open_docker( url: Optional[str] = None, **kwargs, ) -> docker.DockerClient: - # yield docker.Client( - # base_url=url, - # **kwargs - # ) if url else - yield docker.from_env(**kwargs) + client = docker.DockerClient( + base_url=url, + **kwargs + ) if url else docker.from_env(**kwargs) + + try: + yield client + finally: + # for c in client.containers.list(): + # c.kill() + client.close() + # client.api._custom_adapter.close() + + +# async def waitfor( +# cntr: Container, +# attr_path: tuple[str], +# expect=None, +# timeout: float = 0.5, + +# ) -> Any: +# ''' +# Wait for a container's attr value to be set. If ``expect`` is +# provided wait for the value to be set to that value. + +# This is an async version of the helper from our ``pytest-dockerctl`` +# plugin. + +# ''' +# def get(val, path): +# for key in path: +# val = val[key] +# return val + +# start = time.time() +# while time.time() - start < timeout: +# cntr.reload() +# val = get(cntr.attrs, attr_path) +# if expect is None and val: +# return val +# elif val == expect: +# return val +# else: +# raise TimeoutError("{} failed to be {}, value: \"{}\"".format( +# attr_path, expect if expect else 'not None', val)) @tractor.context @@ -88,64 +136,88 @@ async def open_marketstore_container( **kwargs, ) -> None: - log = get_console_log('info', name=__name__) - # this cli should "just work" - # sudo docker run --mount - # type=bind,source="$HOME/.config/piker/",target="/etc" -i -p - # 5993:5993 alpacamarkets/marketstore:latest - client = docker.from_env(**kwargs) + ''' + Start and supervise a marketstore instance with its config bind-mounted + in from the piker config directory on the system. - # with open_docker() as client: - ctnr = client.containers.run( - 'alpacamarkets/marketstore:latest', - [ - '--mount', - 'type=bind,source="$HOME/.config/piker/",target="/etc"', - '-i', - '-p 5993:5993', - ], - detach=True, - ) - started: bool = False - logs = ctnr.logs(stream=True) + The equivalent cli cmd to this code is: - with trio.move_on_after(0.5): - for entry in logs: - entry = entry.decode() - try: - record = json.loads(entry.strip()) - except json.JSONDecodeError: - if 'Error' in entry: - raise RuntimeError(entry) - # await tractor.breakpoint() - msg = record['msg'] + sudo docker run --mount \ + type=bind,source="$HOME/.config/piker/",target="/etc" -i -p \ + 5993:5993 alpacamarkets/marketstore:latest - if "launching tcp listener for all services..." in msg: - started = True - break + ''' + # log = get_console_log('info', name=__name__) - await trio.sleep(0) - - if not started and ctnr not in client.containers.list(): - raise RuntimeError( - 'Failed to start `marketstore` check logs output for deats' + # client = docker.from_env(**kwargs) + async with open_docker() as client: + # create a mount from user's local piker config dir into container + config_dir_mnt = docker.types.Mount( + target='/etc', + source=_config_dir, + type='bind', ) - await ctx.started() - await tractor.breakpoint() + cntr: Container = client.containers.run( + 'alpacamarkets/marketstore:latest', + # do we need this for cmds? 
+ # '-i', + + # '-p 5993:5993', + ports={'5993/tcp': 5993}, + mounts=[config_dir_mnt], + detach=True, + # stop_signal='SIGINT', + # init=True, + # remove=True, + ) + try: + started: bool = False + logs = cntr.logs(stream=True) + + with trio.move_on_after(0.5): + for entry in logs: + entry = entry.decode() + try: + record = json.loads(entry.strip()) + except json.JSONDecodeError: + if 'Error' in entry: + raise RuntimeError(entry) + msg = record['msg'] + + if "launching tcp listener for all services..." in msg: + started = True + break + + await trio.sleep(0) + + if not started and cntr not in client.containers.list(): + raise RuntimeError( + 'Failed to start `marketstore` check logs output for deats' + ) + + await ctx.started() + await trio.sleep_forever() + + finally: + cntr.stop() async def main(): + async with tractor.open_nursery( - loglevel='info', + loglevel='runtime', ) as tn: - portal = await tn.start_actor('ahab', enable_modules=[__name__]) - - async with portal.open_context( - open_marketstore_container - - ) as (first, ctx): + async with ( + ( + await tn.start_actor('ahab', enable_modules=[__name__]) + ).open_context( + open_marketstore_container + ) as (ctx, first), + ): + assert not first await trio.sleep_forever() + if __name__ == '__main__': trio.run(main) From 9203ebe04445bdb24a84d3c38ed858794a796ac4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 16 Feb 2022 08:53:54 -0500 Subject: [PATCH 007/105] Drop import, it's got madness with and SIGINT? --- piker/config.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/piker/config.py b/piker/config.py index 836da432..cf946405 100644 --- a/piker/config.py +++ b/piker/config.py @@ -17,6 +17,8 @@ """ Broker configuration mgmt. """ +import platform +import sys import os from os.path import dirname import shutil @@ -24,13 +26,73 @@ from typing import Optional from bidict import bidict import toml -import click from .log import get_logger log = get_logger('broker-config') -_config_dir = _click_config_dir = click.get_app_dir('piker') + +# taken from ``click`` since apparently they have some +# super weirdness with sigint and sudo..no clue +def get_app_dir(app_name, roaming=True, force_posix=False): + r"""Returns the config folder for the application. The default behavior + is to return whatever is most appropriate for the operating system. + + To give you an idea, for an app called ``"Foo Bar"``, something like + the following folders could be returned: + + Mac OS X: + ``~/Library/Application Support/Foo Bar`` + Mac OS X (POSIX): + ``~/.foo-bar`` + Unix: + ``~/.config/foo-bar`` + Unix (POSIX): + ``~/.foo-bar`` + Win XP (roaming): + ``C:\Documents and Settings\\Local Settings\Application Data\Foo Bar`` + Win XP (not roaming): + ``C:\Documents and Settings\\Application Data\Foo Bar`` + Win 7 (roaming): + ``C:\Users\\AppData\Roaming\Foo Bar`` + Win 7 (not roaming): + ``C:\Users\\AppData\Local\Foo Bar`` + + .. versionadded:: 2.0 + + :param app_name: the application name. This should be properly capitalized + and can contain whitespace. + :param roaming: controls if the folder should be roaming or not on Windows. + Has no affect otherwise. + :param force_posix: if this is set to `True` then on any POSIX system the + folder will be stored in the home folder with a leading + dot instead of the XDG config home or darwin's + application support folder. 
+ """ + + def _posixify(name): + return "-".join(name.split()).lower() + + # if WIN: + if platform.system() == 'Windows': + key = "APPDATA" if roaming else "LOCALAPPDATA" + folder = os.environ.get(key) + if folder is None: + folder = os.path.expanduser("~") + return os.path.join(folder, app_name) + if force_posix: + return os.path.join(os.path.expanduser("~/.{}".format(_posixify(app_name)))) + if sys.platform == "darwin": + return os.path.join( + os.path.expanduser("~/Library/Application Support"), app_name + ) + return os.path.join( + os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~/.config")), + _posixify(app_name), + ) + + +_config_dir = _click_config_dir = get_app_dir('piker') _parent_user = os.environ.get('SUDO_USER') if _parent_user: From ec413541d32d73eb1e6abcb76a2c6c5ac2f8da7a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 16 Feb 2022 08:55:22 -0500 Subject: [PATCH 008/105] Drop old client instantiate line --- piker/data/_ahab.py | 1 - 1 file changed, 1 deletion(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 08c0bcd1..1e09a65a 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -149,7 +149,6 @@ async def open_marketstore_container( ''' # log = get_console_log('info', name=__name__) - # client = docker.from_env(**kwargs) async with open_docker() as client: # create a mount from user's local piker config dir into container config_dir_mnt = docker.types.Mount( From 7d2e9bff46f57e3a4277b07b4474253ad93c78a7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 16 Feb 2022 12:16:07 -0500 Subject: [PATCH 009/105] Type annot updates --- piker/data/_sampling.py | 4 +++- piker/data/feed.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index d31bf7b1..4228f809 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -22,14 +22,16 @@ financial data flows. from __future__ import annotations from collections import Counter import time +from typing import TYPE_CHECKING import tractor import trio from trio_typing import TaskStatus -from ._sharedmem import ShmArray from ..log import get_logger +if TYPE_CHECKING: + from ._sharedmem import ShmArray log = get_logger(__name__) diff --git a/piker/data/feed.py b/piker/data/feed.py index 260cab9b..19504204 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -20,6 +20,7 @@ Data feed apis and infra. This module is enabled for ``brokerd`` daemons. 
""" +from __future__ import annotations from dataclasses import dataclass, field from contextlib import asynccontextmanager from functools import partial From faa5a785cb0ce1ec3c904a97cca73518f84561f4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Feb 2022 13:32:51 -0500 Subject: [PATCH 010/105] Add explicit no-docker error and supervisor start task-func --- piker/data/_ahab.py | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 1e09a65a..59e447c1 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -23,9 +23,11 @@ from typing import ( # Any, ) from contextlib import asynccontextmanager as acm +from requests.exceptions import ConnectionError # import time import trio +from trio_typing import TaskStatus import tractor import docker import json @@ -75,6 +77,8 @@ triggers: ''' +class DockerNotStarted(Exception): + 'Prolly you dint start da daemon bruh' @acm async def open_docker( @@ -83,10 +87,16 @@ async def open_docker( ) -> docker.DockerClient: - client = docker.DockerClient( - base_url=url, - **kwargs - ) if url else docker.from_env(**kwargs) + try: + client = docker.DockerClient( + base_url=url, + **kwargs + ) if url else docker.from_env(**kwargs) + except ( + ConnectionError, + docker.errors.DockerException, + ): + raise DockerNotStarted('!?!?') try: yield client @@ -131,7 +141,7 @@ async def open_docker( @tractor.context -async def open_marketstore_container( +async def open_marketstore( ctx: tractor.Context, **kwargs, @@ -195,15 +205,20 @@ async def open_marketstore_container( 'Failed to start `marketstore` check logs output for deats' ) - await ctx.started() + await ctx.started(cntr.id) await trio.sleep_forever() finally: cntr.stop() -async def main(): +async def start_ahab( + task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, +) -> None: + + cn_ready = trio.Event() + task_status.started(cn_ready) async with tractor.open_nursery( loglevel='runtime', ) as tn: @@ -211,12 +226,19 @@ async def main(): ( await tn.start_actor('ahab', enable_modules=[__name__]) ).open_context( - open_marketstore_container + open_marketstore, ) as (ctx, first), ): - assert not first + assert str(first) + + # run till cancelled await trio.sleep_forever() +async def main(): + await start_ahab() + await trio.sleep_forever() + + if __name__ == '__main__': trio.run(main) From aecc5973fa3b5d711ef5876163ceac13cd2a44bf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Feb 2022 13:45:40 -0500 Subject: [PATCH 011/105] Handle the non-root perms case specifically too --- piker/data/_ahab.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 59e447c1..07310c4e 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -23,7 +23,6 @@ from typing import ( # Any, ) from contextlib import asynccontextmanager as acm -from requests.exceptions import ConnectionError # import time import trio @@ -32,6 +31,8 @@ import tractor import docker import json from docker.models.containers import Container +from docker.errors import DockerException +from requests.exceptions import ConnectionError from ..log import get_logger # , get_console_log from ..config import _config_dir @@ -87,24 +88,31 @@ async def open_docker( ) -> docker.DockerClient: + client: Optional[docker.DockerClient] = None try: client = docker.DockerClient( base_url=url, **kwargs ) if url else docker.from_env(**kwargs) - except ( 
- ConnectionError, - docker.errors.DockerException, - ): + yield client + except ConnectionError: + # prolly no daemon started raise DockerNotStarted('!?!?') - try: - yield client + except DockerException as err: + # could be more specific so let's check if it's just perms. + if 'PermissionError' in err.args[0]: + raise DockerException('You dint run as root yo!') + + # not perms? + raise + finally: - # for c in client.containers.list(): - # c.kill() - client.close() - # client.api._custom_adapter.close() + if client: + # for c in client.containers.list(): + # c.kill() + client.close() + # client.api._custom_adapter.close() # async def waitfor( From 7395b563216db51d15555faaa5d43710cf0be561 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Feb 2022 14:29:04 -0500 Subject: [PATCH 012/105] De-escalate sudo perms in `pikerd` once docker spawns --- piker/data/_ahab.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 07310c4e..baa396ab 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -18,6 +18,7 @@ Supervisor for docker with included specific-image service helpers. ''' +import os from typing import ( Optional, # Any, @@ -35,7 +36,7 @@ from docker.errors import DockerException from requests.exceptions import ConnectionError from ..log import get_logger # , get_console_log -from ..config import _config_dir +from .. import config log = get_logger(__name__) @@ -78,9 +79,11 @@ triggers: ''' + class DockerNotStarted(Exception): 'Prolly you dint start da daemon bruh' + @acm async def open_docker( url: Optional[str] = None, @@ -171,7 +174,7 @@ async def open_marketstore( # create a mount from user's local piker config dir into container config_dir_mnt = docker.types.Mount( target='/etc', - source=_config_dir, + source=config._config_dir, type='bind', ) @@ -226,19 +229,26 @@ async def start_ahab( ) -> None: cn_ready = trio.Event() - task_status.started(cn_ready) async with tractor.open_nursery( loglevel='runtime', ) as tn: - async with ( - ( - await tn.start_actor('ahab', enable_modules=[__name__]) - ).open_context( - open_marketstore, - ) as (ctx, first), - ): - assert str(first) + portal = await tn.start_actor('ahab', enable_modules=[__name__]) + if config._parent_user: + import pwd + os.setuid( + pwd.getpwnam( + config._parent_user + )[2] # named user's uid + ) + + task_status.started(cn_ready) + + async with portal.open_context( + open_marketstore, + ) as (ctx, first): + + assert str(first) # run till cancelled await trio.sleep_forever() From facc86f76e74e253cd40730ef7f22274af7a6dfc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Feb 2022 14:29:41 -0500 Subject: [PATCH 013/105] Add `--tsdb` flag to start `marketstore` with `pikerd` --- piker/cli/__init__.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 3dc946b8..581c6d37 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -22,9 +22,16 @@ DEFAULT_BROKER = 'questrade' @click.option('--tl', is_flag=True, help='Enable tractor logging') @click.option('--pdb', is_flag=True, help='Enable tractor debug mode') @click.option('--host', '-h', default='127.0.0.1', help='Host address to bind') -def pikerd(loglevel, host, tl, pdb): - """Spawn the piker broker-daemon. 
- """ +@click.option( + '--tsdb', + is_flag=True, + help='Enable local ``marketstore`` instance' +) +def pikerd(loglevel, host, tl, pdb, tsdb): + ''' + Spawn the piker broker-daemon. + + ''' from .._daemon import open_pikerd log = get_console_log(loglevel) @@ -38,7 +45,21 @@ def pikerd(loglevel, host, tl, pdb): )) async def main(): - async with open_pikerd(loglevel=loglevel, debug_mode=pdb): + + async with ( + open_pikerd( + loglevel=loglevel, + debug_mode=pdb, + ) as services, + trio.open_nursery() as n, + ): + if tsdb: + from piker.data._ahab import start_ahab + log.info('Spawning `marketstore` supervisor') + ctn_ready = await n.start(start_ahab) + await ctn_ready.wait() + log.info('`marketstore` container:{uid} up') + await trio.sleep_forever() trio.run(main) From ed5bae0e11ec9c910f7b83c670f1922d0aab1557 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Feb 2022 16:31:37 -0500 Subject: [PATCH 014/105] Py3.9+ type updates --- piker/data/marketstore.py | 50 +++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 3f75c994..c9fc5d16 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -24,14 +24,14 @@ - todo: docker container management automation ''' from contextlib import asynccontextmanager -from typing import Dict, Any, List, Callable, Tuple, Optional +from typing import Any, Optional import time -from math import isnan +# from math import isnan -import msgpack +# import msgpack import numpy as np import pandas as pd -import tractor +# import tractor from trio_websocket import open_websocket_url from anyio_marketstore import open_marketstore_client, MarketstoreClient @@ -41,7 +41,7 @@ from ..data import open_feed log = get_logger(__name__) -_tick_tbk_ids: Tuple[str, str] = ('1Sec', 'TICK') +_tick_tbk_ids: tuple[str, str] = ('1Sec', 'TICK') _tick_tbk: str = '{}/' + '/'.join(_tick_tbk_ids) _quote_dt = [ @@ -56,16 +56,18 @@ _quote_dt = [ ] -def mk_tbk(keys: Tuple[str, str, str]) -> str: - """Generate a marketstore table key from a tuple. +def mk_tbk(keys: tuple[str, str, str]) -> str: + ''' + Generate a marketstore table key from a tuple. Converts, ``('SPY', '1Sec', 'TICK')`` -> ``"SPY/1Sec/TICK"``` - """ + + ''' return '{}/' + '/'.join(keys) def quote_to_marketstore_structarray( - quote: Dict[str, Any], + quote: dict[str, Any], last_fill: Optional[float] ) -> np.array: ''' @@ -83,7 +85,7 @@ def quote_to_marketstore_structarray( secs, ns = now / 10**9, now % 10**9 - # pack into List[Tuple[str, Any]] + # pack into list[tuple[str, Any]] array_input = [] # insert 'Epoch' entry first and then 'Nanoseconds'. @@ -123,17 +125,19 @@ async def get_client( async def ingest_quote_stream( - symbols: List[str], + symbols: list[str], brokername: str, tries: int = 1, - actorloglevel: str = None, + loglevel: str = None, + ) -> None: ''' - Ingest a broker quote stream into marketstore. + Ingest a broker quote stream into a ``marketstore`` tsdb. 
+ ''' async with ( - open_feed(brokername, symbols, loglevel=actorloglevel) as feed, - get_client() as ms_client + open_feed(brokername, symbols, loglevel=loglevel) as feed, + get_client() as ms_client, ): async for quotes in feed.stream: log.info(quotes) @@ -152,30 +156,30 @@ async def ingest_quote_stream( 'Size': tick.get('size') }, last_fill=quote.get('broker_ts', None)) - await ms_client.write( - array, _tick_tbk) - + await ms_client.write(array, _tick_tbk) + async def stream_quotes( - symbols: List[str], + symbols: list[str], timeframe: str = '1Min', attr_group: str = 'TICK', host: str = 'localhost', port: int = 5993, loglevel: str = None + ) -> None: ''' Open a symbol stream from a running instance of marketstore and log to console. ''' - tbks: Dict[str, str] = { + tbks: dict[str, str] = { sym: f'{sym}/{timeframe}/{attr_group}' for sym in symbols} # async def stream_quotes( -# symbols: List[str], +# symbols: list[str], # host: str = 'localhost', # port: int = 5993, # diff_cached: bool = True, @@ -187,7 +191,7 @@ async def stream_quotes( # # XXX: required to propagate ``tractor`` loglevel to piker logging # get_console_log(loglevel or tractor.current_actor().loglevel) # -# tbks: Dict[str, str] = {sym: f"{sym}/*/*" for sym in symbols} +# tbks: dict[str, str] = {sym: f"{sym}/*/*" for sym in symbols} # # async with open_websocket_url(f'ws://{host}:{port}/ws') as ws: # # send subs topics to server @@ -196,7 +200,7 @@ async def stream_quotes( # ) # log.info(resp) # -# async def recv() -> Dict[str, Any]: +# async def recv() -> dict[str, Any]: # return msgpack.loads((await ws.get_message()), encoding='utf-8') # # streams = (await recv())['streams'] From 970393bb85c06fb015746f10e9bfe93b3eb29bf4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 17 Feb 2022 16:34:22 -0500 Subject: [PATCH 015/105] Drop ununsed `Services` ref --- piker/_daemon.py | 8 +++++--- piker/cli/__init__.py | 2 +- piker/data/cli.py | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/piker/_daemon.py b/piker/_daemon.py index 053d4864..ff7a129d 100644 --- a/piker/_daemon.py +++ b/piker/_daemon.py @@ -310,7 +310,7 @@ async def maybe_spawn_daemon( **kwargs, ) -> tractor.Portal: - """ + ''' If no ``service_name`` daemon-actor can be found, spawn one in a local subactor and return a portal to it. @@ -321,7 +321,7 @@ async def maybe_spawn_daemon( This can be seen as a service starting api for remote-actor clients. - """ + ''' if loglevel: get_console_log(loglevel) @@ -431,7 +431,9 @@ async def maybe_spawn_brokerd( **kwargs, ) -> tractor.Portal: - '''Helper to spawn a brokerd service. + ''' + Helper to spawn a brokerd service *from* a client + who wishes to use the sub-actor-daemon. ''' async with maybe_spawn_daemon( diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 581c6d37..634c6175 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -50,7 +50,7 @@ def pikerd(loglevel, host, tl, pdb, tsdb): open_pikerd( loglevel=loglevel, debug_mode=pdb, - ) as services, + ), # normally delivers a ``Services`` handle trio.open_nursery() as n, ): if tsdb: diff --git a/piker/data/cli.py b/piker/data/cli.py index 21416a80..3839fe65 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -16,6 +16,7 @@ """ marketstore cli. 
+ """ from typing import List from functools import partial From 804771410103143b21731a0fd5c7d85323afbfe7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 18 Feb 2022 07:35:56 -0500 Subject: [PATCH 016/105] Add back in OHLCV dtype template and client side ws streamer --- piker/data/marketstore.py | 308 +++++++++++++++++++++++--------------- 1 file changed, 190 insertions(+), 118 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index c9fc5d16..4298eb96 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -26,12 +26,12 @@ from contextlib import asynccontextmanager from typing import Any, Optional import time -# from math import isnan +from math import isnan -# import msgpack +import msgpack import numpy as np import pandas as pd -# import tractor +import tractor from trio_websocket import open_websocket_url from anyio_marketstore import open_marketstore_client, MarketstoreClient @@ -44,15 +44,29 @@ log = get_logger(__name__) _tick_tbk_ids: tuple[str, str] = ('1Sec', 'TICK') _tick_tbk: str = '{}/' + '/'.join(_tick_tbk_ids) +_tick_dt = [ + # these two are required for as a "primary key" + ('Epoch', 'i8'), + ('Nanoseconds', 'i4'), + ('IsTrade', 'i1'), + ('IsBid', 'i1'), + ('Price', 'f4'), + ('Size', 'f4') +] + _quote_dt = [ # these two are required for as a "primary key" ('Epoch', 'i8'), ('Nanoseconds', 'i4'), - ('IsTrade', 'i1'), - ('IsBid', 'i1'), - ('Price', 'f4'), - ('Size', 'f4') + ('Tick', 'i4'), # do we need this? + ('Last', 'f4'), + ('Bid', 'f4'), + ('Bsize', 'f4'), + ('Asize', 'f4'), + ('Ask', 'f4'), + ('Size', 'i8'), + ('Volume', 'f4'), ] @@ -69,11 +83,12 @@ def mk_tbk(keys: tuple[str, str, str]) -> str: def quote_to_marketstore_structarray( quote: dict[str, Any], last_fill: Optional[float] + ) -> np.array: ''' Return marketstore writeable structarray from quote ``dict``. - ''' + ''' if last_fill: # new fill bby now = timestamp(last_fill, unit='s') @@ -82,7 +97,7 @@ def quote_to_marketstore_structarray( # this should get inserted upstream by the broker-client to # subtract from IPC latency now = time.time_ns() - + secs, ns = now / 10**9, now % 10**9 # pack into list[tuple[str, Any]] @@ -100,7 +115,8 @@ def quote_to_marketstore_structarray( # for ``np.int`` we use 0 as a null value none = 0 - val = quote.get(name, none) + # casefold? see https://github.com/alpacahq/marketstore/issues/324 + val = quote.get(name.casefold(), none) array_input.append(val) return np.array([tuple(array_input)], dtype=_quote_dt) @@ -119,6 +135,7 @@ def timestamp(date, **kwargs) -> int: async def get_client( host: str = 'localhost', port: int = 5995 + ) -> MarketstoreClient: async with open_marketstore_client(host, port) as client: yield client @@ -145,129 +162,184 @@ async def ingest_quote_stream( for tick in quote.get('ticks', ()): ticktype = tick.get('type', 'n/a') - if ticktype == 'n/a': - # okkk.. 
- continue + # _quote_dt = [ + # # these two are required for as a "primary key" + # ('Epoch', 'i8'), + # ('Nanoseconds', 'i4'), + # ('Tick', 'i4'), + # + # ('Last', 'f4'), + # ('Bid', 'f4'), + # ('Bsize', 'f4'), + # ('Asize', 'f4'), + # ('Ask', 'f4'), + # ('Size', 'i8'), + # ('Volume', 'f4'), + # ] - array = quote_to_marketstore_structarray({ - 'IsTrade': 1 if ticktype == 'trade' else 0, - 'IsBid': 1 if ticktype in ('bid', 'bsize') else 0, - 'Price': tick.get('price'), - 'Size': tick.get('size') - }, last_fill=quote.get('broker_ts', None)) + # techtonic tick write + array = quote_to_marketstore_structarray({ + 'IsTrade': 1 if ticktype == 'trade' else 0, + 'IsBid': 1 if ticktype in ('bid', 'bsize') else 0, + 'Price': tick.get('price'), + 'Size': tick.get('size') + }, last_fill=quote.get('broker_ts', None)) - await ms_client.write(array, _tick_tbk) + await ms_client.write(array, _tick_tbk) + + quote_cache = { + 'size': 0, + 'tick': 0 + } + # start ingest to marketstore + async for quotes in feed.stream: + log.info(quotes) + for symbol, quote in quotes.items(): + + for tick in quote.get('ticks', ()): + ticktype = tick.get('type') + price = tick.get('price') + size = tick.get('size') + + if ticktype == 'n/a' or price == -1: + # okkk.. + continue + + # clearing price event + if ticktype == 'trade': + quote_cache['volume'] = quote['volume'] + quote_cache['last'] = price + # quote_cache['broker_ts'] = quote['broker_ts'] + + # l1 book events + elif ticktype in ('ask', 'asize'): + quote_cache['ask'] = price + quote_cache['asize'] = size + + elif ticktype in ('bid', 'bsize'): + quote_cache['bid'] = price + quote_cache['bsize'] = size + + a = quote_to_marketstore_structarray( + quote_cache, + last_fill=quote.get('broker_ts', None) + ) + log.info(a) + # breakpoint() + await ms_client.write(symbol, a) + + +# async def stream_quotes( +# symbols: list[str], +# timeframe: str = '1Min', +# attr_group: str = 'TICK', +# host: str = 'localhost', +# port: int = 5993, +# loglevel: str = None + +# ) -> None: +# ''' +# Open a symbol stream from a running instance of marketstore and +# log to console. + +# ''' +# tbks: dict[str, str] = { +# sym: f'{sym}/{timeframe}/{attr_group}' for sym in symbols} async def stream_quotes( symbols: list[str], - timeframe: str = '1Min', - attr_group: str = 'TICK', host: str = 'localhost', port: int = 5993, - loglevel: str = None + diff_cached: bool = True, + loglevel: str = None, ) -> None: ''' Open a symbol stream from a running instance of marketstore and log to console. + ''' + # XXX: required to propagate ``tractor`` loglevel to piker logging + get_console_log(loglevel or tractor.current_actor().loglevel) - tbks: dict[str, str] = { - sym: f'{sym}/{timeframe}/{attr_group}' for sym in symbols} + tbks: dict[str, str] = {sym: f"{sym}/*/*" for sym in symbols} + async with open_websocket_url(f'ws://{host}:{port}/ws') as ws: + # send subs topics to server + resp = await ws.send_message( + msgpack.dumps({'streams': list(tbks.values())}) + ) + log.info(resp) + async def recv() -> dict[str, Any]: + return msgpack.loads((await ws.get_message()), encoding='utf-8') -# async def stream_quotes( -# symbols: list[str], -# host: str = 'localhost', -# port: int = 5993, -# diff_cached: bool = True, -# loglevel: str = None, -# ) -> None: -# """Open a symbol stream from a running instance of marketstore and -# log to console. 
-# """ -# # XXX: required to propagate ``tractor`` loglevel to piker logging -# get_console_log(loglevel or tractor.current_actor().loglevel) -# -# tbks: dict[str, str] = {sym: f"{sym}/*/*" for sym in symbols} -# -# async with open_websocket_url(f'ws://{host}:{port}/ws') as ws: -# # send subs topics to server -# resp = await ws.send_message( -# msgpack.dumps({'streams': list(tbks.values())}) -# ) -# log.info(resp) -# -# async def recv() -> dict[str, Any]: -# return msgpack.loads((await ws.get_message()), encoding='utf-8') -# -# streams = (await recv())['streams'] -# log.info(f"Subscribed to {streams}") -# -# _cache = {} -# -# while True: -# msg = await recv() -# -# # unpack symbol and quote data -# # key is in format ``//`` -# symbol = msg['key'].split('/')[0] -# data = msg['data'] -# -# # calc time stamp(s) -# s, ns = data.pop('Epoch'), data.pop('Nanoseconds') -# ts = s * 10**9 + ns -# data['broker_fill_time_ns'] = ts -# -# quote = {} -# for k, v in data.items(): -# if isnan(v): -# continue -# -# quote[k.lower()] = v -# -# quote['symbol'] = symbol -# -# quotes = {} -# -# if diff_cached: -# last = _cache.setdefault(symbol, {}) -# new = set(quote.items()) - set(last.items()) -# if new: -# log.info(f"New quote {quote['symbol']}:\n{new}") -# -# # only ship diff updates and other required fields -# payload = {k: quote[k] for k, v in new} -# payload['symbol'] = symbol -# -# # if there was volume likely the last size of -# # shares traded is useful info and it's possible -# # that the set difference from above will disregard -# # a "size" value since the same # of shares were traded -# size = quote.get('size') -# volume = quote.get('volume') -# if size and volume: -# new_volume_since_last = max( -# volume - last.get('volume', 0), 0) -# log.warning( -# f"NEW VOLUME {symbol}:{new_volume_since_last}") -# payload['size'] = size -# payload['last'] = quote.get('last') -# -# # XXX: we append to a list for the options case where the -# # subscription topic (key) is the same for all -# # expiries even though this is uncessary for the -# # stock case (different topic [i.e. symbol] for each -# # quote). 
-# quotes.setdefault(symbol, []).append(payload) -# -# # update cache -# _cache[symbol].update(quote) -# else: -# quotes = { -# symbol: [{key.lower(): val for key, val in quote.items()}]} -# -# if quotes: -# yield quotes + streams = (await recv())['streams'] + log.info(f"Subscribed to {streams}") + + _cache = {} + + while True: + msg = await recv() + + # unpack symbol and quote data + # key is in format ``//`` + symbol = msg['key'].split('/')[0] + data = msg['data'] + + # calc time stamp(s) + s, ns = data.pop('Epoch'), data.pop('Nanoseconds') + ts = s * 10**9 + ns + data['broker_fill_time_ns'] = ts + + quote = {} + for k, v in data.items(): + if isnan(v): + continue + + quote[k.lower()] = v + + quote['symbol'] = symbol + + quotes = {} + + if diff_cached: + last = _cache.setdefault(symbol, {}) + new = set(quote.items()) - set(last.items()) + if new: + log.info(f"New quote {quote['symbol']}:\n{new}") + + # only ship diff updates and other required fields + payload = {k: quote[k] for k, v in new} + payload['symbol'] = symbol + + # if there was volume likely the last size of + # shares traded is useful info and it's possible + # that the set difference from above will disregard + # a "size" value since the same # of shares were traded + size = quote.get('size') + volume = quote.get('volume') + if size and volume: + new_volume_since_last = max( + volume - last.get('volume', 0), 0) + log.warning( + f"NEW VOLUME {symbol}:{new_volume_since_last}") + payload['size'] = size + payload['last'] = quote.get('last') + + # XXX: we append to a list for the options case where the + # subscription topic (key) is the same for all + # expiries even though this is uncessary for the + # stock case (different topic [i.e. symbol] for each + # quote). + quotes.setdefault(symbol, []).append(payload) + + # update cache + _cache[symbol].update(quote) + else: + quotes = { + symbol: [{key.lower(): val for key, val in quote.items()}]} + + if quotes: + yield quotes From 445b82283dc4abcd4ce1505abe2b03cb330eb13c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 18 Feb 2022 08:21:17 -0500 Subject: [PATCH 017/105] Add back in legacy write loop for reference --- piker/data/marketstore.py | 100 ++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 57 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 4298eb96..c1484706 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -59,15 +59,25 @@ _quote_dt = [ ('Epoch', 'i8'), ('Nanoseconds', 'i4'), - ('Tick', 'i4'), # do we need this? + ('Tick', 'i4'), # (-1, 0, 1) = (on bid, same, on ask) + # ('fill_time', 'f4'), ('Last', 'f4'), ('Bid', 'f4'), - ('Bsize', 'f4'), - ('Asize', 'f4'), + ('Bsize', 'i8'), + ('Asize', 'i8'), ('Ask', 'f4'), ('Size', 'i8'), - ('Volume', 'f4'), + ('Volume', 'i8'), + # ('brokerd_ts', 'i64'), + # ('VWAP', 'f4') ] +_quote_tmp = {}.fromkeys(dict(_quote_dt).keys(), np.nan) +_tick_map = { + 'Up': 1, + 'Equal': 0, + 'Down': -1, + None: np.nan, +} def mk_tbk(keys: tuple[str, str, str]) -> str: @@ -91,8 +101,7 @@ def quote_to_marketstore_structarray( ''' if last_fill: # new fill bby - now = timestamp(last_fill, unit='s') - + now = timestamp(last_fill) else: # this should get inserted upstream by the broker-client to # subtract from IPC latency @@ -126,8 +135,8 @@ def timestamp(date, **kwargs) -> int: ''' Return marketstore compatible 'Epoch' integer in nanoseconds from a date formatted str. 
- ''' + ''' return int(pd.Timestamp(date, **kwargs).value) @@ -162,21 +171,6 @@ async def ingest_quote_stream( for tick in quote.get('ticks', ()): ticktype = tick.get('type', 'n/a') - # _quote_dt = [ - # # these two are required for as a "primary key" - # ('Epoch', 'i8'), - # ('Nanoseconds', 'i4'), - # ('Tick', 'i4'), - # - # ('Last', 'f4'), - # ('Bid', 'f4'), - # ('Bsize', 'f4'), - # ('Asize', 'f4'), - # ('Ask', 'f4'), - # ('Size', 'i8'), - # ('Volume', 'f4'), - # ] - # techtonic tick write array = quote_to_marketstore_structarray({ 'IsTrade': 1 if ticktype == 'trade' else 0, @@ -187,46 +181,38 @@ async def ingest_quote_stream( await ms_client.write(array, _tick_tbk) - quote_cache = { - 'size': 0, - 'tick': 0 - } - # start ingest to marketstore - async for quotes in feed.stream: - log.info(quotes) - for symbol, quote in quotes.items(): + # LEGACY WRITE LOOP (using old tick dt) + # quote_cache = { + # 'size': 0, + # 'tick': 0 + # } - for tick in quote.get('ticks', ()): - ticktype = tick.get('type') - price = tick.get('price') - size = tick.get('size') + # async for quotes in qstream: + # log.info(quotes) + # for symbol, quote in quotes.items(): - if ticktype == 'n/a' or price == -1: - # okkk.. - continue + # # remap tick strs to ints + # quote['tick'] = _tick_map[quote.get('tick', 'Equal')] - # clearing price event - if ticktype == 'trade': - quote_cache['volume'] = quote['volume'] - quote_cache['last'] = price - # quote_cache['broker_ts'] = quote['broker_ts'] + # # check for volume update (i.e. did trades happen + # # since last quote) + # new_vol = quote.get('volume', None) + # if new_vol is None: + # log.debug(f"No fills for {symbol}") + # if new_vol == quote_cache.get('volume'): + # # should never happen due to field diffing + # # on sender side + # log.error( + # f"{symbol}: got same volume as last quote?") - # l1 book events - elif ticktype in ('ask', 'asize'): - quote_cache['ask'] = price - quote_cache['asize'] = size + # quote_cache.update(quote) - elif ticktype in ('bid', 'bsize'): - quote_cache['bid'] = price - quote_cache['bsize'] = size - - a = quote_to_marketstore_structarray( - quote_cache, - last_fill=quote.get('broker_ts', None) - ) - log.info(a) - # breakpoint() - await ms_client.write(symbol, a) + # a = quote_to_marketstore_structarray( + # quote, + # # TODO: check this closer to the broker query api + # last_fill=quote.get('fill_time', '') + # ) + # await ms_client.write(symbol, a) # async def stream_quotes( From 4bcc301c012a5cbb70e42cd7cb36b3c8deb1c2bc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 18 Feb 2022 12:13:38 -0500 Subject: [PATCH 018/105] Better handle nested erros from docker client --- piker/data/_ahab.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index baa396ab..ed81fed0 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -32,7 +32,7 @@ import tractor import docker import json from docker.models.containers import Container -from docker.errors import DockerException +from docker.errors import DockerException, APIError from requests.exceptions import ConnectionError from ..log import get_logger # , get_console_log @@ -102,10 +102,23 @@ async def open_docker( # prolly no daemon started raise DockerNotStarted('!?!?') - except DockerException as err: + except ( + DockerException, + APIError, + ) as err: + + def unpack_msg(err: Exception) -> str: + args = getattr(err, 'args', None) + if args: + return args + # could be more specific so let's check 
if it's just perms. - if 'PermissionError' in err.args[0]: - raise DockerException('You dint run as root yo!') + if err.args: + errs = err.args + for err in errs: + msg = unpack_msg(err) + if msg and 'PermissionError' in msg: + raise DockerException('You dint run as root yo!') # not perms? raise @@ -233,7 +246,13 @@ async def start_ahab( loglevel='runtime', ) as tn: - portal = await tn.start_actor('ahab', enable_modules=[__name__]) + portal = await tn.start_actor( + 'marketstored', + enable_modules=[__name__] + ) + + # de-escalate root perms to the original user + # after the docker supervisor actor is spawned. if config._parent_user: import pwd os.setuid( From 56fa75945283d81dc7152c4bfa8680e973b99f10 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 18 Feb 2022 12:17:12 -0500 Subject: [PATCH 019/105] Add WIP backfiller from data feed helper --- piker/data/marketstore.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index c1484706..9419a6b5 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -36,7 +36,7 @@ from trio_websocket import open_websocket_url from anyio_marketstore import open_marketstore_client, MarketstoreClient from ..log import get_logger, get_console_log -from ..data import open_feed +from ..data.feed import maybe_open_feed log = get_logger(__name__) @@ -71,6 +71,7 @@ _quote_dt = [ # ('brokerd_ts', 'i64'), # ('VWAP', 'f4') ] + _quote_tmp = {}.fromkeys(dict(_quote_dt).keys(), np.nan) _tick_map = { 'Up': 1, @@ -79,6 +80,19 @@ _tick_map = { None: np.nan, } +_ohlcv_dt = [ + # these two are required for as a "primary key" + ('Epoch', 'i8'), + ('Nanoseconds', 'i4'), + + # ohlcv sampling + ('Open', 'f4'), + ('High', 'f4'), + ('Low', 'i8'), + ('Close', 'i8'), + ('Volume', 'f4'), +] + def mk_tbk(keys: tuple[str, str, str]) -> str: ''' @@ -143,13 +157,32 @@ def timestamp(date, **kwargs) -> int: @asynccontextmanager async def get_client( host: str = 'localhost', - port: int = 5995 + port: int = 5993 ) -> MarketstoreClient: async with open_marketstore_client(host, port) as client: yield client +async def backfill_history(): + + async with ( + get_client() as msclient, + maybe_open_feed( + 'ib', + ['mnq.globex'], + loglevel='info', + # backpressure=False, + start_stream=False, + ) as (feed, stream), + ): + await tractor.breakpoint() + await msclient.write( + feed.shm.array, + tbk='mnq.globex.ib/1Sec/OHLCV', + ) + + async def ingest_quote_stream( symbols: list[str], brokername: str, @@ -162,7 +195,7 @@ async def ingest_quote_stream( ''' async with ( - open_feed(brokername, symbols, loglevel=loglevel) as feed, + maybe_open_feed(brokername, symbols, loglevel=loglevel) as feed, get_client() as ms_client, ): async for quotes in feed.stream: From 2c51ad2a0da7302d6c8d0104aef236ac33412e84 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 18 Feb 2022 12:17:41 -0500 Subject: [PATCH 020/105] Revive `ms-shell` sub-cmd --- piker/data/cli.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 3839fe65..5b9c854d 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -111,20 +111,27 @@ def ms_stream(config: dict, names: List[str], url: str): ) @click.option( '--port', - default=5995 + default=5993 ) @click.pass_obj def ms_shell(config, tl, host, port): - """Start an IPython shell ready to query the local marketstore db. 
- """ - async def main(): - async with open_marketstore_client(host, port) as client: - query = client.query # noqa - # TODO: write magics to query marketstore - from IPython import embed - embed() + ''' + Start an IPython shell ready to query the local marketstore db. - tractor.run(main) + ''' + from piker.data.marketstore import backfill_history + from piker._daemon import open_piker_runtime + async def main(): + async with open_piker_runtime( + 'ms_shell', + enable_modules=['piker.data._ahab'], + ): + await backfill_history() + # TODO: write magics to query marketstore + # from IPython import embed + # embed() + + trio.run(main) @cli.command() From d9773217e9e17fed06cebb27249ebe84e5e339bc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 19 Feb 2022 16:31:31 -0500 Subject: [PATCH 021/105] Map the grpc port and add graceful container teardown Not sure how I missed mapping the 5995 grpc port :facepalm:; done now. Also adds graceful teardown using SIGINT with included container logging relayed to the piker console B). --- piker/data/_ahab.py | 89 +++++++++++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 23 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index ed81fed0..9ec805a8 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -33,9 +33,9 @@ import docker import json from docker.models.containers import Container from docker.errors import DockerException, APIError -from requests.exceptions import ConnectionError +from requests.exceptions import ConnectionError, ReadTimeout -from ..log import get_logger # , get_console_log +from ..log import get_logger, get_console_log from .. import config log = get_logger(__name__) @@ -52,7 +52,7 @@ _config = ''' root_directory: data listen_port: 5993 grpc_listen_port: 5995 -log_level: info +log_level: debug queryable: true stop_grace_period: 0 wal_rotate_interval: 5 @@ -98,9 +98,6 @@ async def open_docker( **kwargs ) if url else docker.from_env(**kwargs) yield client - except ConnectionError: - # prolly no daemon started - raise DockerNotStarted('!?!?') except ( DockerException, @@ -111,15 +108,20 @@ async def open_docker( args = getattr(err, 'args', None) if args: return args + else: + return str(err) # could be more specific so let's check if it's just perms. if err.args: errs = err.args for err in errs: msg = unpack_msg(err) - if msg and 'PermissionError' in msg: + if 'PermissionError' in msg: raise DockerException('You dint run as root yo!') + elif 'FileNotFoundError' in msg: + raise DockerNotStarted('Did you start da service sister?') + # not perms? 
raise @@ -181,7 +183,7 @@ async def open_marketstore( 5993:5993 alpacamarkets/marketstore:latest ''' - # log = get_console_log('info', name=__name__) + log = get_console_log('info', name=__name__) async with open_docker() as client: # create a mount from user's local piker config dir into container @@ -197,46 +199,87 @@ async def open_marketstore( # '-i', # '-p 5993:5993', - ports={'5993/tcp': 5993}, + ports={ + '5993/tcp': 5993, # jsonrpc + '5995/tcp': 5995, # grpc + }, mounts=[config_dir_mnt], detach=True, - # stop_signal='SIGINT', - # init=True, + stop_signal='SIGINT', + init=True, # remove=True, ) try: - started: bool = False - logs = cntr.logs(stream=True) + seen_so_far = set() - with trio.move_on_after(0.5): + async def process_logs_until( + match: str, + bp_on_msg: bool = False, + ): + logs = cntr.logs(stream=True) for entry in logs: entry = entry.decode() + try: record = json.loads(entry.strip()) except json.JSONDecodeError: if 'Error' in entry: raise RuntimeError(entry) + msg = record['msg'] + if msg and entry not in seen_so_far: + seen_so_far.add(entry) + if bp_on_msg: + await tractor.breakpoint() + log.info(f'{msg}') - if "launching tcp listener for all services..." in msg: - started = True - break + # if "launching tcp listener for all services..." in msg: + if match in msg: + return True + # do a checkpoint so we don't block if cancelled B) await trio.sleep(0) - if not started and cntr not in client.containers.list(): - raise RuntimeError( - 'Failed to start `marketstore` check logs output for deats' + return False + + with trio.move_on_after(0.5): + found = await process_logs_until( + "launching tcp listener for all services...", ) + if not found and cntr not in client.containers.list(): + raise RuntimeError( + 'Failed to start `marketstore` check logs deats' + ) + await ctx.started(cntr.id) - await trio.sleep_forever() + await process_logs_until('exiting...',) + + except ( + trio.Cancelled, + KeyboardInterrupt, + ): + cntr.kill('SIGINT') + with trio.move_on_after(0.5) as cs: + cs.shield = True + await process_logs_until('exiting...',) + raise finally: - cntr.stop() + try: + cntr.wait( + timeout=0.5, + condition='not-running', + ) + except ( + ReadTimeout, + ConnectionError, + ): + cntr.kill() async def start_ahab( + service_name: str, task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, ) -> None: @@ -247,7 +290,7 @@ async def start_ahab( ) as tn: portal = await tn.start_actor( - 'marketstored', + service_name, enable_modules=[__name__] ) From ba82a188900703177f1084dae05aaeb1007e0b28 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 19 Feb 2022 16:34:36 -0500 Subject: [PATCH 022/105] Pass in daemon name to `start_ahab()` --- piker/cli/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 634c6175..a1931b8f 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -54,13 +54,19 @@ def pikerd(loglevel, host, tl, pdb, tsdb): trio.open_nursery() as n, ): if tsdb: + # TODO: + # async with maybe_open_marketstored(): + from piker.data._ahab import start_ahab log.info('Spawning `marketstore` supervisor') - ctn_ready = await n.start(start_ahab) + ctn_ready = await n.start( + start_ahab, + 'marketstored', + ) await ctn_ready.wait() log.info('`marketstore` container:{uid} up') - await trio.sleep_forever() + await trio.sleep_forever() trio.run(main) From e1bbcff8e037032de7f46a6fc3ebb052114d57e6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 19 Feb 2022 16:36:02 -0500 
Subject: [PATCH 023/105] Get basic OHLCV writes working with `anyio` client --- piker/data/marketstore.py | 93 +++++++++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 9419a6b5..05ef549a 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -21,10 +21,15 @@ - ticK data ingest routines - websocket client for subscribing to write triggers - todo: tick sequence stream-cloning for testing -- todo: docker container management automation + ''' from contextlib import asynccontextmanager -from typing import Any, Optional +from typing import ( + Any, + Optional, + # Callable, + TYPE_CHECKING, +) import time from math import isnan @@ -36,7 +41,12 @@ from trio_websocket import open_websocket_url from anyio_marketstore import open_marketstore_client, MarketstoreClient from ..log import get_logger, get_console_log -from ..data.feed import maybe_open_feed +from .feed import maybe_open_feed +from ._source import mk_fqsn, Symbol + + +# if TYPE_CHECKING: +# from ._sharedmem import ShmArray log = get_logger(__name__) @@ -83,7 +93,7 @@ _tick_map = { _ohlcv_dt = [ # these two are required for as a "primary key" ('Epoch', 'i8'), - ('Nanoseconds', 'i4'), + # ('Nanoseconds', 'i4'), # ohlcv sampling ('Open', 'f4'), @@ -157,30 +167,87 @@ def timestamp(date, **kwargs) -> int: @asynccontextmanager async def get_client( host: str = 'localhost', - port: int = 5993 + port: int = 5995 ) -> MarketstoreClient: - async with open_marketstore_client(host, port) as client: + async with open_marketstore_client( + host, + port + ) as client: yield client -async def backfill_history(): +# class MarketStoreError(Exception): +# "Generic marketstore client error" + + +# def err_on_resp(response: dict) -> None: +# """Raise any errors found in responses from client request. 
+# """ +# responses = response['responses'] +# if responses is not None: +# for r in responses: +# err = r['error'] +# if err: +# raise MarketStoreError(err) + + +async def backfill_history( + # symbol: Symbol + +) -> list[str]: + # TODO: + # - compute time-smaple step + # - take ``Symbol`` as input + # - backtrack into history using backend helper endpoint + + # broker = 'ib' + # symbol = 'mnq.globex' + + broker = 'binance' + symbol = 'btcusdt' + + fqsn = mk_fqsn(broker, symbol) async with ( get_client() as msclient, maybe_open_feed( - 'ib', - ['mnq.globex'], + broker, + [symbol], loglevel='info', # backpressure=False, start_stream=False, ) as (feed, stream), ): - await tractor.breakpoint() - await msclient.write( - feed.shm.array, - tbk='mnq.globex.ib/1Sec/OHLCV', + ohlcv = feed.shm.array + mkts_dt = np.dtype(_ohlcv_dt) + + syms = await msclient.list_symbols() + log.info(f'Existing symbol set:\n{pformat(syms)}') + + # build mkts schema compat array + mkts_array = np.zeros( + len(ohlcv), + dtype=mkts_dt, ) + # copy from shm array + mkts_array[:] = ohlcv[[ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + ]] + + # write to db + resp = await msclient.write( + mkts_array, + tbk=f'{fqsn}/1Min/OHLCV', + isvariablelength=True, + ) + + # TODO: backfiller loop async def ingest_quote_stream( From 8c558d05d678c301741b09675b362af6717f4e4a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 20 Feb 2022 15:24:03 -0500 Subject: [PATCH 024/105] Persist backing `/data/` filesystem across container runs --- piker/data/_ahab.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 9ec805a8..59bb4d55 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -186,6 +186,7 @@ async def open_marketstore( log = get_console_log('info', name=__name__) async with open_docker() as client: + # create a mount from user's local piker config dir into container config_dir_mnt = docker.types.Mount( target='/etc', @@ -193,6 +194,20 @@ async def open_marketstore( type='bind', ) + # create a user config subdir where the marketstore + # backing filesystem database can be persisted. + persistent_data_dir = os.path.join( + config._config_dir, 'data', + ) + if not os.path.isdir(persistent_data_dir): + os.mkdir(persistent_data_dir) + + data_dir_mnt = docker.types.Mount( + target='/data', + source=persistent_data_dir, + type='bind', + ) + cntr: Container = client.containers.run( 'alpacamarkets/marketstore:latest', # do we need this for cmds? @@ -203,7 +218,7 @@ async def open_marketstore( '5993/tcp': 5993, # jsonrpc '5995/tcp': 5995, # grpc }, - mounts=[config_dir_mnt], + mounts=[config_dir_mnt, data_dir_mnt], detach=True, stop_signal='SIGINT', init=True, From a2fe814857aae48d89c62b358b60807d1f966da8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 22 Feb 2022 15:20:33 -0500 Subject: [PATCH 025/105] Better doc string --- piker/data/_ahab.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 59bb4d55..651013f4 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -298,7 +298,16 @@ async def start_ahab( task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, ) -> None: + ''' + Start a ``docker`` container supervisor with given service name. 
+ Currently the actor calling this task should normally be started + with root permissions (until we decide to use something that doesn't + require this, like docker's rootless mode or some wrapper project) but + te root perms are de-escalated after the docker supervisor sub-actor + is started. + + ''' cn_ready = trio.Event() async with tractor.open_nursery( loglevel='runtime', From 4555a1f279085a81f71d228b1274ff3f1691f015 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 22 Feb 2022 15:21:41 -0500 Subject: [PATCH 026/105] Prototype out writing `1Sec` OHLCV data --- piker/data/marketstore.py | 113 ++++++++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 30 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 05ef549a..457f37d7 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -23,7 +23,8 @@ - todo: tick sequence stream-cloning for testing ''' -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager as acm +from pprint import pformat from typing import ( Any, Optional, @@ -33,12 +34,13 @@ from typing import ( import time from math import isnan +from bidict import bidict import msgpack import numpy as np import pandas as pd import tractor from trio_websocket import open_websocket_url -from anyio_marketstore import open_marketstore_client, MarketstoreClient +from anyio_marketstore import open_marketstore_client, MarketstoreClient, Params from ..log import get_logger, get_console_log from .feed import maybe_open_feed @@ -164,12 +166,17 @@ def timestamp(date, **kwargs) -> int: return int(pd.Timestamp(date, **kwargs).value) -@asynccontextmanager +@acm async def get_client( host: str = 'localhost', port: int = 5995 ) -> MarketstoreClient: + ''' + Load a ``anyio_marketstore`` grpc client connected + to an existing ``marketstore`` server. 
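+
+    Note the default port here is the grpc listen port (the
+    ``grpc_listen_port`` set in the ``_ahab._config`` template and
+    mapped out of the container), not the 5993 jsonrpc port.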
+ + ''' async with open_marketstore_client( host, port @@ -192,6 +199,41 @@ async def get_client( # raise MarketStoreError(err) +tf_in_1s = bidict({ + 1: '1Sec', + 60: '1Min', + 60*5: '5Min', + 60*15: '15Min', + 60*30: '30Min', + 60*60: '1H', + 60*60*24: '1D', +}) + + +# @acm +async def load_history( + symbol: Symbol, + period: int = 1, # in seconds + +) -> np.ndarray: + + async with get_client() as client: + + tfstr = tf_in_1s[period] + result = await client.query( + Params(fqsn, tf_in_1s, 'OHLCV',) + ) + # Dig out `numpy` results map + arrays = {} + await tractor.breakpoint() + # for qr in [onem, fivem]: + # for name, data_set in qr.by_symbols().items(): + # arrays[(name, qr)] = data_set.array + + # # TODO: backfiller loop + # array = arrays[(fqsn, qr)] + + async def backfill_history( # symbol: Symbol @@ -201,28 +243,32 @@ async def backfill_history( # - take ``Symbol`` as input # - backtrack into history using backend helper endpoint - # broker = 'ib' - # symbol = 'mnq.globex' + broker = 'ib' + symbol = 'mnq.globex' - broker = 'binance' - symbol = 'btcusdt' + # broker = 'binance' + # symbol = 'btcusdt' fqsn = mk_fqsn(broker, symbol) + print('yo') async with ( - get_client() as msclient, + get_client() as client, maybe_open_feed( broker, [symbol], loglevel='info', # backpressure=False, start_stream=False, + ) as (feed, stream), ): + print('yo') ohlcv = feed.shm.array mkts_dt = np.dtype(_ohlcv_dt) - syms = await msclient.list_symbols() + print('yo') + syms = await client.list_symbols() log.info(f'Existing symbol set:\n{pformat(syms)}') # build mkts schema compat array @@ -240,14 +286,39 @@ async def backfill_history( 'volume', ]] + key = (fqsn, '1Sec', 'OHLCV') + tbk = mk_tbk(key) + + # diff vs. existing array and append new history + # TODO: + # write to db - resp = await msclient.write( + resp = await client.write( mkts_array, - tbk=f'{fqsn}/1Min/OHLCV', - isvariablelength=True, + tbk=tbk, + # NOTE: will will append duplicates + # for the same timestamp-index. + # isvariablelength=True, ) + # TODO: should be no error? + # assert not resp.responses + + # # Dig out `numpy` results map + qr = await client.query( + Params(fqsn, '1Min`', 'OHLCV',) + ) + qr = await client.query( + # Params(fqsn, '1Sec`', 'OHLCV',) + Params(*key), + ) + arrays = {} + # for qr in [onem, fivem]: + for name, data_set in qr.by_symbols().items(): + arrays[(name, qr)] = data_set.array # TODO: backfiller loop + array = arrays[(fqsn, qr)] + await tractor.breakpoint() async def ingest_quote_stream( @@ -315,24 +386,6 @@ async def ingest_quote_stream( # await ms_client.write(symbol, a) -# async def stream_quotes( -# symbols: list[str], -# timeframe: str = '1Min', -# attr_group: str = 'TICK', -# host: str = 'localhost', -# port: int = 5993, -# loglevel: str = None - -# ) -> None: -# ''' -# Open a symbol stream from a running instance of marketstore and -# log to console. 
- -# ''' -# tbks: dict[str, str] = { -# sym: f'{sym}/{timeframe}/{attr_group}' for sym in symbols} - - async def stream_quotes( symbols: list[str], host: str = 'localhost', From 3dba456cf83eff727a88c90f35d6cf7c218eed61 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 28 Feb 2022 07:41:37 -0500 Subject: [PATCH 027/105] Add latency measures around diffs/writes to mkts --- piker/data/cli.py | 4 +- piker/data/marketstore.py | 108 +++++++++++++++++++++++--------------- 2 files changed, 67 insertions(+), 45 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 5b9c854d..6ea2503d 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -119,14 +119,14 @@ def ms_shell(config, tl, host, port): Start an IPython shell ready to query the local marketstore db. ''' - from piker.data.marketstore import backfill_history + from piker.data.marketstore import backfill_history_diff from piker._daemon import open_piker_runtime async def main(): async with open_piker_runtime( 'ms_shell', enable_modules=['piker.data._ahab'], ): - await backfill_history() + await backfill_history_diff() # TODO: write magics to query marketstore # from IPython import embed # embed() diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 457f37d7..cdcaeb02 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -113,7 +113,7 @@ def mk_tbk(keys: tuple[str, str, str]) -> str: ``('SPY', '1Sec', 'TICK')`` -> ``"SPY/1Sec/TICK"``` ''' - return '{}/' + '/'.join(keys) + return '/'.join(keys) def quote_to_marketstore_structarray( @@ -184,8 +184,8 @@ async def get_client( yield client -# class MarketStoreError(Exception): -# "Generic marketstore client error" +class MarketStoreError(Exception): + "Generic marketstore client error" # def err_on_resp(response: dict) -> None: @@ -210,13 +210,16 @@ tf_in_1s = bidict({ }) -# @acm -async def load_history( - symbol: Symbol, +async def manage_history( + fqsn: str, period: int = 1, # in seconds -) -> np.ndarray: +) -> dict[str, np.ndarray]: + ''' + Load a series by key and deliver in ``numpy`` struct array + format. + ''' async with get_client() as client: tfstr = tf_in_1s[period] @@ -225,16 +228,17 @@ async def load_history( ) # Dig out `numpy` results map arrays = {} - await tractor.breakpoint() # for qr in [onem, fivem]: - # for name, data_set in qr.by_symbols().items(): - # arrays[(name, qr)] = data_set.array + for name, data_set in result.by_symbols().items(): + arrays[(name, qr)] = data_set.array + await tractor.breakpoint() # # TODO: backfiller loop # array = arrays[(fqsn, qr)] + return arrays -async def backfill_history( +async def backfill_history_diff( # symbol: Symbol ) -> list[str]: @@ -251,7 +255,6 @@ async def backfill_history( fqsn = mk_fqsn(broker, symbol) - print('yo') async with ( get_client() as client, maybe_open_feed( @@ -263,21 +266,52 @@ async def backfill_history( ) as (feed, stream), ): - print('yo') - ohlcv = feed.shm.array - mkts_dt = np.dtype(_ohlcv_dt) - - print('yo') syms = await client.list_symbols() log.info(f'Existing symbol set:\n{pformat(syms)}') - # build mkts schema compat array + # diff db history with shm and only write the missing portions + ohlcv = feed.shm.array + + key = (fqsn, '1Sec', 'OHLCV') + tbk = mk_tbk(key) + + # diff vs. existing array and append new history + # TODO: + + # TODO: should be no error? 
+ # assert not resp.responses + + start = time.time() + + qr = await client.query( + # Params(fqsn, '1Sec`', 'OHLCV',) + Params(*key), + ) + # # Dig out `numpy` results map + arrays: dict[tuple[str, int], np.ndarray] = {} + for name, data_set in qr.by_symbols().items(): + in_secs = tf_in_1s.inverse[data_set.timeframe] + arrays[(name, in_secs)] = data_set.array + + s1 = arrays[(fqsn, 1)] + to_append = ohlcv[ohlcv['time'] > s1['Epoch'][-1]] + + end_diff = time.time() + diff_ms = round((end_diff - start) * 1e3, ndigits=2) + + log.info( + f'Appending {to_append.size} datums to tsdb from shm\n' + f'Total diff time: {diff_ms} ms' + ) + + # build mkts schema compat array for writing + mkts_dt = np.dtype(_ohlcv_dt) mkts_array = np.zeros( - len(ohlcv), + len(to_append), dtype=mkts_dt, ) # copy from shm array - mkts_array[:] = ohlcv[[ + mkts_array[:] = to_append[[ 'time', 'open', 'high', @@ -286,39 +320,27 @@ async def backfill_history( 'volume', ]] - key = (fqsn, '1Sec', 'OHLCV') - tbk = mk_tbk(key) - - # diff vs. existing array and append new history - # TODO: - # write to db resp = await client.write( mkts_array, tbk=tbk, # NOTE: will will append duplicates # for the same timestamp-index. - # isvariablelength=True, + isvariablelength=True, ) - # TODO: should be no error? - # assert not resp.responses - - # # Dig out `numpy` results map - qr = await client.query( - Params(fqsn, '1Min`', 'OHLCV',) + end_write = time.time() + diff_ms = round((end_write - end_diff) * 1e3, ndigits=2) + log.info( + f'Wrote {to_append.size} datums to tsdb\n' + f'Total write time: {diff_ms} ms' ) - qr = await client.query( - # Params(fqsn, '1Sec`', 'OHLCV',) - Params(*key), - ) - arrays = {} - # for qr in [onem, fivem]: - for name, data_set in qr.by_symbols().items(): - arrays[(name, qr)] = data_set.array + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) # TODO: backfiller loop - array = arrays[(fqsn, qr)] - await tractor.breakpoint() + # await tractor.breakpoint() async def ingest_quote_stream( From cbe74d126e37dab00e94f095615d87139cb5942d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 28 Feb 2022 12:15:10 -0500 Subject: [PATCH 028/105] Doc str formatting --- piker/data/_source.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/piker/data/_source.py b/piker/data/_source.py index 3fa6db7b..12cad24f 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -127,11 +127,11 @@ def unpack_fqsn(fqsn: str) -> tuple[str, str, str]: class Symbol(BaseModel): - """I guess this is some kinda container thing for dealing with + ''' + I guess this is some kinda container thing for dealing with all the different meta-data formats from brokers? - Yah, i guess dats what it izz. - """ + ''' key: str tick_size: float = 0.01 lot_tick_size: float = 0.0 # "volume" precision as min step value From 706c8085f2905ca5f303d0c5f929e7beed30a2f6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 1 Mar 2022 12:29:49 -0500 Subject: [PATCH 029/105] Prototype a high level `Storage` api Starts a wrapper around the `marketstore` client to do basic ohlcv query and retrieval and prototypes out write methods for ohlc and tick. Try to connect to `marketstore` automatically (which will fail if not started currently) but we will eventually first do a service query. Further: - get `pikerd` working with and without `--tsdb` flag. - support spawning `brokerd` with no real-time quotes. - bring back in "fqsn" support that was originally not in this history before commits factoring. 
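
Rough sketch of how the feed layer consumes the new storage api (this
just mirrors the `manage_history()` changes in this patch; the return
shapes are still WIP and may change):

    async with marketstore.open_storage_client(
        fqsn,
    ) as (storage, tsdb_arrays):
        if tsdb_arrays:
            fastest = list(tsdb_arrays[fqsn].values())[0]
            last_s = fastest['Epoch'][-1]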
--- piker/data/feed.py | 84 +++++++++++++++++---- piker/data/marketstore.py | 152 +++++++++++++++++++++++++++++++------- 2 files changed, 194 insertions(+), 42 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 19504204..35d006de 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -22,6 +22,7 @@ This module is enabled for ``brokerd`` daemons. """ from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime from contextlib import asynccontextmanager from functools import partial from types import ModuleType @@ -42,11 +43,13 @@ from .._cacheables import maybe_open_context from ..log import get_logger, get_console_log from .._daemon import ( maybe_spawn_brokerd, + check_for_service, ) from ._sharedmem import ( maybe_open_shm_array, attach_shm_array, ShmArray, + _secs_in_day, ) from .ingest import get_ingestormod from ._source import ( @@ -125,7 +128,7 @@ class _FeedsBus(BaseModel): # def cancel_task( # self, - # task: trio.lowlevel.Task + # task: trio.lowlevel.Task, # ) -> bool: # ... @@ -218,7 +221,61 @@ async def manage_history( readonly=False, ) - if opened: + log.info('Scanning for existing `marketstored`') + is_up = await check_for_service('marketstored') + if is_up and opened: + log.info('Found existing `marketstored`') + from . import marketstore + + async with marketstore.open_storage_client( + fqsn, + ) as (storage, tsdb_arrays): + + # TODO: history validation + # assert opened, f'Persistent shm for {symbol} was already open?!' + # if not opened: + # raise RuntimeError( + # "Persistent shm for sym was already open?!" + # ) + + if tsdb_arrays: + log.info(f'Loaded tsdb history {tsdb_arrays}') + fastest = list(tsdb_arrays[fqsn].values())[0] + last_s = fastest['Epoch'][-1] + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + + # re-index with a `time` and index field + shm.push( + fastest[-3 * _secs_in_day:], + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + start=shm._len - _secs_in_day, + field_map={ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', + }, + ) + + # start history anal and load missing new data via backend. + async with mod.open_history_client(fqsn) as hist: + + # get latest query's worth of history + array, next_dt = await hist(end_dt='') + + last_dt = datetime.fromtimestamp(last_s) + array, next_dt = await hist(end_dt=last_dt) + + some_data_ready.set() + + elif opened: log.info('No existing `marketstored` found..') # start history backfill task ``backfill_bars()`` is @@ -254,6 +311,7 @@ async def manage_history( ) await trio.sleep_forever() + # cs.cancel() async def allocate_persistent_feed( @@ -261,6 +319,7 @@ async def allocate_persistent_feed( brokername: str, symbol: str, loglevel: str, + start_stream: bool = True, task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, @@ -302,10 +361,8 @@ async def allocate_persistent_feed( loglevel=loglevel, ) ) - # the broker-specific fully qualified symbol name, - # but ensure it is lower-cased for external use. 
- bfqsn = init_msg[symbol]['fqsn'].lower() - init_msg[symbol]['fqsn'] = bfqsn + # the broker-specific fully qualified symbol name + bfqsn = init_msg[symbol]['fqsn'] # HISTORY, run 2 tasks: # - a history loader / maintainer @@ -333,6 +390,7 @@ async def allocate_persistent_feed( # true fqsn fqsn = '.'.join((bfqsn, brokername)) + # add a fqsn entry that includes the ``.`` suffix init_msg[fqsn] = msg @@ -364,6 +422,9 @@ async def allocate_persistent_feed( # task_status.started((init_msg, generic_first_quotes)) task_status.started() + if not start_stream: + await trio.sleep_forever() + # backend will indicate when real-time quotes have begun. await feed_is_live.wait() @@ -429,13 +490,12 @@ async def open_feed_bus( bus=bus, brokername=brokername, - # here we pass through the selected symbol in native # "format" (i.e. upper vs. lowercase depending on # provider). symbol=symbol, - loglevel=loglevel, + start_stream=start_stream, ) ) # TODO: we can remove this? @@ -446,7 +506,7 @@ async def open_feed_bus( init_msg, first_quotes = bus.feeds[symbol] msg = init_msg[symbol] - bfqsn = msg['fqsn'].lower() + bfqsn = msg['fqsn'] # true fqsn fqsn = '.'.join([bfqsn, brokername]) @@ -765,10 +825,7 @@ async def maybe_open_feed( **kwargs, -) -> ( - Feed, - ReceiveChannel[dict[str, Any]], -): +) -> (Feed, ReceiveChannel[dict[str, Any]]): ''' Maybe open a data to a ``brokerd`` daemon only if there is no local one for the broker-symbol pair, if one is cached use it wrapped @@ -789,7 +846,6 @@ async def maybe_open_feed( 'start_stream': kwargs.get('start_stream', True), }, key=fqsn, - ) as (cache_hit, feed): if cache_hit: diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index cdcaeb02..ff4a52eb 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -28,8 +28,9 @@ from pprint import pformat from typing import ( Any, Optional, + Union, # Callable, - TYPE_CHECKING, + # TYPE_CHECKING, ) import time from math import isnan @@ -40,12 +41,19 @@ import numpy as np import pandas as pd import tractor from trio_websocket import open_websocket_url -from anyio_marketstore import open_marketstore_client, MarketstoreClient, Params +from anyio_marketstore import ( + open_marketstore_client, + MarketstoreClient, + Params, +) +import purerpc -from ..log import get_logger, get_console_log from .feed import maybe_open_feed -from ._source import mk_fqsn, Symbol - +from ._source import ( + mk_fqsn, + # Symbol, +) +from ..log import get_logger, get_console_log # if TYPE_CHECKING: # from ._sharedmem import ShmArray @@ -210,42 +218,130 @@ tf_in_1s = bidict({ }) -async def manage_history( - fqsn: str, - period: int = 1, # in seconds - -) -> dict[str, np.ndarray]: +class Storage: ''' - Load a series by key and deliver in ``numpy`` struct array - format. + High level storage api for both real-time and historical ingest. + + + ''' + def __init__( + self, + client: MarketstoreClient, + + ) -> None: + # TODO: eventually this should be an api/interface type that + # ensures we can support multiple tsdb backends. + self.client = client + + # series' cache from tsdb reads + self._arrays: dict[str, np.ndarray] = {} + + async def write_ticks(self, ticks: list) -> None: + ... + + async def write_ohlcv(self, ohlcv: np.ndarray) -> None: + ... 
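+
+    # NOTE: the write methods above are only stubs at this point; the
+    # actual tsdb writes still happen in ``backfill_history_diff()``
+    # below via the raw grpc client.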
+ + async def read_ohlcv( + self, + fqsn: str, + timeframe: Optional[Union[int, str]] = None, + + ) -> tuple[ + MarketstoreClient, + Union[dict, np.ndarray] + ]: + client = self.client + syms = await client.list_symbols() + + if fqsn not in syms: + return {} + + if timeframe is None: + log.info(f'starting {fqsn} tsdb granularity scan..') + # loop through and try to find highest granularity + for tfstr in tf_in_1s.values(): + try: + log.info(f'querying for {tfstr}@{fqsn}') + result = await client.query(Params(fqsn, tfstr, 'OHLCV',)) + break + except purerpc.grpclib.exceptions.UnknownError: + # XXX: this is already logged by the container and + # thus shows up through `marketstored` logs relay. + # log.warning(f'{tfstr}@{fqsn} not found') + continue + else: + return {} + + else: + tfstr = tf_in_1s[timeframe] + result = await client.query(Params(fqsn, tfstr, 'OHLCV',)) + + # Fill out a `numpy` array-results map + arrays = {} + for fqsn, data_set in result.by_symbols().items(): + arrays.setdefault(fqsn, {})[ + tf_in_1s.inverse[data_set.timeframe] + ] = data_set.array + + return ( + client, + arrays[fqsn][timeframe] if timeframe else arrays, + ) + + +@acm +async def open_storage_client( + fqsn: str, + period: Optional[Union[int, str]] = None, # in seconds + +) -> tuple[Storage, dict[str, np.ndarray]]: + ''' + Load a series by key and deliver in ``numpy`` struct array format. ''' async with get_client() as client: - tfstr = tf_in_1s[period] - result = await client.query( - Params(fqsn, tf_in_1s, 'OHLCV',) + storage_client = Storage(client) + arrays = await storage_client.read_ohlcv( + fqsn, + period, ) - # Dig out `numpy` results map - arrays = {} - # for qr in [onem, fivem]: - for name, data_set in result.by_symbols().items(): - arrays[(name, qr)] = data_set.array - await tractor.breakpoint() - # # TODO: backfiller loop - # array = arrays[(fqsn, qr)] - return arrays + yield storage_client, arrays async def backfill_history_diff( # symbol: Symbol ) -> list[str]: - # TODO: - # - compute time-smaple step - # - take ``Symbol`` as input - # - backtrack into history using backend helper endpoint + + # TODO: real-time dedicated task for ensuring + # history consistency between the tsdb, shm and real-time feed.. + + # update sequence design notes: + + # - load existing highest frequency data from mkts + # * how do we want to offer this to the UI? + # - lazy loading? + # - try to load it all and expect graphics caching/diffing + # to hide extra bits that aren't in view? + + # - compute the diff between latest data from broker and shm + # * use sql api in mkts to determine where the backend should + # start querying for data? + # * append any diff with new shm length + # * determine missing (gapped) history by scanning + # * how far back do we look? + + # - begin rt update ingest and aggregation + # * could start by always writing ticks to mkts instead of + # worrying about a shm queue for now. 
+ # * we have a short list of shm queues worth groking: + # - https://github.com/pikers/piker/issues/107 + # * the original data feed arch blurb: + # - https://github.com/pikers/piker/issues/98 + # broker = 'ib' symbol = 'mnq.globex' From 8003878248fbe6fc3396f5236810e486207d903f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 1 Mar 2022 12:34:16 -0500 Subject: [PATCH 030/105] Proxy `marketstore` container log level to our own --- piker/data/_ahab.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 651013f4..b844d25b 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -242,11 +242,12 @@ async def open_marketstore( raise RuntimeError(entry) msg = record['msg'] + level = record['level'] if msg and entry not in seen_so_far: seen_so_far.add(entry) if bp_on_msg: await tractor.breakpoint() - log.info(f'{msg}') + getattr(log, level)(f'{msg}') # if "launching tcp listener for all services..." in msg: if match in msg: From 3d6d77364b3f5b237dcf8c9c9fe7a3403413b77e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 6 Mar 2022 17:05:06 -0500 Subject: [PATCH 031/105] Allow kill-child-proc-with-root-perms to fail silently in `tractor` reaping --- piker/data/_ahab.py | 82 +++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index b844d25b..79980708 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -167,7 +167,7 @@ async def open_docker( @tractor.context -async def open_marketstore( +async def open_marketstored( ctx: tractor.Context, **kwargs, @@ -272,8 +272,9 @@ async def open_marketstore( await process_logs_until('exiting...',) except ( - trio.Cancelled, - KeyboardInterrupt, + BaseException, + # trio.Cancelled, + # KeyboardInterrupt, ): cntr.kill('SIGINT') with trio.move_on_after(0.5) as cs: @@ -310,34 +311,63 @@ async def start_ahab( ''' cn_ready = trio.Event() - async with tractor.open_nursery( - loglevel='runtime', - ) as tn: + try: + async with tractor.open_nursery( + loglevel='runtime', + ) as tn: - portal = await tn.start_actor( - service_name, - enable_modules=[__name__] - ) - - # de-escalate root perms to the original user - # after the docker supervisor actor is spawned. - if config._parent_user: - import pwd - os.setuid( - pwd.getpwnam( - config._parent_user - )[2] # named user's uid + portal = await tn.start_actor( + service_name, + enable_modules=[__name__] ) - task_status.started(cn_ready) + # TODO: we have issues with this on teardown + # where ``tractor`` tries to issue ``os.kill()`` + # and hits perms errors since the root process + # doesn't any longer have root perms.. - async with portal.open_context( - open_marketstore, - ) as (ctx, first): + # de-escalate root perms to the original user + # after the docker supervisor actor is spawned. + if config._parent_user: + import pwd + os.setuid( + pwd.getpwnam( + config._parent_user + )[2] # named user's uid + ) - assert str(first) - # run till cancelled - await trio.sleep_forever() + task_status.started(cn_ready) + + async with portal.open_context( + open_marketstored, + ) as (ctx, first): + + assert str(first) + # run till cancelled + await trio.sleep_forever() + + # since we demoted root perms in this parent + # we'll get a perms error on proc cleanup in + # ``tractor`` nursery exit. just make sure + # the child is terminated and don't raise the + # error if so. 
+ + # TODO: we could also consider adding + # a ``tractor.ZombieDetected`` or something that we could raise + # if we find the child didn't terminate. + # await tractor.breakpoint() + except PermissionError: + log.warning('Failed to cancel root permsed container') + + except ( + trio.MultiError, + ) as err: + for subexc in err.exceptions: + if isinstance(subexc, PermissionError): + log.warning('Failed to cancel root perms-ed container') + return + else: + raise async def main(): From dd2edaeb3c695fa8608a4df1443f5549e1418efb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Mar 2022 07:18:53 -0500 Subject: [PATCH 032/105] Add a service checker predicate --- piker/_daemon.py | 53 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/piker/_daemon.py b/piker/_daemon.py index ff7a129d..a77b189d 100644 --- a/piker/_daemon.py +++ b/piker/_daemon.py @@ -19,7 +19,7 @@ Structured, daemon tree service management. """ from typing import Optional, Union, Callable, Any -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager as acm from collections import defaultdict from pydantic import BaseModel @@ -130,7 +130,7 @@ class Services(BaseModel): _services: Optional[Services] = None -@asynccontextmanager +@acm async def open_pikerd( start_method: str = 'trio', loglevel: Optional[str] = None, @@ -185,7 +185,7 @@ async def open_pikerd( yield _services -@asynccontextmanager +@acm async def open_piker_runtime( name: str, enable_modules: list[str] = [], @@ -226,7 +226,7 @@ async def open_piker_runtime( yield tractor.current_actor() -@asynccontextmanager +@acm async def maybe_open_runtime( loglevel: Optional[str] = None, **kwargs, @@ -249,7 +249,7 @@ async def maybe_open_runtime( yield -@asynccontextmanager +@acm async def maybe_open_pikerd( loglevel: Optional[str] = None, **kwargs, @@ -300,7 +300,34 @@ class Brokerd: locks = defaultdict(trio.Lock) -@asynccontextmanager +@acm +async def find_service( + service_name: str, + +) -> tractor.Portal: + + log.info(f'Scanning for existing {service_name}') + # attach to existing daemon by name if possible + async with tractor.find_actor( + service_name, + arbiter_sockaddr=_registry_addr, + ) as portal: + yield portal + + +async def check_for_service( + service_name: str, + +) -> bool: + ''' + Service daemon "liveness" predicate. 
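+
+    Returns whatever socket address ``tractor.query_actor()`` reports
+    for the service, ``None`` if it isn't registered.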
+ + ''' + async with find_service(service_name) as portal: + return portal is not None + + +@acm async def maybe_spawn_daemon( service_name: str, @@ -330,19 +357,13 @@ async def maybe_spawn_daemon( lock = Brokerd.locks[service_name] await lock.acquire() - log.info(f'Scanning for existing {service_name}') - # attach to existing daemon by name if possible - async with tractor.find_actor( - service_name, - arbiter_sockaddr=_registry_addr, - - ) as portal: + async with find_service(service_name) as portal: if portal is not None: lock.release() yield portal return - log.warning(f"Couldn't find any existing {service_name}") + log.warning(f"Couldn't find any existing {service_name}") # ask root ``pikerd`` daemon to spawn the daemon we need if # pikerd is not live we now become the root of the @@ -423,7 +444,7 @@ async def spawn_brokerd( return True -@asynccontextmanager +@acm async def maybe_spawn_brokerd( brokername: str, @@ -485,7 +506,7 @@ async def spawn_emsd( return True -@asynccontextmanager +@acm async def maybe_open_emsd( brokername: str, From f582af4c9f889ad220b16365ccf0cb40b7fac639 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Mar 2022 07:22:12 -0500 Subject: [PATCH 033/105] Make `pikerd` work again without `--tsdb` flag --- piker/cli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index a1931b8f..721b2123 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -66,7 +66,7 @@ def pikerd(loglevel, host, tl, pdb, tsdb): await ctn_ready.wait() log.info('`marketstore` container:{uid} up') - await trio.sleep_forever() + await trio.sleep_forever() trio.run(main) From 9138f376f7e96ce1b2f65649fedb189fc837c9af Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Mar 2022 11:16:12 -0500 Subject: [PATCH 034/105] Return all timeframe arrays if `timeframe` not passed as input --- piker/data/marketstore.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index ff4a52eb..a9a8e286 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -222,7 +222,6 @@ class Storage: ''' High level storage api for both real-time and historical ingest. 
- ''' def __init__( self, @@ -284,10 +283,7 @@ class Storage: tf_in_1s.inverse[data_set.timeframe] ] = data_set.array - return ( - client, - arrays[fqsn][timeframe] if timeframe else arrays, - ) + return arrays[fqsn][timeframe] if timeframe else arrays @acm @@ -406,7 +402,8 @@ async def backfill_history_diff( len(to_append), dtype=mkts_dt, ) - # copy from shm array + # copy from shm array (yes it's this easy): + # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays mkts_array[:] = to_append[[ 'time', 'open', From 6c6f2abd06054c3def69a7a3b4f7066ca7a73982 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Mar 2022 17:47:45 -0500 Subject: [PATCH 035/105] Use new `tractor.query_actor()` for service checking --- piker/_daemon.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/piker/_daemon.py b/piker/_daemon.py index a77b189d..999b8fce 100644 --- a/piker/_daemon.py +++ b/piker/_daemon.py @@ -303,16 +303,15 @@ class Brokerd: @acm async def find_service( service_name: str, +) -> Optional[tractor.Portal]: -) -> tractor.Portal: - - log.info(f'Scanning for existing {service_name}') + log.info(f'Scanning for service `{service_name}`') # attach to existing daemon by name if possible async with tractor.find_actor( service_name, arbiter_sockaddr=_registry_addr, - ) as portal: - yield portal + ) as maybe_portal: + yield maybe_portal async def check_for_service( @@ -323,8 +322,11 @@ async def check_for_service( Service daemon "liveness" predicate. ''' - async with find_service(service_name) as portal: - return portal is not None + async with tractor.query_actor( + service_name, + arbiter_sockaddr=_registry_addr, + ) as sockaddr: + return sockaddr @acm From 25dfe4115d44736696d9254ac0f5630b5eb56141 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 7 Mar 2022 18:02:30 -0500 Subject: [PATCH 036/105] Move ib data reset script into a new `scripts/` dir --- {snippets => scripts}/ib_data_reset.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) rename {snippets => scripts}/ib_data_reset.py (92%) diff --git a/snippets/ib_data_reset.py b/scripts/ib_data_reset.py similarity index 92% rename from snippets/ib_data_reset.py rename to scripts/ib_data_reset.py index a65321dc..3639d257 100644 --- a/snippets/ib_data_reset.py +++ b/scripts/ib_data_reset.py @@ -30,11 +30,13 @@ orig_win_id = t.find_focused().window # for tws win_names: list[str] = [ 'Interactive Brokers', # tws running in i3 - 'IB Gateway.', # gw running in i3 + 'IB Gateway', # gw running in i3 + # 'IB', # gw running in i3 (newer version?) 
] for name in win_names: - results = t.find_named(name) + results = t.find_titled(name) + print(f'results for {name}: {results}') if results: con = results[0] print(f'Resetting data feed for {name}') From 39b4d2684a4fe60103cb3e5a1bc8f181caae10cf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 8 Mar 2022 09:52:22 -0500 Subject: [PATCH 037/105] Get ib key hack script to work with reconnect --- scripts/ib_data_reset.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/scripts/ib_data_reset.py b/scripts/ib_data_reset.py index 3639d257..6ef87142 100644 --- a/scripts/ib_data_reset.py +++ b/scripts/ib_data_reset.py @@ -1,5 +1,5 @@ # piker: trading gear for hackers -# Copyright (C) Tyler Goodlet (in stewardship for piker0) +# Copyright (C) Tyler Goodlet (in stewardship for pikers) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -49,22 +49,30 @@ for name in win_names: # https://github.com/rr-/pyxdotool # https://github.com/ShaneHutter/pyxdotool # https://github.com/cphyc/pyxdotool - subprocess.call([ - 'xdotool', - 'windowactivate', '--sync', win_id, - # move mouse to bottom left of window (where there should - # be nothing to click). - 'mousemove_relative', '--sync', str(w-4), str(h-4), + # TODO: only run the reconnect (2nd) kc on a detected + # disconnect? + for key_combo, timeout in [ + ('ctrl+alt+r', 12), + ('ctrl+alt+f', 6) + ]: + subprocess.call([ + 'xdotool', + 'windowactivate', '--sync', win_id, - # NOTE: we may need to stick a `--retry 3` in here.. - 'click', '--window', win_id, '--repeat', '3', '1', + # move mouse to bottom left of window (where there should + # be nothing to click). + 'mousemove_relative', '--sync', str(w-4), str(h-4), - # hackzorzes - 'key', 'ctrl+alt+f', - ], - timeout=1, - ) + # NOTE: we may need to stick a `--retry 3` in here.. + 'click', '--window', win_id, + '--repeat', '3', '1', + + # hackzorzes + 'key', key_combo, + ], + timeout=timeout, + ) # re-activate and focus original window subprocess.call([ From 565573b6099dad0590e2db85d1109ebf03f0d93d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 9 Mar 2022 21:07:48 -0500 Subject: [PATCH 038/105] Load any symbol-matching shm array if no `marketstored` found --- piker/data/cli.py | 68 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 6ea2503d..29ccf2cf 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -18,15 +18,14 @@ marketstore cli. """ -from typing import List from functools import partial from pprint import pformat +from anyio_marketstore import open_marketstore_client import trio import tractor import click - -from anyio_marketstore import open_marketstore_client +import numpy as np from .marketstore import ( get_client, @@ -39,6 +38,12 @@ from .marketstore import ( from ..cli import cli from .. 
import watchlists as wl from ..log import get_logger +from ._sharedmem import ( + maybe_open_shm_array, +) +from ._source import ( + base_iohlc_dtype, +) log = get_logger(__name__) @@ -52,10 +57,16 @@ log = get_logger(__name__) ) @click.argument('names', nargs=-1) @click.pass_obj -def ms_stream(config: dict, names: List[str], url: str): - """Connect to a marketstore time bucket stream for (a set of) symbols(s) +def ms_stream( + config: dict, + names: list[str], + url: str, +) -> None: + ''' + Connect to a marketstore time bucket stream for (a set of) symbols(s) and print to console. - """ + + ''' async def main(): # async for quote in stream_quotes(symbols=names): # log.info(f"Received quote:\n{quote}") @@ -72,7 +83,7 @@ def ms_stream(config: dict, names: List[str], url: str): # ) # @click.argument('names', nargs=-1) # @click.pass_obj -# def ms_destroy(config: dict, names: List[str], url: str) -> None: +# def ms_destroy(config: dict, names: list[str], url: str) -> None: # """Destroy symbol entries in the local marketstore instance. # """ # async def main(): @@ -113,23 +124,56 @@ def ms_stream(config: dict, names: List[str], url: str): '--port', default=5993 ) +@click.argument('symbols', nargs=-1) @click.pass_obj -def ms_shell(config, tl, host, port): +def ms_shell( + config, + tl, + host, + port, + symbols: list[str], +): ''' Start an IPython shell ready to query the local marketstore db. ''' from piker.data.marketstore import backfill_history_diff from piker._daemon import open_piker_runtime + async def main(): + nonlocal symbols + async with open_piker_runtime( 'ms_shell', enable_modules=['piker.data._ahab'], ): - await backfill_history_diff() - # TODO: write magics to query marketstore - # from IPython import embed - # embed() + try: + await backfill_history_diff() + except OSError: + # TODO: write magics to query marketstore + + sym = symbols[0] + symbol, _, broker = sym.rpartition('.') + # (maybe) allocate shm array for this broker/symbol which will + # be used for fast near-term history capture and processing. + shm, opened = maybe_open_shm_array( + key=sym, + dtype=base_iohlc_dtype, + ) + # load anything found in shm + from numpy.lib.recfunctions import structured_to_unstructured + mxmn = structured_to_unstructured( + shm.array[['low', 'high']], + # dtype=[('mxmn', ' Date: Fri, 11 Mar 2022 15:15:54 -0500 Subject: [PATCH 039/105] Comment each special key combo --- scripts/ib_data_reset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/ib_data_reset.py b/scripts/ib_data_reset.py index 6ef87142..aa0e93c4 100644 --- a/scripts/ib_data_reset.py +++ b/scripts/ib_data_reset.py @@ -53,7 +53,9 @@ for name in win_names: # TODO: only run the reconnect (2nd) kc on a detected # disconnect? for key_combo, timeout in [ + # only required if we need a connection reset. ('ctrl+alt+r', 12), + # data feed reset. 
('ctrl+alt+f', 6) ]: subprocess.call([ From 6dc6d00a9bd921812ba512ab14d9502f4ec90a11 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Mar 2022 16:09:07 -0500 Subject: [PATCH 040/105] Try downsampling mkts data --- piker/data/marketstore.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index a9a8e286..34de23ea 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -433,7 +433,13 @@ async def backfill_history_diff( raise MarketStoreError(err) # TODO: backfiller loop - # await tractor.breakpoint() + from piker.ui._compression import downsample + x, y = downsample( + s1['Epoch'], + s1['Close'], + bins=10, + ) + await tractor.breakpoint() async def ingest_quote_stream( From 6cdd017cd62e442be3238a30082ab716fd1d690f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 24 Mar 2022 13:44:12 -0400 Subject: [PATCH 041/105] Ensure bfqsn is lower cased for feed api consumers Also, Start tinkering with `tractor.trionics.ipython_embed()` In effort to get back to a usable REPL around the mkts client this adds usage of the new `tractor` integration api as well as logic for skipping backfilling if existing tsdb arrays are found. --- piker/data/feed.py | 47 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 35d006de..ff8a543a 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -192,6 +192,22 @@ async def _setup_persistent_brokerd( await trio.sleep_forever() +async def start_backfill( + mod: ModuleType, + fqsn: str, + shm: ShmArray, + + task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, + +) -> int: + + return await mod.backfill_bars( + fqsn, + shm, + task_status=task_status, + ) + + async def manage_history( mod: ModuleType, bus: _FeedsBus, @@ -222,7 +238,12 @@ async def manage_history( ) log.info('Scanning for existing `marketstored`') + is_up = await check_for_service('marketstored') + + # for now only do backfilling if no tsdb can be found + do_backfill = not is_up and opened + if is_up and opened: log.info('Found existing `marketstored`') from . import marketstore @@ -231,6 +252,11 @@ async def manage_history( fqsn, ) as (storage, tsdb_arrays): + # TODO: get this shit workin + from tractor.trionics import ipython_embed + await ipython_embed() + # await ipython_embed(ns=locals()) + # TODO: history validation # assert opened, f'Persistent shm for {symbol} was already open?!' # if not opened: @@ -272,16 +298,27 @@ async def manage_history( last_dt = datetime.fromtimestamp(last_s) array, next_dt = await hist(end_dt=last_dt) + else: + do_backfill = True + + # await tractor.breakpoint() some_data_ready.set() - elif opened: + if do_backfill: log.info('No existing `marketstored` found..') # start history backfill task ``backfill_bars()`` is # a required backend func this must block until shm is # filled with first set of ohlc bars - _ = await bus.nursery.start(mod.backfill_bars, fqsn, shm) + await bus.nursery.start( + start_backfill, + mod, + fqsn, + shm, + ) + + # _ = await bus.nursery.start(mod.backfill_bars, fqsn, shm) # yield back after client connect with filled shm task_status.started(shm) @@ -361,8 +398,10 @@ async def allocate_persistent_feed( loglevel=loglevel, ) ) - # the broker-specific fully qualified symbol name - bfqsn = init_msg[symbol]['fqsn'] + # the broker-specific fully qualified symbol name, + # but ensure it is lower-cased for external use. 
+ bfqsn = init_msg[symbol]['fqsn'].lower() + init_msg[symbol]['fqsn'] = bfqsn # HISTORY, run 2 tasks: # - a history loader / maintainer From 907b7dd5c6c2bbf097f7a10daab35eb5bf2607ff Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 24 Mar 2022 13:46:08 -0400 Subject: [PATCH 042/105] Disable re-connect for now in ib script --- scripts/ib_data_reset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ib_data_reset.py b/scripts/ib_data_reset.py index aa0e93c4..7e328190 100644 --- a/scripts/ib_data_reset.py +++ b/scripts/ib_data_reset.py @@ -54,7 +54,7 @@ for name in win_names: # disconnect? for key_combo, timeout in [ # only required if we need a connection reset. - ('ctrl+alt+r', 12), + # ('ctrl+alt+r', 12), # data feed reset. ('ctrl+alt+f', 6) ]: From 950cb03e07497ec8be08d9d311d303eee44dce8a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 29 Mar 2022 13:15:23 -0400 Subject: [PATCH 043/105] Drop `pandas` to `numpy` converter --- piker/data/_source.py | 58 +------------------------------------------ 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/piker/data/_source.py b/piker/data/_source.py index 12cad24f..b7f0ecd7 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -22,8 +22,7 @@ from typing import Any import decimal import numpy as np -import pandas as pd -from pydantic import BaseModel, validate_arguments +from pydantic import BaseModel # from numba import from_dtype @@ -254,61 +253,6 @@ class Symbol(BaseModel): return keys -def from_df( - - df: pd.DataFrame, - source=None, - default_tf=None - -) -> np.recarray: - """Convert OHLC formatted ``pandas.DataFrame`` to ``numpy.recarray``. - - """ - df.reset_index(inplace=True) - - # hackery to convert field names - date = 'Date' - if 'date' in df.columns: - date = 'date' - - # convert to POSIX time - df[date] = [d.timestamp() for d in df[date]] - - # try to rename from some camel case - columns = { - 'Date': 'time', - 'date': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', - - # most feeds are providing this over sesssion anchored - 'vwap': 'bar_wap', - - # XXX: ib_insync calls this the "wap of the bar" - # but no clue what is actually is... - # https://github.com/pikers/piker/issues/119#issuecomment-729120988 - 'average': 'bar_wap', - } - - df = df.rename(columns=columns) - - for name in df.columns: - # if name not in base_ohlc_dtype.names[1:]: - if name not in base_ohlc_dtype.names: - del df[name] - - # TODO: it turns out column access on recarrays is actually slower: - # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist - # it might make sense to make these structured arrays? - array = df.to_records(index=False) - _nan_to_closest_num(array) - - return array - - def _nan_to_closest_num(array: np.ndarray): """Return interpolated values instead of NaN. From ca48577c60c944b811718a2925f30c63092edf18 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 29 Mar 2022 13:26:18 -0400 Subject: [PATCH 044/105] Add diffing logic to `tsdb_history_update()` Add some basic `numpy` epoch slice logic to generate append and prepend arrays to write to the db. Mooar cool things, - add a `Storage.delete_ts()` method to wipe a column series from the db easily. 
- don't attempt to read in any OHLC series by default on client load - add some `pyqtgraph` profiling and drop manual latency measures - if no db series for the fqsn exists write the entire shm array --- piker/data/marketstore.py | 209 +++++++++++++++++++------------------- 1 file changed, 106 insertions(+), 103 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 34de23ea..887df45e 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -29,14 +29,13 @@ from typing import ( Any, Optional, Union, - # Callable, - # TYPE_CHECKING, ) import time from math import isnan from bidict import bidict import msgpack +import pyqtgraph as pg import numpy as np import pandas as pd import tractor @@ -49,15 +48,8 @@ from anyio_marketstore import ( import purerpc from .feed import maybe_open_feed -from ._source import ( - mk_fqsn, - # Symbol, -) from ..log import get_logger, get_console_log -# if TYPE_CHECKING: -# from ._sharedmem import ShmArray - log = get_logger(__name__) @@ -235,6 +227,16 @@ class Storage: # series' cache from tsdb reads self._arrays: dict[str, np.ndarray] = {} + async def list_keys(self) -> list[str]: + return await self.client.list_symbols() + + async def search_keys(self, pattern: str) -> list[str]: + ''' + Search for time series key in the storage backend. + + ''' + ... + async def write_ticks(self, ticks: list) -> None: ... @@ -262,7 +264,9 @@ class Storage: for tfstr in tf_in_1s.values(): try: log.info(f'querying for {tfstr}@{fqsn}') - result = await client.query(Params(fqsn, tfstr, 'OHLCV',)) + result = await client.query( + Params(fqsn, tfstr, 'OHLCV',) + ) break except purerpc.grpclib.exceptions.UnknownError: # XXX: this is already logged by the container and @@ -276,6 +280,9 @@ class Storage: tfstr = tf_in_1s[timeframe] result = await client.query(Params(fqsn, tfstr, 'OHLCV',)) + # TODO: it turns out column access on recarrays is actually slower: + # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist + # it might make sense to make these structured arrays? # Fill out a `numpy` array-results map arrays = {} for fqsn, data_set in result.by_symbols().items(): @@ -283,7 +290,22 @@ class Storage: tf_in_1s.inverse[data_set.timeframe] ] = data_set.array - return arrays[fqsn][timeframe] if timeframe else arrays + return arrays[fqsn][timeframe] if timeframe else arrays[fqsn] + + async def delete_ts( + self, + key: str, + timeframe: Optional[Union[int, str]] = None, + + ) -> bool: + + client = self.client + syms = await client.list_symbols() + print(syms) + # if key not in syms: + # raise KeyError(f'`{fqsn}` table key not found?') + + return await client.destroy(tbk=key) @acm @@ -296,19 +318,16 @@ async def open_storage_client( Load a series by key and deliver in ``numpy`` struct array format. 
''' - async with get_client() as client: - - storage_client = Storage(client) - arrays = await storage_client.read_ohlcv( - fqsn, - period, - ) - - yield storage_client, arrays + async with ( + # eventually a storage backend endpoint + get_client() as client, + ): + # slap on our wrapper api + yield Storage(client) -async def backfill_history_diff( - # symbol: Symbol +async def tsdb_history_update( + fqsn: str, ) -> list[str]: @@ -338,108 +357,92 @@ async def backfill_history_diff( # * the original data feed arch blurb: # - https://github.com/pikers/piker/issues/98 # - - broker = 'ib' - symbol = 'mnq.globex' - - # broker = 'binance' - # symbol = 'btcusdt' - - fqsn = mk_fqsn(broker, symbol) + profiler = pg.debug.Profiler( + disabled=False, # not pg_profile_enabled(), + delayed=False, + ) async with ( - get_client() as client, + open_storage_client(fqsn) as storage, + maybe_open_feed( - broker, - [symbol], - loglevel='info', - # backpressure=False, + [fqsn], start_stream=False, ) as (feed, stream), ): - syms = await client.list_symbols() - log.info(f'Existing symbol set:\n{pformat(syms)}') + profiler(f'opened feed for {fqsn}') + + symbol = feed.symbols.get(fqsn) + if symbol: + fqsn = symbol.front_fqsn() + + syms = await storage.client.list_symbols() + log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') + profiler(f'listed symbols {syms}') # diff db history with shm and only write the missing portions ohlcv = feed.shm.array - key = (fqsn, '1Sec', 'OHLCV') - tbk = mk_tbk(key) + # TODO: use pg profiler + tsdb_arrays = await storage.read_ohlcv(fqsn) - # diff vs. existing array and append new history - # TODO: + to_append = feed.shm.array + to_prepend = None - # TODO: should be no error? - # assert not resp.responses + # hist diffing + if tsdb_arrays: + onesec = tsdb_arrays[1] + to_append = ohlcv[ohlcv['time'] > onesec['Epoch'][-1]] + to_prepend = ohlcv[ohlcv['time'] < onesec['Epoch'][0]] - start = time.time() + profiler('Finished db arrays diffs') - qr = await client.query( - # Params(fqsn, '1Sec`', 'OHLCV',) - Params(*key), - ) - # # Dig out `numpy` results map - arrays: dict[tuple[str, int], np.ndarray] = {} - for name, data_set in qr.by_symbols().items(): - in_secs = tf_in_1s.inverse[data_set.timeframe] - arrays[(name, in_secs)] = data_set.array + for array in [to_append, to_prepend]: + if array is None: + continue - s1 = arrays[(fqsn, 1)] - to_append = ohlcv[ohlcv['time'] > s1['Epoch'][-1]] + log.info( + f'Writing datums {array.size} -> to tsdb from shm\n' + ) - end_diff = time.time() - diff_ms = round((end_diff - start) * 1e3, ndigits=2) + # build mkts schema compat array for writing + mkts_dt = np.dtype(_ohlcv_dt) + mkts_array = np.zeros( + len(array), + dtype=mkts_dt, + ) + # copy from shm array (yes it's this easy): + # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays + mkts_array[:] = array[[ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + ]] - log.info( - f'Appending {to_append.size} datums to tsdb from shm\n' - f'Total diff time: {diff_ms} ms' - ) + # write to db + resp = await storage.client.write( + mkts_array, + tbk=f'{fqsn}/1Sec/OHLCV', - # build mkts schema compat array for writing - mkts_dt = np.dtype(_ohlcv_dt) - mkts_array = np.zeros( - len(to_append), - dtype=mkts_dt, - ) - # copy from shm array (yes it's this easy): - # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays - mkts_array[:] = to_append[[ - 'time', - 'open', - 'high', - 'low', - 'close', - 'volume', - ]] + # NOTE: 
will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=True, + ) - # write to db - resp = await client.write( - mkts_array, - tbk=tbk, - # NOTE: will will append duplicates - # for the same timestamp-index. - isvariablelength=True, - ) - end_write = time.time() - diff_ms = round((end_write - end_diff) * 1e3, ndigits=2) - log.info( - f'Wrote {to_append.size} datums to tsdb\n' - f'Total write time: {diff_ms} ms' - ) - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) + log.info( + f'Wrote {to_append.size} datums to tsdb\n' + ) + profiler('Finished db writes') - # TODO: backfiller loop - from piker.ui._compression import downsample - x, y = downsample( - s1['Epoch'], - s1['Close'], - bins=10, - ) - await tractor.breakpoint() + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) async def ingest_quote_stream( From a971de2b6777cc325361d9ab096b00ee5ceb57ac Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 29 Mar 2022 13:33:43 -0400 Subject: [PATCH 045/105] Drop `ms-shell`, add `piker storesh` cmd --- piker/data/cli.py | 42 +++++++++------------------------------ piker/data/marketstore.py | 3 +++ 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 29ccf2cf..554048a4 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -126,7 +126,7 @@ def ms_stream( ) @click.argument('symbols', nargs=-1) @click.pass_obj -def ms_shell( +def storesh( config, tl, host, @@ -137,43 +137,18 @@ def ms_shell( Start an IPython shell ready to query the local marketstore db. ''' - from piker.data.marketstore import backfill_history_diff + from piker.data.marketstore import tsdb_history_update from piker._daemon import open_piker_runtime async def main(): nonlocal symbols async with open_piker_runtime( - 'ms_shell', + 'storesh', enable_modules=['piker.data._ahab'], ): - try: - await backfill_history_diff() - except OSError: - # TODO: write magics to query marketstore - - sym = symbols[0] - symbol, _, broker = sym.rpartition('.') - # (maybe) allocate shm array for this broker/symbol which will - # be used for fast near-term history capture and processing. - shm, opened = maybe_open_shm_array( - key=sym, - dtype=base_iohlc_dtype, - ) - # load anything found in shm - from numpy.lib.recfunctions import structured_to_unstructured - mxmn = structured_to_unstructured( - shm.array[['low', 'high']], - # dtype=[('mxmn', ' Date: Tue, 29 Mar 2022 14:06:28 -0400 Subject: [PATCH 046/105] Add basic tsdb history loading If `marketstore` is detected try to only load most recent missing data from the data provider (broker) and the rest from the tsdb and push it all to shm for display in the UI. If the provider/broker doesn't have the history client endpoint, just use the old one for now so we can start to incrementally add support. Don't start the ohlc step incrementer task until the backend signals that the feed is live. --- piker/data/feed.py | 194 ++++++++++++++++++++++++++------------------- 1 file changed, 113 insertions(+), 81 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index ff8a543a..9ab98600 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -22,7 +22,6 @@ This module is enabled for ``brokerd`` daemons. 
""" from __future__ import annotations from dataclasses import dataclass, field -from datetime import datetime from contextlib import asynccontextmanager from functools import partial from types import ModuleType @@ -49,7 +48,6 @@ from ._sharedmem import ( maybe_open_shm_array, attach_shm_array, ShmArray, - _secs_in_day, ) from .ingest import get_ingestormod from ._source import ( @@ -236,119 +234,137 @@ async def manage_history( # we expect the sub-actor to write readonly=False, ) + # TODO: history validation + if not opened: + raise RuntimeError( + "Persistent shm for sym was already open?!" + ) log.info('Scanning for existing `marketstored`') is_up = await check_for_service('marketstored') # for now only do backfilling if no tsdb can be found - do_backfill = not is_up and opened + do_legacy_backfill = not is_up and opened + + open_history_client = getattr(mod, 'open_history_client', None) + + if is_up and opened and open_history_client: - if is_up and opened: log.info('Found existing `marketstored`') from . import marketstore async with marketstore.open_storage_client( fqsn, - ) as (storage, tsdb_arrays): + ) as storage: - # TODO: get this shit workin - from tractor.trionics import ipython_embed - await ipython_embed() - # await ipython_embed(ns=locals()) + tsdb_arrays = await storage.read_ohlcv(fqsn) - # TODO: history validation - # assert opened, f'Persistent shm for {symbol} was already open?!' - # if not opened: - # raise RuntimeError( - # "Persistent shm for sym was already open?!" - # ) + if not tsdb_arrays: + do_legacy_backfill = True - if tsdb_arrays: + else: log.info(f'Loaded tsdb history {tsdb_arrays}') - fastest = list(tsdb_arrays[fqsn].values())[0] - last_s = fastest['Epoch'][-1] - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - - # re-index with a `time` and index field - shm.push( - fastest[-3 * _secs_in_day:], - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=True, - start=shm._len - _secs_in_day, - field_map={ - 'Epoch': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', - }, + fastest = list(tsdb_arrays.values())[0] + times = fastest['Epoch'] + first, last = times[0], times[-1] + first_tsdb_dt, last_tsdb_dt = map( + pendulum.from_timestamp, [first, last] ) + # TODO: this should be used verbatim for the pure + # shm backfiller approach below. + # start history anal and load missing new data via backend. - async with mod.open_history_client(fqsn) as hist: + async with open_history_client(fqsn) as hist: - # get latest query's worth of history - array, next_dt = await hist(end_dt='') + # get latest query's worth of history all the way + # back to what is recorded in the tsdb + array, start_dt, end_dt = await hist(end_dt='') + shm.push(array) - last_dt = datetime.fromtimestamp(last_s) - array, next_dt = await hist(end_dt=last_dt) - else: - do_backfill = True + # let caller unblock and deliver latest history frame + task_status.started(shm) + some_data_ready.set() - # await tractor.breakpoint() + # pull new history frames until we hit latest + # already in the tsdb + while start_dt > last_tsdb_dt: - some_data_ready.set() + array, start_dt, end_dt = await hist(end_dt=start_dt) + s_diff = (last_tsdb_dt - start_dt).seconds - if do_backfill: + # if we detect a partial frame's worth of data + # that is new, slice out only that history and + # write to shm. 
+ if s_diff > 0: + assert last_tsdb_dt > start_dt + selected = array['time'] > last_tsdb_dt.timestamp() + to_push = array[selected] + log.info( + f'Pushing partial frame {to_push.size} to shm' + ) + shm.push(to_push, prepend=True) + break + + else: + # write to shm + log.info(f'Pushing {array.size} datums to shm') + shm.push(array, prepend=True) + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + # await tractor.breakpoint() + + shm.push( + fastest[-shm._first.value:], + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # start=shm._len - _secs_in_day, + field_map={ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', + }, + ) + + # TODO: write new data to tsdb to be ready to for next + # read. + + if do_legacy_backfill: + # do a legacy incremental backfill from the provider. log.info('No existing `marketstored` found..') + bfqsn = fqsn.replace('.' + mod.name, '') # start history backfill task ``backfill_bars()`` is # a required backend func this must block until shm is # filled with first set of ohlc bars await bus.nursery.start( start_backfill, mod, - fqsn, + bfqsn, shm, ) - # _ = await bus.nursery.start(mod.backfill_bars, fqsn, shm) + # yield back after client connect with filled shm + task_status.started(shm) - # yield back after client connect with filled shm - task_status.started(shm) - - # indicate to caller that feed can be delivered to - # remote requesting client since we've loaded history - # data that can be used. - some_data_ready.set() - - # detect sample step size for sampled historical data - times = shm.array['time'] - delay_s = times[-1] - times[times != times[-1]][-1] - - # begin real-time updates of shm and tsb once the feed - # goes live. - await feed_is_live.wait() - - if opened: - sampler.ohlcv_shms.setdefault(delay_s, []).append(shm) - - # start shm incrementing for OHLC sampling at the current - # detected sampling period if one dne. - if sampler.incrementers.get(delay_s) is None: - await bus.start_task( - increment_ohlc_buffer, - delay_s, - ) + # indicate to caller that feed can be delivered to + # remote requesting client since we've loaded history + # data that can be used. + some_data_ready.set() + # history retreival loop depending on user interaction and thus + # a small RPC-prot for remotely controllinlg what data is loaded + # for viewing. await trio.sleep_forever() - # cs.cancel() async def allocate_persistent_feed( @@ -416,7 +432,7 @@ async def allocate_persistent_feed( manage_history, mod, bus, - bfqsn, + '.'.join((bfqsn, brokername)), some_data_ready, feed_is_live, ) @@ -429,7 +445,6 @@ async def allocate_persistent_feed( # true fqsn fqsn = '.'.join((bfqsn, brokername)) - # add a fqsn entry that includes the ``.`` suffix init_msg[fqsn] = msg @@ -464,9 +479,22 @@ async def allocate_persistent_feed( if not start_stream: await trio.sleep_forever() - # backend will indicate when real-time quotes have begun. + # begin real-time updates of shm and tsb once the feed goes live and + # the backend will indicate when real-time quotes have begun. await feed_is_live.wait() + # start shm incrementer task for OHLC style sampling + # at the current detected step period. 
+ times = shm.array['time'] + delay_s = times[-1] - times[times != times[-1]][-1] + + sampler.ohlcv_shms.setdefault(delay_s, []).append(shm) + if sampler.incrementers.get(delay_s) is None: + await bus.start_task( + increment_ohlc_buffer, + delay_s, + ) + sum_tick_vlm: bool = init_msg.get( 'shm_write_opts', {} ).get('sum_tick_vlm', True) @@ -545,7 +573,7 @@ async def open_feed_bus( init_msg, first_quotes = bus.feeds[symbol] msg = init_msg[symbol] - bfqsn = msg['fqsn'] + bfqsn = msg['fqsn'].lower() # true fqsn fqsn = '.'.join([bfqsn, brokername]) @@ -864,7 +892,10 @@ async def maybe_open_feed( **kwargs, -) -> (Feed, ReceiveChannel[dict[str, Any]]): +) -> ( + Feed, + ReceiveChannel[dict[str, Any]], +): ''' Maybe open a data to a ``brokerd`` daemon only if there is no local one for the broker-symbol pair, if one is cached use it wrapped @@ -885,6 +916,7 @@ async def maybe_open_feed( 'start_stream': kwargs.get('start_stream', True), }, key=fqsn, + ) as (cache_hit, feed): if cache_hit: From 53ad5e6f65e847cd685041baa783310369189e90 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 30 Mar 2022 14:10:15 -0400 Subject: [PATCH 047/105] Handle "fatal" level log msgs in docker super --- piker/data/_ahab.py | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 79980708..52088e91 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -24,7 +24,6 @@ from typing import ( # Any, ) from contextlib import asynccontextmanager as acm -# import time import trio from trio_typing import TaskStatus @@ -97,6 +96,7 @@ async def open_docker( base_url=url, **kwargs ) if url else docker.from_env(**kwargs) + yield client except ( @@ -127,43 +127,10 @@ async def open_docker( finally: if client: - # for c in client.containers.list(): - # c.kill() client.close() # client.api._custom_adapter.close() - - -# async def waitfor( -# cntr: Container, -# attr_path: tuple[str], -# expect=None, -# timeout: float = 0.5, - -# ) -> Any: -# ''' -# Wait for a container's attr value to be set. If ``expect`` is -# provided wait for the value to be set to that value. - -# This is an async version of the helper from our ``pytest-dockerctl`` -# plugin. - -# ''' -# def get(val, path): -# for key in path: -# val = val[key] -# return val - -# start = time.time() -# while time.time() - start < timeout: -# cntr.reload() -# val = get(cntr.attrs, attr_path) -# if expect is None and val: -# return val -# elif val == expect: -# return val -# else: -# raise TimeoutError("{} failed to be {}, value: \"{}\"".format( -# attr_path, expect if expect else 'not None', val)) + for c in client.containers.list(): + c.kill() @tractor.context @@ -220,7 +187,7 @@ async def open_marketstored( }, mounts=[config_dir_mnt, data_dir_mnt], detach=True, - stop_signal='SIGINT', + # stop_signal='SIGINT', init=True, # remove=True, ) @@ -247,7 +214,7 @@ async def open_marketstored( seen_so_far.add(entry) if bp_on_msg: await tractor.breakpoint() - getattr(log, level)(f'{msg}') + getattr(log, level, log.error)(f'{msg}') # if "launching tcp listener for all services..." in msg: if match in msg: @@ -269,6 +236,8 @@ async def open_marketstored( ) await ctx.started(cntr.id) + + # block for the expected "teardown log msg".. 
await process_logs_until('exiting...',) except ( From ce3229df7d05f137b64362ad234e4cfe328220f2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 30 Mar 2022 14:11:21 -0400 Subject: [PATCH 048/105] Get sync-to-marketstore-tsdb history retrieval workinnn --- piker/data/feed.py | 52 ++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 9ab98600..6cbfbebc 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -36,6 +36,7 @@ from trio.abc import ReceiveChannel from trio_typing import TaskStatus import tractor from pydantic import BaseModel +import numpy as np from ..brokers import get_brokermod from .._cacheables import maybe_open_context @@ -276,13 +277,38 @@ async def manage_history( # TODO: this should be used verbatim for the pure # shm backfiller approach below. + def diff_history( + array, + start_dt, + end_dt, + + ) -> np.ndarray: + + s_diff = (last_tsdb_dt - start_dt).seconds + + # if we detect a partial frame's worth of data + # that is new, slice out only that history and + # write to shm. + if s_diff > 0: + assert last_tsdb_dt > start_dt + selected = array['time'] > last_tsdb_dt.timestamp() + to_push = array[selected] + log.info( + f'Pushing partial frame {to_push.size} to shm' + ) + return to_push + + else: + return array + # start history anal and load missing new data via backend. async with open_history_client(fqsn) as hist: # get latest query's worth of history all the way # back to what is recorded in the tsdb array, start_dt, end_dt = await hist(end_dt='') - shm.push(array) + to_push = diff_history(array, start_dt, end_dt) + shm.push(to_push) # let caller unblock and deliver latest history frame task_status.started(shm) @@ -291,33 +317,13 @@ async def manage_history( # pull new history frames until we hit latest # already in the tsdb while start_dt > last_tsdb_dt: - array, start_dt, end_dt = await hist(end_dt=start_dt) - s_diff = (last_tsdb_dt - start_dt).seconds - - # if we detect a partial frame's worth of data - # that is new, slice out only that history and - # write to shm. 
- if s_diff > 0: - assert last_tsdb_dt > start_dt - selected = array['time'] > last_tsdb_dt.timestamp() - to_push = array[selected] - log.info( - f'Pushing partial frame {to_push.size} to shm' - ) - shm.push(to_push, prepend=True) - break - - else: - # write to shm - log.info(f'Pushing {array.size} datums to shm') - shm.push(array, prepend=True) + to_push = diff_history(array, start_dt, end_dt) + shm.push(to_push, prepend=True) # TODO: see if there's faster multi-field reads: # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields # re-index with a `time` and index field - # await tractor.breakpoint() - shm.push( fastest[-shm._first.value:], From 15630f465d9d9971048b90570d6f5f484972459e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 8 Apr 2022 11:47:22 -0400 Subject: [PATCH 049/105] Limit ohlc queries to 800k datums to avoid `purepc` size error --- piker/data/marketstore.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index a1a54edb..f82770ab 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -258,16 +258,29 @@ class Storage: if fqsn not in syms: return {} + tfstr = tf_in_1s[1] + + params = Params( + symbols=fqsn, + timeframe=tfstr, + attrgroup='OHLCV', + # limit_from_start=True, + + # TODO: figure the max limit here given the + # ``purepc`` msg size limit of purerpc: 33554432 + limit=int(800e3), + ) + if timeframe is None: log.info(f'starting {fqsn} tsdb granularity scan..') # loop through and try to find highest granularity for tfstr in tf_in_1s.values(): try: log.info(f'querying for {tfstr}@{fqsn}') - result = await client.query( - Params(fqsn, tfstr, 'OHLCV',) - ) + params.set('timeframe', tfstr) + result = await client.query(params) break + except purerpc.grpclib.exceptions.UnknownError: # XXX: this is already logged by the container and # thus shows up through `marketstored` logs relay. @@ -277,8 +290,7 @@ class Storage: return {} else: - tfstr = tf_in_1s[timeframe] - result = await client.query(Params(fqsn, tfstr, 'OHLCV',)) + result = await client.query(params) # TODO: it turns out column access on recarrays is actually slower: # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist From 9fe5cd647a8d29d6fd0d55144e3f84041fbaf15b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 8 Apr 2022 11:48:14 -0400 Subject: [PATCH 050/105] Handle non-fqsn for derivs and don't put brokername in --- piker/data/feed.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 6cbfbebc..e6f4990e 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -375,8 +375,10 @@ async def manage_history( async def allocate_persistent_feed( bus: _FeedsBus, + brokername: str, symbol: str, + loglevel: str, start_stream: bool = True, @@ -396,6 +398,7 @@ async def allocate_persistent_feed( - a real-time streaming task which connec ''' + # load backend module try: mod = get_brokermod(brokername) except ImportError: @@ -452,7 +455,10 @@ async def allocate_persistent_feed( # true fqsn fqsn = '.'.join((bfqsn, brokername)) # add a fqsn entry that includes the ``.`` suffix + # and an entry that includes the broker-specific fqsn (including + # any new suffixes or elements as injected by the backend). init_msg[fqsn] = msg + init_msg[bfqsn] = msg # TODO: pretty sure we don't need this? 
why not just leave 1s as # the fastest "sample period" since we'll probably always want that @@ -466,13 +472,14 @@ async def allocate_persistent_feed( await some_data_ready.wait() # append ``.`` suffix to each quote symbol - bsym = symbol + f'.{brokername}' + acceptable_not_fqsn_with_broker_suffix = symbol + f'.{brokername}' + generic_first_quotes = { - bsym: first_quote, + acceptable_not_fqsn_with_broker_suffix: first_quote, fqsn: first_quote, } - bus.feeds[symbol] = bus.feeds[fqsn] = ( + bus.feeds[symbol] = bus.feeds[bfqsn] = ( init_msg, generic_first_quotes, ) @@ -523,7 +530,7 @@ async def open_feed_bus( ctx: tractor.Context, brokername: str, - symbol: str, + symbol: str, # normally expected to the broker-specific fqsn loglevel: str, tick_throttle: Optional[float] = None, start_stream: bool = True, @@ -545,7 +552,9 @@ async def open_feed_bus( # TODO: check for any stale shm entries for this symbol # (after we also group them in a nice `/dev/shm/piker/` subdir). # ensure we are who we think we are - assert 'brokerd' in tractor.current_actor().name + servicename = tractor.current_actor().name + assert 'brokerd' in servicename + assert brokername in servicename bus = get_feed_bus(brokername) @@ -555,7 +564,7 @@ async def open_feed_bus( entry = bus.feeds.get(symbol) if entry is None: # allocate a new actor-local stream bus which - # will persist for this `brokerd`. + # will persist for this `brokerd`'s service lifetime. async with bus.task_lock: await bus.nursery.start( partial( @@ -584,7 +593,7 @@ async def open_feed_bus( # true fqsn fqsn = '.'.join([bfqsn, brokername]) assert fqsn in first_quotes - assert bus.feeds[fqsn] + assert bus.feeds[bfqsn] # broker-ambiguous symbol (provided on cli - eg. mnq.globex.ib) bsym = symbol + f'.{brokername}' From 71416f57521e410d26f9dc7dfbc15e6d5b239e29 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 30 Apr 2022 13:22:30 -0400 Subject: [PATCH 051/105] Add `anyio-marketstore` client as dev dep --- requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/requirements.txt b/requirements.txt index e64267b9..077fe24f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,7 @@ # pin this to a dev branch that we have more control over especially # as more graphics stuff gets hashed out. -e git+https://github.com/pikers/pyqtgraph.git@piker_pin#egg=pyqtgraph + + +# our async client for ``marketstore`` (the tsdb) +-e git+https://github.com/pikers/anyio-marketstore.git@master#egg=anyio-marketstore From a10dc4fe777a5e39a6b8dfca183af6e1055c8b6e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 30 Apr 2022 13:24:12 -0400 Subject: [PATCH 052/105] Add `docker` as `tsdb` extras dep --- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index fc3ce78a..71b27d48 100755 --- a/setup.py +++ b/setup.py @@ -77,6 +77,14 @@ setup( # tsdbs 'pymarketstore', ], + extras_require={ + + # serialization + 'tsdb': [ + 'docker', + ], + + }, tests_require=['pytest'], python_requires=">=3.9", # literally for ``datetime.datetime.fromisoformat``... keywords=["async", "trading", "finance", "quant", "charting"], From a6c5902437b432ad2f3fb4d3b87b2e43983f4fbe Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 5 May 2022 21:04:10 -0400 Subject: [PATCH 053/105] More reliable `marketstored` + container supervision It turns out (i guess not so shockingly?) that `marketstore` doesn't always teardown "gracefully" under SIGINT (seems to hang if there are open client connections which are also in the midst of teardown?) 
so this instead first tries the SIGINT and then fails over to a SIGKILL (destroy loop) which seems to be much more reliable to ensure shutdown without any downside - in terms of a "hard kill". Originally i was thinking the issue was root perms related (which get relegated solely to the `marketstored` daemon actor after spawn) but actually it was indeed the signalling / application layer causing the hold-up/latency on teardown. There's a bunch of lingering (now commented) code which tried to solve this non-problem as well as a bunch logging/prints to help decipher the root of the issue - this will all get cleaned out shortly. --- piker/data/_ahab.py | 245 +++++++++++++++++++++++++++++++++----------- 1 file changed, 183 insertions(+), 62 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 52088e91..14e278a7 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -30,7 +30,7 @@ from trio_typing import TaskStatus import tractor import docker import json -from docker.models.containers import Container +from docker.models.containers import Container as DockerContainer from docker.errors import DockerException, APIError from requests.exceptions import ConnectionError, ReadTimeout @@ -133,6 +133,136 @@ async def open_docker( c.kill() +class Container: + ''' + Wrapper around a ``docker.models.containers.Container`` to include + log capture and relay through our native logging system and helper + method(s) for cancellation/teardown. + + ''' + def __init__( + self, + cntr: DockerContainer, + ) -> None: + + self.cntr = cntr + # log msg de-duplication + self.seen_so_far = set() + + async def process_logs_until( + self, + patt: str, + bp_on_msg: bool = False, + ) -> bool: + ''' + Attempt to capture container log messages and relay through our + native logging system. + + ''' + seen_so_far = self.seen_so_far + + while True: + logs = self.cntr.logs() + entries = logs.decode().split('\n') + for entry in entries: + + # ignore null lines + if not entry: + continue + + try: + record = json.loads(entry.strip()) + except json.JSONDecodeError: + if 'Error' in entry: + raise RuntimeError(entry) + raise + + msg = record['msg'] + level = record['level'] + if msg and entry not in seen_so_far: + seen_so_far.add(entry) + if bp_on_msg: + await tractor.breakpoint() + + getattr(log, level, log.error)(f'{msg}') + + if patt in msg: + return True + + # do a checkpoint so we don't block if cancelled B) + await trio.sleep(0.01) + + return False + + def try_signal( + self, + signal: str = 'SIGINT', + + ) -> bool: + try: + # XXX: market store doesn't seem to shutdown nicely all the + # time with this (maybe because there are still open grpc + # connections?) noticably after client connections have been + # made or are in use/teardown. It works just fine if you + # just start and stop the container tho?.. 
+ log.cancel(f'SENDING {signal} to {self.cntr.id}') + self.cntr.kill(signal) + return True + + except docker.errors.APIError as err: + # _err = err + if 'is not running' in err.explanation: + return False + + async def cancel( + self, + ) -> None: + + cid = self.cntr.id + self.try_signal('SIGINT') + + with trio.move_on_after(0.5) as cs: + cs.shield = True + # print('PROCESSINGN LOGS') + await self.process_logs_until('initiating graceful shutdown') + # print('SHUTDOWN REPORTED BY CONTAINER') + await self.process_logs_until('exiting...',) + + for _ in range(10): + with trio.move_on_after(0.5) as cs: + cs.shield = True + # print('waiting on EXITING') + await self.process_logs_until('exiting...',) + # print('got EXITING') + break + + if cs.cancelled_caught: + # get out the big guns, bc apparently marketstore + # doesn't actually know how to terminate gracefully + # :eyeroll:... + self.try_signal('SIGKILL') + + try: + log.info('Waiting on container shutdown: {cid}') + self.cntr.wait( + timeout=0.1, + condition='not-running', + ) + break + + except ( + ReadTimeout, + ConnectionError, + ): + log.error(f'failed to wait on container {cid}') + raise + + else: + raise RuntimeError('Failed to cancel container {cid}') + + log.cancel(f'Container stopped: {cid}') + + @tractor.context async def open_marketstored( ctx: tractor.Context, @@ -175,7 +305,7 @@ async def open_marketstored( type='bind', ) - cntr: Container = client.containers.run( + dcntr: DockerContainer = client.containers.run( 'alpacamarkets/marketstore:latest', # do we need this for cmds? # '-i', @@ -191,77 +321,59 @@ async def open_marketstored( init=True, # remove=True, ) - try: - seen_so_far = set() + cntr = Container(dcntr) - async def process_logs_until( - match: str, - bp_on_msg: bool = False, - ): - logs = cntr.logs(stream=True) - for entry in logs: - entry = entry.decode() + with trio.move_on_after(1): + found = await cntr.process_logs_until( + "launching tcp listener for all services...", + ) - try: - record = json.loads(entry.strip()) - except json.JSONDecodeError: - if 'Error' in entry: - raise RuntimeError(entry) - - msg = record['msg'] - level = record['level'] - if msg and entry not in seen_so_far: - seen_so_far.add(entry) - if bp_on_msg: - await tractor.breakpoint() - getattr(log, level, log.error)(f'{msg}') - - # if "launching tcp listener for all services..." in msg: - if match in msg: - return True - - # do a checkpoint so we don't block if cancelled B) - await trio.sleep(0) - - return False - - with trio.move_on_after(0.5): - found = await process_logs_until( - "launching tcp listener for all services...", + if not found and cntr not in client.containers.list(): + raise RuntimeError( + 'Failed to start `marketstore` check logs deats' ) - if not found and cntr not in client.containers.list(): - raise RuntimeError( - 'Failed to start `marketstore` check logs deats' - ) + await ctx.started((cntr.cntr.id, os.getpid())) - await ctx.started(cntr.id) + # async with ctx.open_stream() as stream: - # block for the expected "teardown log msg".. - await process_logs_until('exiting...',) + try: + + # TODO: we might eventually want a proxy-style msg-prot here + # to allow remote control of containers without needing + # callers to have root perms? + await trio.sleep_forever() + + # await cntr.cancel() + # with trio.CancelScope(shield=True): + # # block for the expected "teardown log msg".. + # # await cntr.process_logs_until('exiting...',) + + # # only msg should be to signal killing the + # # container and this super daemon. 
+ # msg = await stream.receive() + # # print("GOT CANCEL MSG") + + # cid = msg['cancel'] + # log.cancel(f'Cancelling container {cid}') + + # # print("CANCELLING CONTAINER") + # await cntr.cancel() + + # # print("SENDING ACK") + # await stream.send('ack') except ( BaseException, # trio.Cancelled, # KeyboardInterrupt, ): - cntr.kill('SIGINT') - with trio.move_on_after(0.5) as cs: - cs.shield = True - await process_logs_until('exiting...',) - raise - finally: - try: - cntr.wait( - timeout=0.5, - condition='not-running', - ) - except ( - ReadTimeout, - ConnectionError, - ): - cntr.kill() + with trio.CancelScope(shield=True): + await cntr.cancel() + # await stream.send('ack') + + raise async def start_ahab( @@ -311,9 +423,18 @@ async def start_ahab( open_marketstored, ) as (ctx, first): - assert str(first) - # run till cancelled + cid, pid = first + await trio.sleep_forever() + # async with ctx.open_stream() as stream: + # try: + # # run till cancelled + # await trio.sleep_forever() + # finally: + # with trio.CancelScope(shield=True): + # # print('SENDING CANCEL TO MARKETSTORED') + # await stream.send({'cancel': (cid, pid)}) + # assert await stream.receive() == 'ack' # since we demoted root perms in this parent # we'll get a perms error on proc cleanup in From e1a88cb93c10cdd86bcc64725dddcd6d71179ca9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 12 Apr 2022 11:40:48 -0400 Subject: [PATCH 054/105] Only update y mxmn from L1 when last index in view --- piker/ui/_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index 82f12196..ea399bff 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -599,7 +599,7 @@ def graphics_update_cycle( yrange=(mn, mx), ) - vars['last_mx'], vars['last_mn'] = mx, mn + vars['last_mx'], vars['last_mn'] = mx, mn # run synchronous update on all linked flows for curve_name, flow in chart._flows.items(): From 79160619bc499cd30926d93e565601849cba37c4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 11 Mar 2022 15:16:31 -0500 Subject: [PATCH 055/105] Drop old type annot --- piker/ui/_cursor.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/piker/ui/_cursor.py b/piker/ui/_cursor.py index ad3ddc98..c902fcb8 100644 --- a/piker/ui/_cursor.py +++ b/piker/ui/_cursor.py @@ -295,7 +295,8 @@ class ContentsLabels: class Cursor(pg.GraphicsObject): - '''Multi-plot cursor for use on a ``LinkedSplits`` chart (set). + ''' + Multi-plot cursor for use on a ``LinkedSplits`` chart (set). 
''' def __init__( @@ -310,7 +311,7 @@ class Cursor(pg.GraphicsObject): self.linked = linkedsplits self.graphics: dict[str, pg.GraphicsObject] = {} - self.plots: List['PlotChartWidget'] = [] # type: ignore # noqa + self.plots: list['PlotChartWidget'] = [] # type: ignore # noqa self.active_plot = None self.digits: int = digits self._datum_xy: tuple[int, float] = (0, 0) @@ -439,7 +440,10 @@ class Cursor(pg.GraphicsObject): if plot.linked.xaxis_chart is plot: xlabel = self.xaxis_label = XAxisLabel( parent=self.plots[plot_index].getAxis('bottom'), - # parent=self.plots[plot_index].pi_overlay.get_axis(plot.plotItem, 'bottom'), + # parent=self.plots[plot_index].pi_overlay.get_axis( + # plot.plotItem, 'bottom' + # ), + opacity=_ch_label_opac, bg_color=self.label_color, ) From 2a07005c970279d612466e66d290831dd640aad5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 16 Apr 2022 17:48:02 -0400 Subject: [PATCH 056/105] Add binance history client support with datetime use throughout --- piker/brokers/binance.py | 41 +++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index d0a00b68..7678c173 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -19,6 +19,7 @@ Binance backend """ from contextlib import asynccontextmanager as acm +from datetime import datetime from typing import ( Any, Union, Optional, AsyncGenerator, Callable, @@ -221,20 +222,22 @@ class Client: async def bars( self, symbol: str, - start_time: int = None, - end_time: int = None, + start_dt: Optional[datetime] = None, + end_dt: Optional[datetime] = None, limit: int = 1000, # <- max allowed per query as_np: bool = True, ) -> dict: - if start_time is None: - start_time = binance_timestamp( - pendulum.now('UTC').start_of('minute').subtract(minutes=limit) - ) + if end_dt is None: + end_dt = pendulum.now('UTC') - if end_time is None: - end_time = binance_timestamp(pendulum.now('UTC')) + if start_dt is None: + start_dt = end_dt.start_of( + 'minute').subtract(minutes=limit) + + start_time = binance_timestamp(start_dt) + end_time = binance_timestamp(end_dt) # https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data bars = await self._api( @@ -379,7 +382,27 @@ async def open_history_client( # TODO implement history getter for the new storage layer. 
async with open_cached_client('binance') as client: - yield client + + async def get_ohlc( + end_dt: Optional[datetime] = None, + start_dt: Optional[datetime] = None, + + ) -> tuple[ + np.ndarray, + datetime, # start + datetime, # end + ]: + + array = await client.bars( + symbol, + start_dt=start_dt, + end_dt=end_dt, + ) + start_dt = pendulum.from_timestamp(array[0]['time']) + end_dt = pendulum.from_timestamp(array[-1]['time']) + return array, start_dt, end_dt + + yield get_ohlc async def backfill_bars( From ab8629aa11477747e6ea676780c841d5952b926c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 16 Apr 2022 18:31:51 -0400 Subject: [PATCH 057/105] Make ib history client expect datetimes for input --- piker/brokers/ib.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/piker/brokers/ib.py b/piker/brokers/ib.py index f66f81c5..b337ea6c 100644 --- a/piker/brokers/ib.py +++ b/piker/brokers/ib.py @@ -295,6 +295,10 @@ class Client: global _enters # log.info(f'REQUESTING BARS {_enters} @ end={end_dt}') print(f'REQUESTING BARS {_enters} @ end={end_dt}') + + if not end_dt: + end_dt = '' + _enters += 1 contract = await self.find_contract(fqsn) @@ -1546,8 +1550,8 @@ async def open_history_client( async with open_client_proxy() as proxy: async def get_hist( - end_dt: str, - start_dt: str = '', + end_dt: Optional[datetime] = None, + start_dt: Optional[datetime] = None, ) -> tuple[np.ndarray, str]: From d4e0d4463f6cafb9e4a540da9372352cf35ca813 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 16 Apr 2022 18:32:36 -0400 Subject: [PATCH 058/105] Always update ohlc (main source chart) on `trigger_all=True` --- piker/ui/_display.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index ea399bff..0a0b8fd6 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -398,9 +398,11 @@ def graphics_update_cycle( ) if ( - (xpx < update_uppx or i_diff > 0) + ( + xpx < update_uppx or i_diff > 0 + and liv + ) or trigger_all - and liv ): # TODO: make it so this doesn't have to be called # once the $vlm is up? @@ -494,6 +496,7 @@ def graphics_update_cycle( if ( xpx < update_uppx or i_diff > 0 + or trigger_all ): chart.update_graphics_from_array( chart.name, From 7d8cf3eaf871e67e53dafb13124a06becc0254ac Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 16 Apr 2022 18:33:26 -0400 Subject: [PATCH 059/105] Factor subscription broadcasting into a func --- piker/data/_sampling.py | 43 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 4228f809..bdc7b4d0 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -22,7 +22,7 @@ financial data flows. from __future__ import annotations from collections import Counter import time -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import tractor import trio @@ -90,6 +90,7 @@ async def increment_ohlc_buffer( total_s = 0 # total seconds counted lowest = min(sampler.ohlcv_shms.keys()) + lowest_shm = sampler.ohlcv_shms[lowest][0] ad = lowest - 0.001 with trio.CancelScope() as cs: @@ -133,21 +134,33 @@ async def increment_ohlc_buffer( # write to the buffer shm.push(last) - # broadcast the buffer index step to any subscribers for - # a given sample period. 
- subs = sampler.subscribers.get(delay_s, ()) + await broadcast(delay_s, shm=lowest_shm) - for stream in subs: - try: - await stream.send({'index': shm._last.value}) - except ( - trio.BrokenResourceError, - trio.ClosedResourceError - ): - log.error( - f'{stream._ctx.chan.uid} dropped connection' - ) - subs.remove(stream) + +async def broadcast( + delay_s: int, + shm: Optional[ShmArray] = None, + +) -> None: + # broadcast the buffer index step to any subscribers for + # a given sample period. + subs = sampler.subscribers.get(delay_s, ()) + + if shm is None: + lowest = min(sampler.ohlcv_shms.keys()) + shm = sampler.ohlcv_shms[lowest][0] + + for stream in subs: + try: + await stream.send({'index': shm._last.value}) + except ( + trio.BrokenResourceError, + trio.ClosedResourceError + ): + log.error( + f'{stream._ctx.chan.uid} dropped connection' + ) + subs.remove(stream) @tractor.context From bcf3be1fe4c18e3783f6128ad97ebcf6a88cac10 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 16 Apr 2022 18:34:22 -0400 Subject: [PATCH 060/105] A bit hacky but, broadcast index streams on each history prepend --- piker/data/feed.py | 68 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index e6f4990e..8cb89d81 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -22,6 +22,7 @@ This module is enabled for ``brokerd`` daemons. """ from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime from contextlib import asynccontextmanager from functools import partial from types import ModuleType @@ -31,6 +32,7 @@ from typing import ( Awaitable, ) +import pendulum import trio from trio.abc import ReceiveChannel from trio_typing import TaskStatus @@ -59,6 +61,7 @@ from ._source import ( from ..ui import _search from ._sampling import ( sampler, + broadcast, increment_ohlc_buffer, iter_ohlc_periods, sample_and_broadcast, @@ -250,6 +253,8 @@ async def manage_history( open_history_client = getattr(mod, 'open_history_client', None) + bfqsn = fqsn.replace('.' + mod.name, '') + if is_up and opened and open_history_client: log.info('Found existing `marketstored`') @@ -281,45 +286,69 @@ async def manage_history( array, start_dt, end_dt, + last_tsdb_dt: Optional[datetime] = None ) -> np.ndarray: - s_diff = (last_tsdb_dt - start_dt).seconds + if last_tsdb_dt: + s_diff = (last_tsdb_dt - start_dt).seconds - # if we detect a partial frame's worth of data - # that is new, slice out only that history and - # write to shm. - if s_diff > 0: - assert last_tsdb_dt > start_dt - selected = array['time'] > last_tsdb_dt.timestamp() - to_push = array[selected] - log.info( - f'Pushing partial frame {to_push.size} to shm' - ) - return to_push + # if we detect a partial frame's worth of data + # that is new, slice out only that history and + # write to shm. + if s_diff > 0: + assert last_tsdb_dt > start_dt + selected = array['time'] > last_tsdb_dt.timestamp() + to_push = array[selected] + log.info( + f'Pushing partial frame {to_push.size} to shm' + ) + return to_push - else: - return array + return array # start history anal and load missing new data via backend. 
- async with open_history_client(fqsn) as hist: + + broker, symbol, expiry = unpack_fqsn(fqsn) + + async with open_history_client(bfqsn) as hist: # get latest query's worth of history all the way # back to what is recorded in the tsdb - array, start_dt, end_dt = await hist(end_dt='') - to_push = diff_history(array, start_dt, end_dt) + array, start_dt, end_dt = await hist(end_dt=None) + to_push = diff_history( + array, + start_dt, + end_dt, + last_tsdb_dt=last_tsdb_dt, + ) + log.info(f'Pushing {to_push.size} to shm!') shm.push(to_push) + for delay_s in sampler.subscribers: + await broadcast(delay_s) + # let caller unblock and deliver latest history frame task_status.started(shm) some_data_ready.set() # pull new history frames until we hit latest # already in the tsdb - while start_dt > last_tsdb_dt: + # while start_dt > last_tsdb_dt: + while True: array, start_dt, end_dt = await hist(end_dt=start_dt) - to_push = diff_history(array, start_dt, end_dt) + to_push = diff_history( + array, + start_dt, + end_dt, + # last_tsdb_dt=last_tsdb_dt, + # just run indefinitely + last_tsdb_dt=None, + ) + log.info(f'Pushing {to_push.size} to shm!') shm.push(to_push, prepend=True) + for delay_s in sampler.subscribers: + await broadcast(delay_s) # TODO: see if there's faster multi-field reads: # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields @@ -348,7 +377,6 @@ async def manage_history( # do a legacy incremental backfill from the provider. log.info('No existing `marketstored` found..') - bfqsn = fqsn.replace('.' + mod.name, '') # start history backfill task ``backfill_bars()`` is # a required backend func this must block until shm is # filled with first set of ohlc bars From 46c23e90dbd21edd7019d01e31d6daad384589ab Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 17 Apr 2022 15:12:26 -0400 Subject: [PATCH 061/105] Add `Storage.load()` and `.write_ohlcv()` --- piker/data/marketstore.py | 115 ++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index f82770ab..50804031 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -24,6 +24,7 @@ ''' from contextlib import asynccontextmanager as acm +from datetime import datetime from pprint import pformat from typing import ( Any, @@ -45,6 +46,7 @@ from anyio_marketstore import ( MarketstoreClient, Params, ) +import pendulum import purerpc from .feed import maybe_open_feed @@ -240,8 +242,29 @@ class Storage: async def write_ticks(self, ticks: list) -> None: ... - async def write_ohlcv(self, ohlcv: np.ndarray) -> None: - ... 
+ async def load( + self, + fqsn: str, + + ) -> tuple[ + dict[int, np.ndarray], # timeframe (in secs) to series + Optional[datetime], # first dt + Optional[datetime], # last dt + ]: + + first_tsdb_dt, last_tsdb_dt = None, None + tsdb_arrays = await self.read_ohlcv(fqsn) + log.info(f'Loaded tsdb history {tsdb_arrays}') + + if tsdb_arrays: + fastest = list(tsdb_arrays.values())[0] + times = fastest['Epoch'] + first, last = times[0], times[-1] + first_tsdb_dt, last_tsdb_dt = map( + pendulum.from_timestamp, [first, last] + ) + + return tsdb_arrays, first_tsdb_dt, last_tsdb_dt async def read_ohlcv( self, @@ -319,6 +342,49 @@ class Storage: return await client.destroy(tbk=key) + async def write_ohlcv( + self, + fqsn: str, + ohlcv: np.ndarray, + + ) -> None: + # build mkts schema compat array for writing + mkts_dt = np.dtype(_ohlcv_dt) + mkts_array = np.zeros( + len(ohlcv), + dtype=mkts_dt, + ) + # copy from shm array (yes it's this easy): + # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays + mkts_array[:] = ohlcv[[ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + ]] + + # write to db + resp = await self.client.write( + mkts_array, + tbk=f'{fqsn}/1Sec/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=True, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + @acm async def open_storage_client( @@ -402,6 +468,9 @@ async def tsdb_history_update( to_append = feed.shm.array to_prepend = None + from tractor.trionics import ipython_embed + await ipython_embed() + # hist diffing if tsdb_arrays: onesec = tsdb_arrays[1] @@ -417,47 +486,9 @@ async def tsdb_history_update( log.info( f'Writing datums {array.size} -> to tsdb from shm\n' ) + await storage.write_ohlcv(fqsn, array) - # build mkts schema compat array for writing - mkts_dt = np.dtype(_ohlcv_dt) - mkts_array = np.zeros( - len(array), - dtype=mkts_dt, - ) - # copy from shm array (yes it's this easy): - # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays - mkts_array[:] = array[[ - 'time', - 'open', - 'high', - 'low', - 'close', - 'volume', - ]] - - # write to db - resp = await storage.client.write( - mkts_array, - tbk=f'{fqsn}/1Sec/OHLCV', - - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre deduplicate? 
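For reference, the field-mapped copy in the new `write_ohlcv()` relies on numpy's structured-array assignment rules: when a multi-field selection is assigned into another structured array, fields are matched by position rather than by name. A minimal standalone sketch, with illustrative dtypes and values only:

import numpy as np

# source uses piker's lower-case ohlc field names
src = np.array(
    [(1651500000, 10.0, 10.5, 9.9, 10.2, 100.0)],
    dtype=[
        ('time', 'i8'), ('open', 'f8'), ('high', 'f8'),
        ('low', 'f8'), ('close', 'f8'), ('volume', 'f8'),
    ],
)

# destination mirrors marketstore's capitalized schema
dst = np.zeros(len(src), dtype=[
    ('Epoch', 'i8'), ('Open', 'f8'), ('High', 'f8'),
    ('Low', 'f8'), ('Close', 'f8'), ('Volume', 'f8'),
])

# multi-field selection assigned by *position*, so the differing
# field names on each side don't matter
dst[:] = src[['time', 'open', 'high', 'low', 'close', 'volume']]
assert dst['Epoch'][0] == src['time'][0]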
- isvariablelength=True, - ) - - log.info( - f'Wrote {to_append.size} datums to tsdb\n' - ) - profiler('Finished db writes') - - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) - - from tractor.trionics import ipython_embed - await ipython_embed() + profiler('Finished db writes') async def ingest_quote_stream( From 727d3cc027547289ef17c7a7bc1191e4c0b7b7ec Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 17 Apr 2022 15:13:07 -0400 Subject: [PATCH 062/105] Unify backfilling logic into common task-routine --- piker/data/feed.py | 235 +++++++++++++++++++++++---------------------- 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 8cb89d81..0a24f747 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -32,7 +32,6 @@ from typing import ( Awaitable, ) -import pendulum import trio from trio.abc import ReceiveChannel from trio_typing import TaskStatus @@ -194,20 +193,97 @@ async def _setup_persistent_brokerd( await trio.sleep_forever() +def diff_history( + array, + start_dt, + end_dt, + last_tsdb_dt: Optional[datetime] = None + +) -> np.ndarray: + + if last_tsdb_dt: + s_diff = (last_tsdb_dt - start_dt).seconds + + # if we detect a partial frame's worth of data + # that is new, slice out only that history and + # write to shm. + if s_diff > 0: + assert last_tsdb_dt > start_dt + selected = array['time'] > last_tsdb_dt.timestamp() + to_push = array[selected] + log.info( + f'Pushing partial frame {to_push.size} to shm' + ) + return to_push + + return array + + async def start_backfill( mod: ModuleType, - fqsn: str, + bfqsn: str, shm: ShmArray, + last_tsdb_dt: Optional[datetime] = None, + do_legacy: bool = False, + task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, ) -> int: - return await mod.backfill_bars( - fqsn, - shm, - task_status=task_status, - ) + if do_legacy: + return await mod.backfill_bars( + bfqsn, + shm, + task_status=task_status, + ) + + async with mod.open_history_client(bfqsn) as hist: + + # get latest query's worth of history all the way + # back to what is recorded in the tsdb + array, start_dt, end_dt = await hist(end_dt=None) + + to_push = diff_history( + array, + start_dt, + end_dt, + last_tsdb_dt=last_tsdb_dt, + ) + + log.info(f'Pushing {to_push.size} to shm!') + shm.push(to_push) + + for delay_s in sampler.subscribers: + await broadcast(delay_s) + + # let caller unblock and deliver latest history frame + task_status.started(shm) + + # pull new history frames until we hit latest + # already in the tsdb + # while start_dt > last_tsdb_dt: + while True: + array, start_dt, end_dt = await hist(end_dt=start_dt) + to_push = diff_history( + array, + start_dt, + end_dt, + + # last_tsdb_dt=last_tsdb_dt, + # XXX: hacky, just run indefinitely + last_tsdb_dt=None, + ) + log.info(f'Pushing {to_push.size} to shm!') + + # bail on shm allocation overrun + try: + shm.push(to_push, prepend=True) + except ValueError: + break + + for delay_s in sampler.subscribers: + await broadcast(delay_s) async def manage_history( @@ -251,127 +327,59 @@ async def manage_history( # for now only do backfilling if no tsdb can be found do_legacy_backfill = not is_up and opened - open_history_client = getattr(mod, 'open_history_client', None) - bfqsn = fqsn.replace('.' + mod.name, '') + open_history_client = getattr(mod, 'open_history_client', None) if is_up and opened and open_history_client: log.info('Found existing `marketstored`') from . 
import marketstore - async with marketstore.open_storage_client( fqsn, ) as storage: - tsdb_arrays = await storage.read_ohlcv(fqsn) + # TODO: this should be used verbatim for the pure + # shm backfiller approach below. - if not tsdb_arrays: - do_legacy_backfill = True + # start history anal and load missing new data via backend. + series, first_dt, last_dt = await storage.load(fqsn) - else: - log.info(f'Loaded tsdb history {tsdb_arrays}') - - fastest = list(tsdb_arrays.values())[0] - times = fastest['Epoch'] - first, last = times[0], times[-1] - first_tsdb_dt, last_tsdb_dt = map( - pendulum.from_timestamp, [first, last] + broker, symbol, expiry = unpack_fqsn(fqsn) + await bus.nursery.start( + partial( + start_backfill, + mod, + bfqsn, + shm, + last_tsdb_dt=last_dt, ) + ) + task_status.started(shm) + some_data_ready.set() - # TODO: this should be used verbatim for the pure - # shm backfiller approach below. + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + history = list(series.values()) + if history: + fastest = history[0] + shm.push( + fastest[-shm._first.value:], - def diff_history( - array, - start_dt, - end_dt, - last_tsdb_dt: Optional[datetime] = None - - ) -> np.ndarray: - - if last_tsdb_dt: - s_diff = (last_tsdb_dt - start_dt).seconds - - # if we detect a partial frame's worth of data - # that is new, slice out only that history and - # write to shm. - if s_diff > 0: - assert last_tsdb_dt > start_dt - selected = array['time'] > last_tsdb_dt.timestamp() - to_push = array[selected] - log.info( - f'Pushing partial frame {to_push.size} to shm' - ) - return to_push - - return array - - # start history anal and load missing new data via backend. - - broker, symbol, expiry = unpack_fqsn(fqsn) - - async with open_history_client(bfqsn) as hist: - - # get latest query's worth of history all the way - # back to what is recorded in the tsdb - array, start_dt, end_dt = await hist(end_dt=None) - to_push = diff_history( - array, - start_dt, - end_dt, - last_tsdb_dt=last_tsdb_dt, - ) - log.info(f'Pushing {to_push.size} to shm!') - shm.push(to_push) - - for delay_s in sampler.subscribers: - await broadcast(delay_s) - - # let caller unblock and deliver latest history frame - task_status.started(shm) - some_data_ready.set() - - # pull new history frames until we hit latest - # already in the tsdb - # while start_dt > last_tsdb_dt: - while True: - array, start_dt, end_dt = await hist(end_dt=start_dt) - to_push = diff_history( - array, - start_dt, - end_dt, - # last_tsdb_dt=last_tsdb_dt, - # just run indefinitely - last_tsdb_dt=None, - ) - log.info(f'Pushing {to_push.size} to shm!') - shm.push(to_push, prepend=True) - for delay_s in sampler.subscribers: - await broadcast(delay_s) - - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - # re-index with a `time` and index field - shm.push( - fastest[-shm._first.value:], - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=True, - # start=shm._len - _secs_in_day, - field_map={ - 'Epoch': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', - }, - ) - - # TODO: write new data to tsdb to be ready to for next - # read. + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. 
+ prepend=True, + # start=shm._len - _secs_in_day, + field_map={ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', + }, + ) + # TODO: write new data to tsdb to be ready to for next read. if do_legacy_backfill: # do a legacy incremental backfill from the provider. @@ -385,6 +393,7 @@ async def manage_history( mod, bfqsn, shm, + do_legacy=True, ) # yield back after client connect with filled shm From d3824c8c0b2744261ccb7d18427390b46f31aee5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 18 Apr 2022 08:47:25 -0400 Subject: [PATCH 063/105] Start legacy backfill with partial too --- piker/data/feed.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 0a24f747..78e95fc0 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -389,11 +389,13 @@ async def manage_history( # a required backend func this must block until shm is # filled with first set of ohlc bars await bus.nursery.start( - start_backfill, - mod, - bfqsn, - shm, - do_legacy=True, + partial( + start_backfill, + mod, + bfqsn, + shm, + do_legacy=True, + ) ) # yield back after client connect with filled shm From 3056bc3143bd0cc262a6943d241f815c557659be Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 24 Apr 2022 12:41:35 -0400 Subject: [PATCH 064/105] Don't run legacy backfill when isn't up --- piker/data/feed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 78e95fc0..ce16f76d 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -394,7 +394,7 @@ async def manage_history( mod, bfqsn, shm, - do_legacy=True, + # do_legacy=True, ) ) From b8b95f10814a1e063a7d8bb60a14796911d1c352 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 23 Apr 2022 17:30:00 -0400 Subject: [PATCH 065/105] Don't open a feed, write or read ohlc in for now --- piker/data/marketstore.py | 64 ++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 50804031..ef4a9657 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -405,7 +405,7 @@ async def open_storage_client( async def tsdb_history_update( - fqsn: str, + fqsn: Optional[str] = None, ) -> list[str]: @@ -443,50 +443,52 @@ async def tsdb_history_update( async with ( open_storage_client(fqsn) as storage, - maybe_open_feed( - [fqsn], - start_stream=False, + # maybe_open_feed( + # [fqsn], + # start_stream=False, - ) as (feed, stream), + # ) as (feed, stream), ): profiler(f'opened feed for {fqsn}') - symbol = feed.symbols.get(fqsn) - if symbol: - fqsn = symbol.front_fqsn() + + # to_append = feed.shm.array + # to_prepend = None + + if fqsn: + symbol = feed.symbols.get(fqsn) + if symbol: + fqsn = symbol.front_fqsn() + + # diff db history with shm and only write the missing portions + ohlcv = feed.shm.array + + # TODO: use pg profiler + tsdb_arrays = await storage.read_ohlcv(fqsn) + # hist diffing + if tsdb_arrays: + onesec = tsdb_arrays[1] + to_append = ohlcv[ohlcv['time'] > onesec['Epoch'][-1]] + to_prepend = ohlcv[ohlcv['time'] < onesec['Epoch'][0]] + + profiler('Finished db arrays diffs') syms = await storage.client.list_symbols() log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') profiler(f'listed symbols {syms}') - # diff db history with shm and only write the missing portions - ohlcv = feed.shm.array - - # TODO: use pg profiler - tsdb_arrays = await 
storage.read_ohlcv(fqsn) - - to_append = feed.shm.array - to_prepend = None - from tractor.trionics import ipython_embed await ipython_embed() - # hist diffing - if tsdb_arrays: - onesec = tsdb_arrays[1] - to_append = ohlcv[ohlcv['time'] > onesec['Epoch'][-1]] - to_prepend = ohlcv[ohlcv['time'] < onesec['Epoch'][0]] - profiler('Finished db arrays diffs') + # for array in [to_append, to_prepend]: + # if array is None: + # continue - for array in [to_append, to_prepend]: - if array is None: - continue - - log.info( - f'Writing datums {array.size} -> to tsdb from shm\n' - ) - await storage.write_ohlcv(fqsn, array) + # log.info( + # f'Writing datums {array.size} -> to tsdb from shm\n' + # ) + # await storage.write_ohlcv(fqsn, array) profiler('Finished db writes') From d244af69c931f797d6c9f86ea32703ece1585f91 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 23 Apr 2022 17:30:27 -0400 Subject: [PATCH 066/105] Don't require a symbol to subcmd --- piker/data/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 554048a4..90992201 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -148,7 +148,7 @@ def storesh( enable_modules=['piker.data._ahab'], ): symbol = symbols[0] - await tsdb_history_update(symbol) + await tsdb_history_update() trio.run(main) From 0f683205f4688151483524d79bbf346b5dcfde7c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 24 Apr 2022 11:39:34 -0400 Subject: [PATCH 067/105] Add 16 fetch limit if no tsdb data found --- piker/data/feed.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index ce16f76d..9bca88d8 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -37,6 +37,7 @@ from trio.abc import ReceiveChannel from trio_typing import TaskStatus import tractor from pydantic import BaseModel +import pendulum import numpy as np from ..brokers import get_brokermod @@ -260,10 +261,21 @@ async def start_backfill( # let caller unblock and deliver latest history frame task_status.started(shm) + if last_tsdb_dt is None: + # maybe a better default (they don't seem to define epoch?!) 
+ last_tsdb_dt = pendulum.yesterday() + + # pull new history frames until we hit latest # already in the tsdb - # while start_dt > last_tsdb_dt: - while True: + mx_fills = 16 + count = 0 + while ( + start_dt > last_tsdb_dt + and count > mx_fills + ): + # while True: + count += 1 array, start_dt, end_dt = await hist(end_dt=start_dt) to_push = diff_history( array, From 8bf40ae29936883cbd0db4d3a40932c780a585fd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Apr 2022 09:28:51 -0400 Subject: [PATCH 068/105] Drop legacy backfilling, load a day's worth of data by default --- piker/data/feed.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 9bca88d8..e51cfce5 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -226,18 +226,18 @@ async def start_backfill( shm: ShmArray, last_tsdb_dt: Optional[datetime] = None, - do_legacy: bool = False, + # do_legacy: bool = False, task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, ) -> int: - if do_legacy: - return await mod.backfill_bars( - bfqsn, - shm, - task_status=task_status, - ) + # if do_legacy: + # return await mod.backfill_bars( + # bfqsn, + # shm, + # task_status=task_status, + # ) async with mod.open_history_client(bfqsn) as hist: @@ -263,16 +263,16 @@ async def start_backfill( if last_tsdb_dt is None: # maybe a better default (they don't seem to define epoch?!) - last_tsdb_dt = pendulum.yesterday() + last_tsdb_dt = pendulum.now().subtract(days=1) # pull new history frames until we hit latest - # already in the tsdb + # already in the tsdb or a max count. mx_fills = 16 count = 0 while ( start_dt > last_tsdb_dt - and count > mx_fills + # and count < mx_fills ): # while True: count += 1 @@ -286,6 +286,7 @@ async def start_backfill( # XXX: hacky, just run indefinitely last_tsdb_dt=None, ) + print("fPULLING {count}") log.info(f'Pushing {to_push.size} to shm!') # bail on shm allocation overrun From 2f04a8c9395e2605e7eb701a17e0a3c5dfdd68e5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 27 Apr 2022 17:13:15 -0400 Subject: [PATCH 069/105] Drop legacy back-filling logic Use the new `open_history_client()` endpoint/API and expect backends to provide a history "getter" routine that can be called to load historical data into shm even when **not** using a tsdb. Add logic for filling in data from the tsdb once the backend has provided data up to the last recorded in the db. Add logic for avoiding overruns of the shm buffer with more-then-necessary queries of tsdb data. --- piker/data/feed.py | 125 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 98 insertions(+), 27 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index e51cfce5..0d0156b6 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -203,21 +203,26 @@ def diff_history( ) -> np.ndarray: if last_tsdb_dt: - s_diff = (last_tsdb_dt - start_dt).seconds + s_diff = (start_dt - last_tsdb_dt).seconds + + to_push = array[:s_diff] # if we detect a partial frame's worth of data # that is new, slice out only that history and # write to shm. 
- if s_diff > 0: - assert last_tsdb_dt > start_dt - selected = array['time'] > last_tsdb_dt.timestamp() - to_push = array[selected] + if abs(s_diff) < len(array): log.info( f'Pushing partial frame {to_push.size} to shm' ) - return to_push + # assert last_tsdb_dt > start_dt + # selected = array['time'] > last_tsdb_dt.timestamp() + # to_push = array[selected] + # return to_push - return array + return to_push + + else: + return array async def start_backfill( @@ -226,19 +231,11 @@ async def start_backfill( shm: ShmArray, last_tsdb_dt: Optional[datetime] = None, - # do_legacy: bool = False, task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, ) -> int: - # if do_legacy: - # return await mod.backfill_bars( - # bfqsn, - # shm, - # task_status=task_status, - # ) - async with mod.open_history_client(bfqsn) as hist: # get latest query's worth of history all the way @@ -258,23 +255,23 @@ async def start_backfill( for delay_s in sampler.subscribers: await broadcast(delay_s) + bf_done = trio.Event() # let caller unblock and deliver latest history frame - task_status.started(shm) + task_status.started((shm, start_dt, end_dt, bf_done)) if last_tsdb_dt is None: # maybe a better default (they don't seem to define epoch?!) last_tsdb_dt = pendulum.now().subtract(days=1) - # pull new history frames until we hit latest # already in the tsdb or a max count. - mx_fills = 16 + # mx_fills = 16 count = 0 + # while True: while ( - start_dt > last_tsdb_dt + end_dt > last_tsdb_dt # and count < mx_fills ): - # while True: count += 1 array, start_dt, end_dt = await hist(end_dt=start_dt) to_push = diff_history( @@ -282,22 +279,31 @@ async def start_backfill( start_dt, end_dt, - # last_tsdb_dt=last_tsdb_dt, + last_tsdb_dt=last_tsdb_dt, # XXX: hacky, just run indefinitely - last_tsdb_dt=None, + # last_tsdb_dt=None, ) - print("fPULLING {count}") + print(f"PULLING {count}") log.info(f'Pushing {to_push.size} to shm!') + if to_push.size < 1: + break + # bail on shm allocation overrun try: shm.push(to_push, prepend=True) except ValueError: + await tractor.breakpoint() break for delay_s in sampler.subscribers: await broadcast(delay_s) + bf_done.set() + # update start index to include all tsdb history + # that was pushed in the caller parent task. + # shm._first.value = 0 + async def manage_history( mod: ModuleType, @@ -358,7 +364,12 @@ async def manage_history( series, first_dt, last_dt = await storage.load(fqsn) broker, symbol, expiry = unpack_fqsn(fqsn) - await bus.nursery.start( + ( + shm, + latest_start_dt, + latest_end_dt, + bf_done, + ) = await bus.nursery.start( partial( start_backfill, mod, @@ -370,19 +381,37 @@ async def manage_history( task_status.started(shm) some_data_ready.set() + await bf_done.wait() + # do diff against last start frame of history and only fill + # in from the tsdb an allotment that allows for most recent + # to be loaded into mem *before* tsdb data. 
+ if last_dt: + dt_diff_s = (latest_start_dt - last_dt).seconds + else: + dt_diff_s = 0 + + # await trio.sleep_forever() # TODO: see if there's faster multi-field reads: # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields # re-index with a `time` and index field + prepend_start = shm._first.value + + # sanity check on most-recent-data loading + assert prepend_start > dt_diff_s + history = list(series.values()) if history: fastest = history[0] + to_push = fastest[:prepend_start] + shm.push( - fastest[-shm._first.value:], + to_push, # insert the history pre a "days worth" of samples # to leave some real-time buffer space at the end. prepend=True, - # start=shm._len - _secs_in_day, + # update_first=False, + # start=prepend_start, field_map={ 'Epoch': 'time', 'Open': 'open', @@ -392,6 +421,49 @@ async def manage_history( 'Volume': 'volume', }, ) + + # load as much from storage into shm as spacec will + # allow according to user's shm size settings. + count = 0 + end = fastest['Epoch'][0] + + while shm._first.value > 0: + count += 1 + series = await storage.read_ohlcv( + fqsn, + end=end, + ) + history = list(series.values()) + fastest = history[0] + end = fastest['Epoch'][0] + prepend_start -= len(to_push) + to_push = fastest[:prepend_start] + + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + # start=prepend_start, + field_map={ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', + }, + ) + for delay_s in sampler.subscribers: + await broadcast(delay_s) + + if count > 6: + break + + log.info(f'Loaded {to_push.shape} datums from storage') + # TODO: write new data to tsdb to be ready to for next read. if do_legacy_backfill: @@ -407,7 +479,6 @@ async def manage_history( mod, bfqsn, shm, - # do_legacy=True, ) ) From 0061fabb561f726f3bb562ed7c00f22f8a653115 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 27 Apr 2022 17:17:04 -0400 Subject: [PATCH 070/105] More tolerance for "stream-ended-early" conditions in quote throttler --- piker/data/_sampling.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index bdc7b4d0..b5a75a67 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -380,7 +380,12 @@ async def uniform_rate_send( if left_to_sleep > 0: with trio.move_on_after(left_to_sleep) as cs: - sym, last_quote = await quote_stream.receive() + try: + sym, last_quote = await quote_stream.receive() + except trio.EndOfChannel: + log.exception(f"feed for {stream} ended?") + break + diff = time.time() - last_send if not first_quote: From 423af373891f46253f85fe128b1adcbb5855d473 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 27 Apr 2022 17:17:40 -0400 Subject: [PATCH 071/105] Truncate trade rate wma window sizes --- piker/fsp/_momo.py | 1 + piker/fsp/_volume.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/piker/fsp/_momo.py b/piker/fsp/_momo.py index 01e41c04..488ae22c 100644 --- a/piker/fsp/_momo.py +++ b/piker/fsp/_momo.py @@ -167,6 +167,7 @@ def _wma( assert length == len(weights) + # lol, for long sequences this is nutso slow and expensive.. 
return np.convolve(signal, weights, 'valid') diff --git a/piker/fsp/_volume.py b/piker/fsp/_volume.py index 47211234..b5456fac 100644 --- a/piker/fsp/_volume.py +++ b/piker/fsp/_volume.py @@ -309,7 +309,7 @@ async def flow_rates( if period > 1: trade_rate_wma = _wma( - dvlm_shm.array['trade_count'], + dvlm_shm.array['trade_count'][-period:], period, weights=weights, ) @@ -332,7 +332,7 @@ async def flow_rates( if period > 1: dark_trade_rate_wma = _wma( - dvlm_shm.array['dark_trade_count'], + dvlm_shm.array['dark_trade_count'][-period:], period, weights=weights, ) From c6724939980e3a59c88e2740e78ce4143b71925a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Apr 2022 08:12:29 -0400 Subject: [PATCH 072/105] Add , indicates hist size to decrement to storage logic --- piker/brokers/_util.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/piker/brokers/_util.py b/piker/brokers/_util.py index 6fcf11f7..acd75d6c 100644 --- a/piker/brokers/_util.py +++ b/piker/brokers/_util.py @@ -33,7 +33,22 @@ class SymbolNotFound(BrokerError): class NoData(BrokerError): - "Symbol data not permitted" + ''' + Symbol data not permitted or no data + for time range found. + + ''' + def __init__( + self, + *args, + frame_size: int = 1000, + + ) -> None: + super().__init__(*args) + + # when raised, machinery can check if the backend + # set a "frame size" for doing datetime calcs. + self.frame_size: int = 1000 def resproc( @@ -50,12 +65,12 @@ def resproc( if not resp.status_code == 200: raise BrokerError(resp.body) try: - json = resp.json() + msg = resp.json() except json.decoder.JSONDecodeError: log.exception(f"Failed to process {resp}:\n{resp.text}") raise BrokerError(resp.text) if log_resp: - log.debug(f"Received json contents:\n{colorize_json(json)}") + log.debug(f"Received json contents:\n{colorize_json(msg)}") - return json if return_json else resp + return msg if return_json else resp From a3db5d1bdc801fd8a367ecaa4db6103f5cb0b385 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Apr 2022 10:05:52 -0400 Subject: [PATCH 073/105] Relay frame size in `NoData` due to null-result history --- piker/brokers/ib.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/piker/brokers/ib.py b/piker/brokers/ib.py index b337ea6c..ad752cce 100644 --- a/piker/brokers/ib.py +++ b/piker/brokers/ib.py @@ -1482,7 +1482,9 @@ async def get_bars( if 'No market data permissions for' in msg: # TODO: signalling for no permissions searches - raise NoData(f'Symbol: {fqsn}') + raise NoData( + f'Symbol: {fqsn}', + ) break elif ( @@ -1562,7 +1564,10 @@ async def open_history_client( if out == (None, None): # could be trying to retreive bars over weekend log.error(f"Can't grab bars starting at {end_dt}!?!?") - raise NoData(f'{end_dt}') + raise NoData( + f'{end_dt}', + frame_size=2000, + ) bars, bars_array, first_dt, last_dt = out From 6ba3c15c4e230bf1bd3467592e61442198329876 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Apr 2022 11:25:22 -0400 Subject: [PATCH 074/105] Add to signal broker won't deliver more data --- piker/brokers/_util.py | 19 +++++++++++++++++++ piker/data/feed.py | 18 +++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/piker/brokers/_util.py b/piker/brokers/_util.py index acd75d6c..d1b2aac5 100644 --- a/piker/brokers/_util.py +++ b/piker/brokers/_util.py @@ -51,6 +51,25 @@ class NoData(BrokerError): self.frame_size: int = 1000 +class DataUnavailable(BrokerError): + ''' + Signal storage requests to 
terminate. + + ''' + # TODO: add in a reason that can be displayed in the + # UI (for eg. `kraken` is bs and you should complain + # to them that you can't pull more OHLC data..) + + +class DataThrottle(BrokerError): + ''' + Broker throttled request rate for data. + + ''' + # TODO: add in throttle metrics/feedback + + + def resproc( resp: asks.response_objects.Response, log: logging.Logger, diff --git a/piker/data/feed.py b/piker/data/feed.py index 0d0156b6..b00cf70e 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -67,6 +67,10 @@ from ._sampling import ( sample_and_broadcast, uniform_rate_send, ) +from ..brokers._util import ( + NoData, + DataUnavailable, +) log = get_logger(__name__) @@ -273,7 +277,19 @@ async def start_backfill( # and count < mx_fills ): count += 1 - array, start_dt, end_dt = await hist(end_dt=start_dt) + try: + array, start_dt, end_dt = await hist(end_dt=start_dt) + + except NoData: + # decrement by the diff in time last delivered. + end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds) + continue + + except DataUnavailable: + # broker is being a bish and we can't pull + # any more.. + break + to_push = diff_history( array, start_dt, From 49509d55d2d870b0eab1af81ad8b78e3f64d8a07 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Apr 2022 11:26:15 -0400 Subject: [PATCH 075/105] Implement `open_history_client()` correctly for `kraken` --- piker/brokers/kraken.py | 81 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 9 deletions(-) diff --git a/piker/brokers/kraken.py b/piker/brokers/kraken.py index 44e4e6b0..0f5e2f2a 100644 --- a/piker/brokers/kraken.py +++ b/piker/brokers/kraken.py @@ -20,7 +20,8 @@ Kraken backend. ''' from contextlib import asynccontextmanager as acm from dataclasses import asdict, field -from typing import Any, Optional, AsyncIterator, Callable +from datetime import datetime +from typing import Any, Optional, AsyncIterator, Callable, Union import time from trio_typing import TaskStatus @@ -40,7 +41,13 @@ import base64 from .. import config from .._cacheables import open_cached_client -from ._util import resproc, SymbolNotFound, BrokerError +from ._util import ( + resproc, + SymbolNotFound, + BrokerError, + DataThrottle, + DataUnavailable, +) from ..log import get_logger, get_console_log from ..data import ShmArray from ..data._web_bs import open_autorecon_ws, NoBsWs @@ -305,7 +312,7 @@ class Client: action: str, size: float, reqid: str = None, - validate: bool = False # set True test call without a real submission + validate: bool = False # set True test call without a real submission ) -> dict: ''' Place an order and return integer request id provided by client. 
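For orientation, the way these new exception types are meant to be consumed (by the kraken history client further below and by the shm backfiller in `feed.py`) looks roughly like the following; the wrapper function and retry count here are hypothetical:

import trio

from piker.brokers._util import (
    NoData,
    DataUnavailable,
    DataThrottle,
)


async def fetch_frame(get_ohlc, end_dt, retries: int = 3):
    '''
    Hypothetical frame fetcher showing the intended handling of
    the broker error types added above.

    '''
    for _ in range(retries):
        try:
            return await get_ohlc(end_dt=end_dt)

        except DataThrottle:
            # rate limited: wait a beat and retry the same frame
            await trio.sleep(1)

        except NoData as err:
            # nothing in this range; ``.frame_size`` hints how far
            # the caller might step back for its next request
            print(f'no data ending @ {end_dt}, hint: {err.frame_size}')
            return None

        except DataUnavailable:
            # backend signals it will never deliver more history
            return None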
@@ -391,17 +398,26 @@ class Client: async def bars( self, symbol: str = 'XBTUSD', + # UTC 2017-07-02 12:53:20 - since: int = None, + since: Optional[Union[int, datetime]] = None, count: int = 720, # <- max allowed per query as_np: bool = True, + ) -> dict: + if since is None: since = pendulum.now('UTC').start_of('minute').subtract( minutes=count).timestamp() + elif isinstance(since, int): + since = pendulum.from_timestamp(since).timestamp() + + else: # presumably a pendulum datetime + since = since.timestamp() + # UTC 2017-07-02 12:53:20 is oldest seconds value - since = str(max(1499000000, since)) + since = str(max(1499000000, int(since))) json = await self._public( 'OHLC', data={ @@ -445,7 +461,16 @@ class Client: array = np.array(new_bars, dtype=_ohlc_dtype) if as_np else bars return array except KeyError: - raise SymbolNotFound(json['error'][0] + f': {symbol}') + errmsg = json['error'][0] + + if 'not found' in errmsg: + raise SymbolNotFound(errmsg + f': {symbol}') + + elif 'Too many requests' in errmsg: + raise DataThrottle(f'{symbol}') + + else: + raise BrokerError(errmsg) @acm @@ -668,8 +693,8 @@ async def handle_order_requests( oid=msg.oid, reqid=msg.reqid, symbol=msg.symbol, - # TODO: maybe figure out if pending cancels will - # eventually get cancelled + # TODO: maybe figure out if pending + # cancels will eventually get cancelled reason="Order cancel is still pending?", broker_details=resp ).dict() @@ -1003,7 +1028,45 @@ async def open_history_client( # TODO implement history getter for the new storage layer. async with open_cached_client('kraken') as client: - yield client + + # lol, kraken won't send any more then the "last" + # 720 1m bars.. so we have to just ignore further + # requests of this type.. + queries: int = 0 + + async def get_ohlc( + end_dt: Optional[datetime] = None, + start_dt: Optional[datetime] = None, + + ) -> tuple[ + np.ndarray, + datetime, # start + datetime, # end + ]: + + nonlocal queries + if queries > 0: + raise DataUnavailable + + count = 0 + while count <= 3: + try: + array = await client.bars( + symbol, + since=end_dt, + ) + count += 1 + queries += 1 + break + except DataThrottle: + log.warning(f'kraken OHLC throttle for {symbol}') + await trio.sleep(1) + + start_dt = pendulum.from_timestamp(array[0]['time']) + end_dt = pendulum.from_timestamp(array[-1]['time']) + return array, start_dt, end_dt + + yield get_ohlc async def backfill_bars( From d77cfa3587ee47be5aed415feb7412696663fa21 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Apr 2022 11:26:49 -0400 Subject: [PATCH 076/105] Add back fqsn passthrough and feed opening --- piker/data/cli.py | 2 +- piker/data/marketstore.py | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/piker/data/cli.py b/piker/data/cli.py index 90992201..554048a4 100644 --- a/piker/data/cli.py +++ b/piker/data/cli.py @@ -148,7 +148,7 @@ def storesh( enable_modules=['piker.data._ahab'], ): symbol = symbols[0] - await tsdb_history_update() + await tsdb_history_update(symbol) trio.run(main) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index ef4a9657..95fd80ee 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -270,6 +270,7 @@ class Storage: self, fqsn: str, timeframe: Optional[Union[int, str]] = None, + end: Optional[int] = None, ) -> tuple[ MarketstoreClient, @@ -287,6 +288,7 @@ class Storage: symbols=fqsn, timeframe=tfstr, attrgroup='OHLCV', + end=end, # limit_from_start=True, # TODO: figure the max limit here given the @@ -346,6 
+348,7 @@ class Storage: self, fqsn: str, ohlcv: np.ndarray, + append_and_duplicate: bool = True, ) -> None: # build mkts schema compat array for writing @@ -373,7 +376,7 @@ class Storage: # NOTE: will will append duplicates # for the same timestamp-index. # TODO: pre deduplicate? - isvariablelength=True, + isvariablelength=append_and_duplicate, ) log.info( @@ -443,17 +446,17 @@ async def tsdb_history_update( async with ( open_storage_client(fqsn) as storage, - # maybe_open_feed( - # [fqsn], - # start_stream=False, + maybe_open_feed( + [fqsn], + start_stream=False, - # ) as (feed, stream), + ) as (feed, stream), ): profiler(f'opened feed for {fqsn}') - # to_append = feed.shm.array - # to_prepend = None + to_append = feed.shm.array + to_prepend = None if fqsn: symbol = feed.symbols.get(fqsn) @@ -477,10 +480,11 @@ async def tsdb_history_update( log.info(f'Existing tsdb symbol set:\n{pformat(syms)}') profiler(f'listed symbols {syms}') + # TODO: ask if user wants to write history for detected + # available shm buffers? from tractor.trionics import ipython_embed await ipython_embed() - # for array in [to_append, to_prepend]: # if array is None: # continue @@ -490,7 +494,7 @@ async def tsdb_history_update( # ) # await storage.write_ohlcv(fqsn, array) - profiler('Finished db writes') + # profiler('Finished db writes') async def ingest_quote_stream( From 7b1c0939bda468fdc5e4533eda79683fa92ac793 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 1 May 2022 12:40:51 -0400 Subject: [PATCH 077/105] Add first-draft `trimeter` based concurrent ohlc history fetching --- piker/data/feed.py | 131 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 111 insertions(+), 20 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index b00cf70e..88bf8810 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -35,6 +35,7 @@ from typing import ( import trio from trio.abc import ReceiveChannel from trio_typing import TaskStatus +import trimeter import tractor from pydantic import BaseModel import pendulum @@ -263,32 +264,66 @@ async def start_backfill( # let caller unblock and deliver latest history frame task_status.started((shm, start_dt, end_dt, bf_done)) + times = array['time'] + step_size_s = ( + pendulum.from_timestamp(times[-1]) - + pendulum.from_timestamp(times[-2]) + ).seconds + frame_step_s = (end_dt - start_dt).seconds + if last_tsdb_dt is None: # maybe a better default (they don't seem to define epoch?!) - last_tsdb_dt = pendulum.now().subtract(days=1) + + # based on the sample step size load a certain amount + # history + if step_size_s == 1: + last_tsdb_dt = pendulum.now().subtract(weeks=2) + + elif step_size_s == 60: + last_tsdb_dt = pendulum.now().subtract(years=2) + + else: + raise ValueError( + '`piker` only needs to support 1m and 1s sampling ' + 'but ur api is trying to deliver a longer ' + f'timeframe of {step_size_s} ' 'seconds.. so ye, dun ' + 'do dat bruh.' + ) + + hist_period = pendulum.period( + start_dt.subtract(seconds=step_size_s), + last_tsdb_dt, + ) + end_dts = list(hist_period.range('seconds', frame_step_s)) # pull new history frames until we hit latest # already in the tsdb or a max count. 
- # mx_fills = 16 count = 0 - # while True: - while ( - end_dt > last_tsdb_dt - # and count < mx_fills - ): + frames = {} + + async def get_ohlc_frame( + input_end_dt: datetime, + + ) -> np.ndarray: + + nonlocal count count += 1 try: - array, start_dt, end_dt = await hist(end_dt=start_dt) + array, start_dt, end_dt = await hist(end_dt=input_end_dt) + # if input_end_dt.timestamp() == end_dts[0].timestamp(): + # await tractor.breakpoint() except NoData: # decrement by the diff in time last delivered. end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds) - continue + log.warning('no data for range {(end_dt - start_dt)} ?!?') + # continue except DataUnavailable: # broker is being a bish and we can't pull # any more.. - break + log.warning('backend halted on data deliver !?!?') + # break to_push = diff_history( array, @@ -302,18 +337,74 @@ async def start_backfill( print(f"PULLING {count}") log.info(f'Pushing {to_push.size} to shm!') - if to_push.size < 1: - break + frames[input_end_dt.timestamp()] = (to_push, start_dt, end_dt) - # bail on shm allocation overrun - try: - shm.push(to_push, prepend=True) - except ValueError: - await tractor.breakpoint() - break + return to_push, start_dt, end_dt - for delay_s in sampler.subscribers: - await broadcast(delay_s) + # if to_push.size < 1: + # print('UHHH SIZE <1 BREAKING!?') + # break + + rate = erlangs = 5 + async with trimeter.amap( + + get_ohlc_frame, + end_dts, + + capture_outcome=True, + include_value=True, + max_at_once=erlangs, + max_per_second=rate, + + ) as outcomes: + + # Then iterate over the return values, as they become available + # (i.e., not necessarily in the original order) + async for input_end_dt, outcome in outcomes: + try: + out = outcome.unwrap() + except Exception: + log.exception('uhh trimeter bail') + raise + else: + to_push, start_dt, end_dt = out + + # pipeline-style pull frames until we need to wait for + # the next in order to arrive. + i = end_dts.index(input_end_dt) + print(f'latest end_dt {end_dt} found at index {i}') + + for epoch in reversed(sorted(frames)): + start = shm.array['time'][0] + + # we don't yet have the next frame to push + # so break back to the async request loop. + diff = epoch - start + if abs(diff) > step_size_s: + if len(frames) > 20: + log.warning( + f'there appears to be a history gap of {diff}?' + ) + # from pprint import pprint + # await tractor.breakpoint() + else: + break + + to_push, start_dt, end_dt = frames.pop(epoch) + print(f'pushing frame ending at {end_dt}') + + if not len(to_push): + break + + # bail on shm allocation overrun + try: + shm.push(to_push, prepend=True) + except ValueError: + await tractor.breakpoint() + break + + for delay_s in sampler.subscribers: + await broadcast(delay_s) bf_done.set() # update start index to include all tsdb history From fcb85873de28fa1b7a4b360e2f011671da707291 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 1 May 2022 19:12:48 -0400 Subject: [PATCH 078/105] Terminate early on data unavailable errors --- piker/data/feed.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 88bf8810..eb57e71b 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -323,7 +323,7 @@ async def start_backfill( # broker is being a bish and we can't pull # any more.. 
log.warning('backend halted on data deliver !?!?') - # break + return input_end_dt, None to_push = diff_history( array, @@ -361,6 +361,11 @@ async def start_backfill( # Then iterate over the return values, as they become available # (i.e., not necessarily in the original order) async for input_end_dt, outcome in outcomes: + + # no data available case.. + if outcome is None: + break + try: out = outcome.unwrap() except Exception: From 7e951f17cadbc4c308d38c91b8ef871d33be04cc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 11:22:47 -0400 Subject: [PATCH 079/105] Support large ohlcv writes via slicing, add struct array keymap --- piker/data/marketstore.py | 72 ++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 95fd80ee..5d930c3f 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -108,6 +108,16 @@ _ohlcv_dt = [ ] +ohlc_key_map = bidict({ + 'Epoch': 'time', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', +}) + + def mk_tbk(keys: tuple[str, str, str]) -> str: ''' Generate a marketstore table key from a tuple. @@ -201,6 +211,7 @@ class MarketStoreError(Exception): # raise MarketStoreError(err) +# map of seconds ints to "time frame" accepted keys tf_in_1s = bidict({ 1: '1Sec', 60: '1Min', @@ -349,6 +360,7 @@ class Storage: fqsn: str, ohlcv: np.ndarray, append_and_duplicate: bool = True, + limit: int = int(800e3), ) -> None: # build mkts schema compat array for writing @@ -368,25 +380,53 @@ class Storage: 'volume', ]] - # write to db - resp = await self.client.write( - mkts_array, - tbk=f'{fqsn}/1Sec/OHLCV', + m, r = divmod(len(mkts_array), limit) - # NOTE: will will append duplicates - # for the same timestamp-index. - # TODO: pre deduplicate? - isvariablelength=append_and_duplicate, - ) + for i in range(m, 1): + to_push = mkts_array[i-1:i*limit] - log.info( - f'Wrote {mkts_array.size} datums to tsdb\n' - ) + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqsn}/1Sec/OHLCV', - for resp in resp.responses: - err = resp.error - if err: - raise MarketStoreError(err) + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) + + if r: + to_push = mkts_array[m*limit:] + + # write to db + resp = await self.client.write( + to_push, + tbk=f'{fqsn}/1Sec/OHLCV', + + # NOTE: will will append duplicates + # for the same timestamp-index. + # TODO: pre deduplicate? + isvariablelength=append_and_duplicate, + ) + + log.info( + f'Wrote {mkts_array.size} datums to tsdb\n' + ) + + for resp in resp.responses: + err = resp.error + if err: + raise MarketStoreError(err) @acm From b44786e5b7263bc906a450ae38021e89f0fab1bf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 13:19:49 -0400 Subject: [PATCH 080/105] Support async-batched ohlc queries in all backends Expect each backend to deliver a `config: dict[str, Any]` which provides concurrency controls to `trimeter`'s batch task scheduler such that backends can define their own concurrency limits. The dirty deats in this patch include handling history "gaps" where a query returns a history-frame-result which spans more then the typical frame size (in seconds). 
In such cases we reset the target frame index (datetime index sequence implemented with a `pendulum.Period`) using a generator protocol `.send()` such that the sequence can be dynamically re-indexed starting at the new (possibly) pre-gap datetime. The new gap logic also allows us to detect out of order frames easier and thus wait for the next-in-order to arrive before making more requests. --- piker/brokers/binance.py | 2 +- piker/brokers/ib.py | 35 +++-- piker/brokers/kraken.py | 2 +- piker/data/feed.py | 271 +++++++++++++++++++++++++++++---------- 4 files changed, 232 insertions(+), 78 deletions(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 7678c173..5b6a3da6 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -402,7 +402,7 @@ async def open_history_client( end_dt = pendulum.from_timestamp(array[-1]['time']) return array, start_dt, end_dt - yield get_ohlc + yield get_ohlc, {'erlangs': 4, 'rate': 4} async def backfill_bars( diff --git a/piker/brokers/ib.py b/piker/brokers/ib.py index ad752cce..20865e30 100644 --- a/piker/brokers/ib.py +++ b/piker/brokers/ib.py @@ -57,6 +57,8 @@ from ib_insync.wrapper import Wrapper from ib_insync.client import Client as ib_Client from fuzzywuzzy import process as fuzzy import numpy as np +import pendulum + from .. import config from ..log import get_logger, get_console_log @@ -1442,8 +1444,6 @@ async def get_bars( a ``MethoProxy``. ''' - import pendulum - fails = 0 bars: Optional[list] = None first_dt: datetime = None @@ -1471,7 +1471,9 @@ async def get_bars( time = bars_array['time'] assert time[-1] == last_dt.timestamp() assert time[0] == first_dt.timestamp() - log.info(f'bars retreived for dts {first_dt}:{last_dt}') + log.info( + f'{len(bars)} bars retreived for {first_dt} -> {last_dt}' + ) return (bars, bars_array, first_dt, last_dt), fails @@ -1485,20 +1487,27 @@ async def get_bars( raise NoData( f'Symbol: {fqsn}', ) - break elif ( err.code == 162 and 'HMDS query returned no data' in err.message ): - # try to decrement start point and look further back - end_dt = last_dt = last_dt.subtract(seconds=2000) + # XXX: this is now done in the storage mgmt layer + # and we shouldn't implicitly decrement the frame dt + # index since the upper layer may be doing so + # concurrently and we don't want to be delivering frames + # that weren't asked for. log.warning( - f'No data found ending @ {end_dt}\n' - f'Starting another request for {end_dt}' + f'NO DATA found ending @ {end_dt}\n' ) - continue + # try to decrement start point and look further back + # end_dt = last_dt = last_dt.subtract(seconds=2000) + + raise NoData( + f'Symbol: {fqsn}', + frame_size=2000, + ) elif _pacing in msg: @@ -1578,7 +1587,12 @@ async def open_history_client( return bars_array, first_dt, last_dt - yield get_hist + # TODO: it seems like we can do async queries for ohlc + # but getting the order right still isn't working and I'm not + # quite sure why.. needs some tinkering and probably + # a lookthrough of the ``ib_insync`` machinery, for eg. maybe + # we have to do the batch queries on the `asyncio` side? + yield get_hist, {'erlangs': 1, 'rate': 6} async def backfill_bars( @@ -1840,6 +1854,7 @@ async def stream_quotes( symbol=sym, ) first_quote = normalize(first_ticker) + # print(f'first quote: {first_quote}') def mk_init_msgs() -> dict[str, dict]: # pass back some symbol info like min_tick, trading_hours, etc. 
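Pulling the pieces together, the per-backend contract this patch converges on is roughly the following (a schematic sketch, not any single backend verbatim): `open_history_client()` now yields a frame getter plus a concurrency config whose `erlangs`/`rate` values are passed to `trimeter.amap()` as `max_at_once`/`max_per_second` in `feed.py`. The synthetic bar returned below is a placeholder only:

from contextlib import asynccontextmanager as acm
from datetime import datetime
from typing import Optional

import numpy as np


@acm
async def open_history_client(symbol: str):

    async def get_ohlc(
        end_dt: Optional[datetime] = None,
        start_dt: Optional[datetime] = None,

    ) -> tuple[np.ndarray, datetime, datetime]:
        # a real backend queries its bars api here; we fabricate
        # a single 1s bar so the sketch stands alone
        now = end_dt or datetime.now()
        array = np.array(
            [(int(now.timestamp()), 1.0, 1.0, 1.0, 1.0, 0.0)],
            dtype=[
                ('time', 'i8'), ('open', 'f8'), ('high', 'f8'),
                ('low', 'f8'), ('close', 'f8'), ('volume', 'f8'),
            ],
        )
        return array, now, now

    # erlangs -> trimeter's ``max_at_once``,
    # rate -> trimeter's ``max_per_second``
    yield get_ohlc, {'erlangs': 4, 'rate': 4}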
diff --git a/piker/brokers/kraken.py b/piker/brokers/kraken.py index 0f5e2f2a..30e57b9e 100644 --- a/piker/brokers/kraken.py +++ b/piker/brokers/kraken.py @@ -1066,7 +1066,7 @@ async def open_history_client( end_dt = pendulum.from_timestamp(array[-1]['time']) return array, start_dt, end_dt - yield get_ohlc + yield get_ohlc, {'erlangs': 1, 'rate': 1} async def backfill_bars( diff --git a/piker/data/feed.py b/piker/data/feed.py index eb57e71b..272add05 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -29,6 +29,7 @@ from types import ModuleType from typing import ( Any, AsyncIterator, Optional, + Generator, Awaitable, ) @@ -241,7 +242,7 @@ async def start_backfill( ) -> int: - async with mod.open_history_client(bfqsn) as hist: + async with mod.open_history_client(bfqsn) as (hist, config): # get latest query's worth of history all the way # back to what is recorded in the tsdb @@ -260,7 +261,9 @@ async def start_backfill( for delay_s in sampler.subscribers: await broadcast(delay_s) + # signal that backfilling to tsdb's end datum is complete bf_done = trio.Event() + # let caller unblock and deliver latest history frame task_status.started((shm, start_dt, end_dt, bf_done)) @@ -269,7 +272,7 @@ async def start_backfill( pendulum.from_timestamp(times[-1]) - pendulum.from_timestamp(times[-2]) ).seconds - frame_step_s = (end_dt - start_dt).seconds + frame_size_s = len(to_push) * step_size_s if last_tsdb_dt is None: # maybe a better default (they don't seem to define epoch?!) @@ -277,7 +280,7 @@ async def start_backfill( # based on the sample step size load a certain amount # history if step_size_s == 1: - last_tsdb_dt = pendulum.now().subtract(weeks=2) + last_tsdb_dt = pendulum.now().subtract(days=6) elif step_size_s == 60: last_tsdb_dt = pendulum.now().subtract(years=2) @@ -290,69 +293,159 @@ async def start_backfill( 'do dat bruh.' ) - hist_period = pendulum.period( - start_dt.subtract(seconds=step_size_s), - last_tsdb_dt, - ) - end_dts = list(hist_period.range('seconds', frame_step_s)) + # configure async query throttling + erlangs = config.get('erlangs', 1) + rate = config.get('rate', 1) + frames = {} + + def iter_dts(start: datetime): + while True: + + hist_period = pendulum.period( + start.subtract(seconds=step_size_s), + last_tsdb_dt, + ) + dtrange = hist_period.range('seconds', frame_size_s) + + for end_dt in dtrange: + log.warning(f'Yielding next frame start {end_dt}') + start = yield end_dt + + # if caller sends a new start date, reset to that + if start is not None: + log.warning(f'Resetting date range: {start}') + # import pdbpp + # pdbpp.set_trace() + break + else: + # from while + return # pull new history frames until we hit latest # already in the tsdb or a max count. count = 0 - frames = {} + + # NOTE: when gaps are detected in the retreived history (by + # comparisor of the end - start versus the expected "frame size" + # in seconds) we need a way to alert the async request code not + # to continue to query for data "within the gap". This var is + # set in such cases such that further requests in that period + # are discarded and further we reset the "datetimem query frame + # index" in such cases to avoid needless noop requests. 
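The reset mechanism described in the note above is just the generator `.send()` protocol used by `iter_dts()`; the same pattern in stripped-down form, with plain integers standing in for the pendulum datetimes:

def iter_steps(start: int, stop: int, step: int):
    # yield a descending sequence; the caller can ``.send()`` in a
    # new start point to re-seed the sequence mid-iteration, just
    # like ``iter_dts()`` does when a history gap is detected
    while True:
        for i in range(start, stop, -step):
            new_start = yield i
            if new_start is not None:
                start = new_start
                break
        else:
            return


gen = iter_steps(100, 0, 10)
assert next(gen) == 100
assert next(gen) == 90

# a gap was detected: re-seed the sequence to restart at 50
assert gen.send(50) == 50
assert next(gen) == 40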
+ earliest_end_dt: Optional[datetime] = start_dt async def get_ohlc_frame( input_end_dt: datetime, + iter_dts_gen: Generator[datetime], ) -> np.ndarray: - nonlocal count + nonlocal count, frames, earliest_end_dt, frame_size_s count += 1 + + if input_end_dt > earliest_end_dt: + # if a request comes in for an inter-gap frame we + # discard it since likely this request is still + # lingering from before the reset of ``iter_dts()`` via + # ``.send()`` below. + log.info(f'Discarding request history ending @ {input_end_dt}') + + # signals to ``trimeter`` loop to discard and + # ``continue`` in it's schedule loop. + return None + try: + log.info( + f'Requesting {step_size_s}s frame ending in {input_end_dt}' + ) array, start_dt, end_dt = await hist(end_dt=input_end_dt) - # if input_end_dt.timestamp() == end_dts[0].timestamp(): - # await tractor.breakpoint() + assert array['time'][0] == start_dt.timestamp() except NoData: - # decrement by the diff in time last delivered. - end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds) - log.warning('no data for range {(end_dt - start_dt)} ?!?') - # continue + log.warning( + f'NO DATA for {frame_size_s}s frame @ {end_dt} ?!?' + ) + return None # discard signal - except DataUnavailable: + except DataUnavailable as duerr: # broker is being a bish and we can't pull # any more.. log.warning('backend halted on data deliver !?!?') - return input_end_dt, None + + # ugh, what's a better way? + # TODO: fwiw, we probably want a way to signal a throttle + # condition (eg. with ib) so that we can halt the + # request loop until the condition is resolved? + return duerr + + diff = end_dt - start_dt + frame_time_diff_s = diff.seconds + expected_frame_size_s = frame_size_s + step_size_s + + if frame_time_diff_s > expected_frame_size_s: + + # XXX: query result includes a start point prior to our + # expected "frame size" and thus is likely some kind of + # history gap (eg. market closed period, outage, etc.) + # so indicate to the request loop that this gap is + # expected by both, + # - resetting the ``iter_dts()`` generator to start at + # the new start point delivered in this result + # - setting the non-locally scoped ``earliest_end_dt`` + # to this new value so that the request loop doesn't + # get tripped up thinking there's an out of order + # request-result condition. + + log.warning( + f'History frame ending @ {end_dt} appears to have a gap:\n' + f'{diff} ~= {frame_time_diff_s} seconds' + ) + + # reset dtrange gen to new start point + next_end = iter_dts_gen.send(start_dt) + log.info( + f'Reset frame index to start at {start_dt}\n' + f'Was at {next_end}' + ) + + # TODO: can we avoid this? 
+ earliest_end_dt = start_dt to_push = diff_history( array, start_dt, end_dt, - last_tsdb_dt=last_tsdb_dt, - # XXX: hacky, just run indefinitely - # last_tsdb_dt=None, ) - print(f"PULLING {count}") - log.info(f'Pushing {to_push.size} to shm!') + ln = len(to_push) + if ln: + log.info(f'{ln} bars for {start_dt} -> {end_dt}') + frames[input_end_dt.timestamp()] = (to_push, start_dt, end_dt) + return to_push, start_dt, end_dt - frames[input_end_dt.timestamp()] = (to_push, start_dt, end_dt) + else: + log.warning( + f'{ln} BARS TO PUSH after diff?!: {start_dt} -> {end_dt}' + ) + return None - return to_push, start_dt, end_dt + # initial dt index starts at the start of the first query result + idts = iter_dts(start_dt) - # if to_push.size < 1: - # print('UHHH SIZE <1 BREAKING!?') - # break - - rate = erlangs = 5 async with trimeter.amap( - - get_ohlc_frame, - end_dts, + partial( + get_ohlc_frame, + # we close in the ``iter_dt()`` gen in so we can send + # reset signals as needed for gap dection in the + # history. + iter_dts_gen=idts, + ), + idts, capture_outcome=True, include_value=True, + + # better technical names bruv... max_at_once=erlangs, max_per_second=rate, @@ -362,59 +455,101 @@ async def start_backfill( # (i.e., not necessarily in the original order) async for input_end_dt, outcome in outcomes: - # no data available case.. - if outcome is None: - break - try: out = outcome.unwrap() + + if out is None: + # skip signal + continue + + elif isinstance(out, DataUnavailable): + # no data available case signal.. so just kill + # further requests and basically just stop + # trying... + break + except Exception: log.exception('uhh trimeter bail') raise else: to_push, start_dt, end_dt = out + if not len(to_push): + # diff returned no new data (i.e. we probablyl hit + # the ``last_tsdb_dt`` point). + # TODO: raise instead? + log.warning(f'No history for range {start_dt} -> {end_dt}') + continue + # pipeline-style pull frames until we need to wait for # the next in order to arrive. - i = end_dts.index(input_end_dt) - print(f'latest end_dt {end_dt} found at index {i}') + # i = end_dts.index(input_end_dt) + # print(f'latest end_dt {end_dt} found at index {i}') - for epoch in reversed(sorted(frames)): + epochs = list(reversed(sorted(frames))) + for epoch in epochs: start = shm.array['time'][0] - # we don't yet have the next frame to push - # so break back to the async request loop. diff = epoch - start if abs(diff) > step_size_s: - if len(frames) > 20: + + if earliest_end_dt < end_dt: + # XXX: an expected gap was encountered (see + # logic in ``get_ohlc_frame()``, so allow + # this frame through to the storage layer. log.warning( - f'there appears to be a history gap of {diff}?' + f'there is an expected history gap of {diff}s:' + ) + + elif ( + erlangs > 1 + and len(epochs) < erlangs + ): + # we don't yet have the next frame to push + # so break back to the async request loop + # while we wait for more async frame-results + # to arrive. 
+ expect_end = pendulum.from_timestamp(start) + expect_start = expect_end.subtract( + seconds=frame_size_s) + log.warning( + 'waiting on out-of-order history frame:\n' + f'{expect_end - expect_start}' ) - # from pprint import pprint - # await tractor.breakpoint() - else: break to_push, start_dt, end_dt = frames.pop(epoch) - print(f'pushing frame ending at {end_dt}') - if not len(to_push): - break - - # bail on shm allocation overrun + # bail gracefully on shm allocation overrun/full condition try: shm.push(to_push, prepend=True) except ValueError: - await tractor.breakpoint() + log.info( + f'Shm buffer overrun on: {start_dt} -> {end_dt}?' + ) + # await tractor.breakpoint() break - for delay_s in sampler.subscribers: - await broadcast(delay_s) + log.info( + f'Shm pushed {len(to_push)} frame:\n' + f'{start_dt} -> {end_dt}' + ) + + # keep track of most recent "prepended" ``start_dt`` + # both for detecting gaps and ensuring async + # frame-result order. + earliest_end_dt = start_dt + + # TODO: can we only trigger this if the respective + # history in "in view"?!? + # XXX: extremely important, there can be no checkpoints + # in the block above to avoid entering new ``frames`` + # values while we're pipelining the current ones to + # memory... + for delay_s in sampler.subscribers: + await broadcast(delay_s) bf_done.set() - # update start index to include all tsdb history - # that was pushed in the caller parent task. - # shm._first.value = 0 async def manage_history( @@ -490,6 +625,17 @@ async def manage_history( last_tsdb_dt=last_dt, ) ) + + # if len(shm.array) < 2: + # TODO: there's an edge case here to solve where if the last + # frame before market close (at least on ib) was pushed and + # there was only "1 new" row pushed from the first backfill + # query-iteration, then the sample step sizing calcs will + # break upstream from here since you can't diff on at least + # 2 steps... probably should also add logic to compute from + # the tsdb series and stash that somewhere as meta data on + # the shm buffer?.. no se. + task_status.started(shm) some_data_ready.set() @@ -524,14 +670,7 @@ async def manage_history( prepend=True, # update_first=False, # start=prepend_start, - field_map={ - 'Epoch': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', - }, + field_map=marketstore.ohlc_key_map, ) # load as much from storage into shm as spacec will From 9b5f052597705e9d29ebae903805f19f769d294b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 13:52:23 -0400 Subject: [PATCH 081/105] Handle no sampler subs case on history broadcasts When the market isn't open the feed layer won't create a subscriber entry in the sampler broadcast loop and so if a manual call to ``broadcast()`` is made (like when trying to update a chart from a history prepend) we need to handle that case and just broadcast a random `-1` for now..BD --- piker/data/_sampling.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index b5a75a67..8bc677cf 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -146,13 +146,21 @@ async def broadcast( # a given sample period. 
subs = sampler.subscribers.get(delay_s, ()) + last = -1 + if shm is None: - lowest = min(sampler.ohlcv_shms.keys()) - shm = sampler.ohlcv_shms[lowest][0] + periods = sampler.ohlcv_shms.keys() + # if this is an update triggered by a history update there + # might not actually be any sampling bus setup since there's + # no "live feed" active yet. + if periods: + lowest = min(periods) + shm = sampler.ohlcv_shms[lowest][0] + last = shm._last.value for stream in subs: try: - await stream.send({'index': shm._last.value}) + await stream.send({'index': last}) except ( trio.BrokenResourceError, trio.ClosedResourceError From 969530ba196317e6017d8c25d37f9e8585c8fc7c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 16:01:02 -0400 Subject: [PATCH 082/105] Fix slice logic for less-then-frame tsdb overlap When the tsdb has a last datum that is in the past less then a "frame's worth" of sample steps we need to slice out only the data from the latest frame that doesn't overlap; this fixes that slice logic.. Previously i dunno wth it was doing.. --- piker/data/feed.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 272add05..8c88f04b 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -208,27 +208,24 @@ def diff_history( ) -> np.ndarray: + to_push = array + if last_tsdb_dt: s_diff = (start_dt - last_tsdb_dt).seconds - to_push = array[:s_diff] - # if we detect a partial frame's worth of data # that is new, slice out only that history and # write to shm. - if abs(s_diff) < len(array): + if ( + s_diff < 0 + and abs(s_diff) < len(array) + ): log.info( f'Pushing partial frame {to_push.size} to shm' ) - # assert last_tsdb_dt > start_dt - # selected = array['time'] > last_tsdb_dt.timestamp() - # to_push = array[selected] - # return to_push + to_push = array[abs(s_diff):] - return to_push - - else: - return array + return to_push async def start_backfill( @@ -248,6 +245,17 @@ async def start_backfill( # back to what is recorded in the tsdb array, start_dt, end_dt = await hist(end_dt=None) + times = array['time'] + + # sample period step size in seconds + step_size_s = ( + pendulum.from_timestamp(times[-1]) - + pendulum.from_timestamp(times[-2]) + ).seconds + + # "frame"'s worth of sample period steps in seconds + frame_size_s = len(array) * step_size_s + to_push = diff_history( array, start_dt, @@ -267,13 +275,6 @@ async def start_backfill( # let caller unblock and deliver latest history frame task_status.started((shm, start_dt, end_dt, bf_done)) - times = array['time'] - step_size_s = ( - pendulum.from_timestamp(times[-1]) - - pendulum.from_timestamp(times[-2]) - ).seconds - frame_size_s = len(to_push) * step_size_s - if last_tsdb_dt is None: # maybe a better default (they don't seem to define epoch?!) 
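
A standalone sketch of the slicing idea from the patch above (illustrative
names and toy data only, nothing here is code from this series): given a
freshly pulled frame of time-stamped rows and the last timestamp already
stored in the tsdb, only the strictly-newer rows should be pushed to shm.

    import numpy as np

    def keep_new_rows(frame: np.ndarray, last_tsdb_ts: float) -> np.ndarray:
        # keep only rows newer than what the tsdb already holds; when
        # there is no overlap the whole frame passes through unchanged.
        return frame[frame['time'] > last_tsdb_ts]

    # ten 1s samples ending at t=109, tsdb already holds up to t=104
    frame = np.array(
        [(t, 1.0) for t in range(100, 110)],
        dtype=[('time', 'i8'), ('close', 'f8')],
    )
    assert len(keep_new_rows(frame, last_tsdb_ts=104)) == 5

The patch accomplishes roughly the same thing with the ``s_diff``
seconds-difference and index slicing so that the partial-overlap case can
be told apart from the no-overlap (and, in a later patch, the gap) cases.
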
From 40753ae93cc17a928fc982cf09139233eb33dcc7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 16:22:01 -0400 Subject: [PATCH 083/105] Always write newly pulled frames to tsdb --- piker/data/feed.py | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 8c88f04b..a95b87b4 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -31,6 +31,7 @@ from typing import ( AsyncIterator, Optional, Generator, Awaitable, + TYPE_CHECKING, ) import trio @@ -74,6 +75,8 @@ from ..brokers._util import ( DataUnavailable, ) +if TYPE_CHECKING: + from .marketstore import Storage log = get_logger(__name__) @@ -234,6 +237,7 @@ async def start_backfill( shm: ShmArray, last_tsdb_dt: Optional[datetime] = None, + storage: Optional[Storage] = None, task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, @@ -520,6 +524,7 @@ async def start_backfill( break to_push, start_dt, end_dt = frames.pop(epoch) + ln = len(to_push) # bail gracefully on shm allocation overrun/full condition try: @@ -528,19 +533,27 @@ async def start_backfill( log.info( f'Shm buffer overrun on: {start_dt} -> {end_dt}?' ) - # await tractor.breakpoint() break log.info( - f'Shm pushed {len(to_push)} frame:\n' + f'Shm pushed {ln} frame:\n' f'{start_dt} -> {end_dt}' ) - # keep track of most recent "prepended" ``start_dt`` # both for detecting gaps and ensuring async # frame-result order. earliest_end_dt = start_dt + if storage is not None: + log.info( + f'Writing {ln} frame to storage:\n' + f'{start_dt} -> {end_dt}' + ) + await storage.write_ohlcv( + f'{bfqsn}.{mod.name}', # lul.. + to_push, + ) + # TODO: can we only trigger this if the respective # history in "in view"?!? # XXX: extremely important, there can be no checkpoints @@ -609,7 +622,7 @@ async def manage_history( # shm backfiller approach below. # start history anal and load missing new data via backend. - series, first_dt, last_dt = await storage.load(fqsn) + series, _, last_tsdb_dt = await storage.load(fqsn) broker, symbol, expiry = unpack_fqsn(fqsn) ( @@ -623,7 +636,8 @@ async def manage_history( mod, bfqsn, shm, - last_tsdb_dt=last_dt, + last_tsdb_dt=last_tsdb_dt, + storage=storage, ) ) @@ -644,8 +658,10 @@ async def manage_history( # do diff against last start frame of history and only fill # in from the tsdb an allotment that allows for most recent # to be loaded into mem *before* tsdb data. - if last_dt: - dt_diff_s = (latest_start_dt - last_dt).seconds + if last_tsdb_dt: + dt_diff_s = ( + latest_start_dt - last_tsdb_dt + ).seconds else: dt_diff_s = 0 @@ -674,7 +690,7 @@ async def manage_history( field_map=marketstore.ohlc_key_map, ) - # load as much from storage into shm as spacec will + # load as much from storage into shm as space will # allow according to user's shm size settings. count = 0 end = fastest['Epoch'][0] @@ -699,15 +715,11 @@ async def manage_history( prepend=True, # update_first=False, # start=prepend_start, - field_map={ - 'Epoch': 'time', - 'Open': 'open', - 'High': 'high', - 'Low': 'low', - 'Close': 'close', - 'Volume': 'volume', - }, + field_map=marketstore.ohlc_key_map, ) + + # manually trigger step update to update charts/fsps + # which need an incremental update. 
for delay_s in sampler.subscribers: await broadcast(delay_s) From e4a900168d19f3446f64fdd77b1bb61bb877a638 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 16:30:10 -0400 Subject: [PATCH 084/105] Add timeframe key to seconds map --- piker/data/_source.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/piker/data/_source.py b/piker/data/_source.py index b7f0ecd7..2f5f61ed 100644 --- a/piker/data/_source.py +++ b/piker/data/_source.py @@ -21,6 +21,7 @@ from __future__ import annotations from typing import Any import decimal +from bidict import bidict import numpy as np from pydantic import BaseModel # from numba import from_dtype @@ -47,16 +48,16 @@ base_ohlc_dtype = np.dtype(ohlc_fields) # https://github.com/numba/numba/issues/4511 # numba_ohlc_dtype = from_dtype(base_ohlc_dtype) -# map time frame "keys" to minutes values -tf_in_1m = { - '1m': 1, - '5m': 5, - '15m': 15, - '30m': 30, - '1h': 60, - '4h': 240, - '1d': 1440, -} +# map time frame "keys" to seconds values +tf_in_1s = bidict({ + 1: '1s', + 60: '1m', + 60*5: '5m', + 60*15: '15m', + 60*30: '30m', + 60*60: '1h', + 60*60*24: '1d', +}) def mk_fqsn( From 4a6f01747c3ac1ae5edce77741a7fdd924e89160 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 17:27:38 -0400 Subject: [PATCH 085/105] Label "humanized" sample period in window title-bar" --- piker/ui/_display.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/piker/ui/_display.py b/piker/ui/_display.py index 0a0b8fd6..927ce5df 100644 --- a/piker/ui/_display.py +++ b/piker/ui/_display.py @@ -29,6 +29,7 @@ from typing import Optional, Any, Callable import numpy as np import tractor import trio +import pendulum import pyqtgraph as pg from .. import brokers @@ -47,6 +48,7 @@ from ._fsp import ( open_vlm_displays, ) from ..data._sharedmem import ShmArray +from ..data._source import tf_in_1s from ._forms import ( FieldsForm, mk_order_pane_layout, @@ -664,11 +666,17 @@ async def display_symbol_data( symbol = feed.symbols[sym] fqsn = symbol.front_fqsn() + times = bars['time'] + end = pendulum.from_timestamp(times[-1]) + start = pendulum.from_timestamp(times[times != times[-1]][-1]) + step_size_s = (end - start).seconds + tf_key = tf_in_1s[step_size_s] + # load in symbol's ohlc data godwidget.window.setWindowTitle( f'{fqsn} ' f'tick:{symbol.tick_size} ' - f'step:1s ' + f'step:{tf_key} ' ) linked = godwidget.linkedsplits From 61e9db32297aeadb0301c0b2ccbd0c408ba64462 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 19:07:32 -0400 Subject: [PATCH 086/105] Handle ``iter_dts()`` already exhausted edge case --- piker/data/feed.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index a95b87b4..b877ed65 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -407,14 +407,27 @@ async def start_backfill( ) # reset dtrange gen to new start point - next_end = iter_dts_gen.send(start_dt) - log.info( - f'Reset frame index to start at {start_dt}\n' - f'Was at {next_end}' - ) + try: + next_end = iter_dts_gen.send(start_dt) + log.info( + f'Reset frame index to start at {start_dt}\n' + f'Was at {next_end}' + ) + + # NOTE: manually set "earliest end datetime" index-value + # to avoid the request loop getting confused about + # new frames that are earlier in history - i.e. this + # **is not** the case of out-of-order frames from + # an async batch request. 
+ earliest_end_dt = start_dt + + except StopIteration: + # gen already terminated meaning we probably already + # exhausted it via frame requests. + log.info( + f"Datetime index already exhausted, can't reset.." + ) - # TODO: can we avoid this? - earliest_end_dt = start_dt to_push = diff_history( array, From 0324404b032c1961759702691ace9688f07895bd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 5 May 2022 17:09:17 -0400 Subject: [PATCH 087/105] Include epoch timestamp in quote label for now --- piker/ui/_cursor.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/piker/ui/_cursor.py b/piker/ui/_cursor.py index c902fcb8..a34c15c1 100644 --- a/piker/ui/_cursor.py +++ b/piker/ui/_cursor.py @@ -191,6 +191,9 @@ class ContentsLabel(pg.LabelItem): self.setText( "i:{index}
" + # NB: these fields must be indexed in the correct order via + # the slice syntax below. + "epoch:{}
" "O:{}
" "H:{}
" "L:{}
" @@ -198,7 +201,15 @@ class ContentsLabel(pg.LabelItem): "V:{}
" "wap:{}".format( *array[index - first][ - ['open', 'high', 'low', 'close', 'volume', 'bar_wap'] + [ + 'time', + 'open', + 'high', + 'low', + 'close', + 'volume', + 'bar_wap', + ] ], name=name, index=index, From c9a621fc2a360a1e17776f1c2111614c5043f25e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 6 May 2022 13:09:30 -0400 Subject: [PATCH 088/105] Fix less-then-frame off by one slice, add db write toggle and disable --- piker/data/feed.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index b877ed65..9f56cfd5 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -223,10 +223,13 @@ def diff_history( s_diff < 0 and abs(s_diff) < len(array) ): + # the + 1 is because ``last_tsdb_dt`` is pulled from + # the last row entry for the ``'time'`` field retreived + # from the tsdb. + to_push = array[abs(s_diff)+1:] log.info( f'Pushing partial frame {to_push.size} to shm' ) - to_push = array[abs(s_diff):] return to_push @@ -238,6 +241,7 @@ async def start_backfill( last_tsdb_dt: Optional[datetime] = None, storage: Optional[Storage] = None, + write_tsdb: bool = False, task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, @@ -285,7 +289,7 @@ async def start_backfill( # based on the sample step size load a certain amount # history if step_size_s == 1: - last_tsdb_dt = pendulum.now().subtract(days=6) + last_tsdb_dt = pendulum.now().subtract(days=2) elif step_size_s == 60: last_tsdb_dt = pendulum.now().subtract(years=2) @@ -368,7 +372,7 @@ async def start_backfill( except NoData: log.warning( - f'NO DATA for {frame_size_s}s frame @ {end_dt} ?!?' + f'NO DATA for {frame_size_s}s frame @ {input_end_dt} ?!?' ) return None # discard signal @@ -425,10 +429,9 @@ async def start_backfill( # gen already terminated meaning we probably already # exhausted it via frame requests. log.info( - f"Datetime index already exhausted, can't reset.." + "Datetime index already exhausted, can't reset.." ) - to_push = diff_history( array, start_dt, @@ -557,7 +560,10 @@ async def start_backfill( # frame-result order. 
earliest_end_dt = start_dt - if storage is not None: + if ( + storage is not None + and write_tsdb + ): log.info( f'Writing {ln} frame to storage:\n' f'{start_dt} -> {end_dt}' From 1676bceee1c7ca6fae7f67d1f525b26c01776644 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 7 May 2022 15:24:26 -0400 Subject: [PATCH 089/105] Don't offset the start index by a step --- piker/data/feed.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 9f56cfd5..3564e154 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -308,10 +308,11 @@ async def start_backfill( frames = {} def iter_dts(start: datetime): + while True: hist_period = pendulum.period( - start.subtract(seconds=step_size_s), + start, last_tsdb_dt, ) dtrange = hist_period.range('seconds', frame_size_s) @@ -323,8 +324,6 @@ async def start_backfill( # if caller sends a new start date, reset to that if start is not None: log.warning(f'Resetting date range: {start}') - # import pdbpp - # pdbpp.set_trace() break else: # from while From fb9b6990ae47847cce60a5df94453dfd0d5e3e09 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 8 May 2022 15:31:36 -0400 Subject: [PATCH 090/105] Drop unneeded/commented cancel-by-msg code; roots perms wasn't the problem --- piker/data/_ahab.py | 49 +-------------------------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 14e278a7..08da0fff 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -128,7 +128,6 @@ async def open_docker( finally: if client: client.close() - # client.api._custom_adapter.close() for c in client.containers.list(): c.kill() @@ -210,7 +209,6 @@ class Container: return True except docker.errors.APIError as err: - # _err = err if 'is not running' in err.explanation: return False @@ -223,17 +221,13 @@ class Container: with trio.move_on_after(0.5) as cs: cs.shield = True - # print('PROCESSINGN LOGS') await self.process_logs_until('initiating graceful shutdown') - # print('SHUTDOWN REPORTED BY CONTAINER') await self.process_logs_until('exiting...',) for _ in range(10): with trio.move_on_after(0.5) as cs: cs.shield = True - # print('waiting on EXITING') await self.process_logs_until('exiting...',) - # print('got EXITING') break if cs.cancelled_caught: @@ -280,7 +274,7 @@ async def open_marketstored( 5993:5993 alpacamarkets/marketstore:latest ''' - log = get_console_log('info', name=__name__) + get_console_log('info', name=__name__) async with open_docker() as client: @@ -335,8 +329,6 @@ async def open_marketstored( await ctx.started((cntr.cntr.id, os.getpid())) - # async with ctx.open_stream() as stream: - try: # TODO: we might eventually want a proxy-style msg-prot here @@ -344,25 +336,6 @@ async def open_marketstored( # callers to have root perms? await trio.sleep_forever() - # await cntr.cancel() - # with trio.CancelScope(shield=True): - # # block for the expected "teardown log msg".. - # # await cntr.process_logs_until('exiting...',) - - # # only msg should be to signal killing the - # # container and this super daemon. 
- # msg = await stream.receive() - # # print("GOT CANCEL MSG") - - # cid = msg['cancel'] - # log.cancel(f'Cancelling container {cid}') - - # # print("CANCELLING CONTAINER") - # await cntr.cancel() - - # # print("SENDING ACK") - # await stream.send('ack') - except ( BaseException, # trio.Cancelled, @@ -371,7 +344,6 @@ async def open_marketstored( with trio.CancelScope(shield=True): await cntr.cancel() - # await stream.send('ack') raise @@ -426,15 +398,6 @@ async def start_ahab( cid, pid = first await trio.sleep_forever() - # async with ctx.open_stream() as stream: - # try: - # # run till cancelled - # await trio.sleep_forever() - # finally: - # with trio.CancelScope(shield=True): - # # print('SENDING CANCEL TO MARKETSTORED') - # await stream.send({'cancel': (cid, pid)}) - # assert await stream.receive() == 'ack' # since we demoted root perms in this parent # we'll get a perms error on proc cleanup in @@ -445,7 +408,6 @@ async def start_ahab( # TODO: we could also consider adding # a ``tractor.ZombieDetected`` or something that we could raise # if we find the child didn't terminate. - # await tractor.breakpoint() except PermissionError: log.warning('Failed to cancel root permsed container') @@ -458,12 +420,3 @@ async def start_ahab( return else: raise - - -async def main(): - await start_ahab() - await trio.sleep_forever() - - -if __name__ == '__main__': - trio.run(main) From 8e08fb7b237a711a30fd084252a0beca4915008a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 8 May 2022 15:39:25 -0400 Subject: [PATCH 091/105] Add comment about un-reffed vars meant for use in shell --- piker/data/marketstore.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 5d930c3f..92d860da 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -494,7 +494,6 @@ async def tsdb_history_update( ): profiler(f'opened feed for {fqsn}') - to_append = feed.shm.array to_prepend = None @@ -511,6 +510,9 @@ async def tsdb_history_update( # hist diffing if tsdb_arrays: onesec = tsdb_arrays[1] + + # these aren't currently used but can be referenced from + # within the embedded ipython shell below. to_append = ohlcv[ohlcv['time'] > onesec['Epoch'][-1]] to_prepend = ohlcv[ohlcv['time'] < onesec['Epoch'][0]] From 30ddf63ec0a38a9dd411bff52fbfc78a4bf84439 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 8 May 2022 19:04:24 -0400 Subject: [PATCH 092/105] Handle gaps greater then a frame within a frame --- piker/data/feed.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 3564e154..1843b302 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -221,12 +221,19 @@ def diff_history( # write to shm. if ( s_diff < 0 - and abs(s_diff) < len(array) ): - # the + 1 is because ``last_tsdb_dt`` is pulled from - # the last row entry for the ``'time'`` field retreived - # from the tsdb. - to_push = array[abs(s_diff)+1:] + if abs(s_diff) < len(array): + # the + 1 is because ``last_tsdb_dt`` is pulled from + # the last row entry for the ``'time'`` field retreived + # from the tsdb. + to_push = array[abs(s_diff)+1:] + + else: + # pass back only the portion of the array that is + # greater then the last time stamp in the tsdb. 
+ time = array['time'] + to_push = array[time >= last_tsdb_dt.timestamp()] + log.info( f'Pushing partial frame {to_push.size} to shm' ) From 4b6ecbfc79b51a5a8e747be687d2e7bd700ee0d6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 10:46:48 -0400 Subject: [PATCH 093/105] Bring binance requests down to 3/sec; seems faster? --- piker/brokers/binance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/brokers/binance.py b/piker/brokers/binance.py index 5b6a3da6..bac81bb0 100644 --- a/piker/brokers/binance.py +++ b/piker/brokers/binance.py @@ -402,7 +402,7 @@ async def open_history_client( end_dt = pendulum.from_timestamp(array[-1]['time']) return array, start_dt, end_dt - yield get_ohlc, {'erlangs': 4, 'rate': 4} + yield get_ohlc, {'erlangs': 3, 'rate': 3} async def backfill_bars( From 26fddae3c0c17444cf084e273c638850cf4c5d76 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 10:47:41 -0400 Subject: [PATCH 094/105] Fix earliest frame-end not-yet-pushed check Bleh/:facepalm:, the ``end_dt`` in scope is not the "earliest" frame's `end_dt` in the async response queue.. Parse the queue's latest epoch and use **that** to compare to the last last pushed datetime index.. Add more detailed logging to help debug any (un)expected datetime index gaps. --- piker/data/feed.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index 1843b302..e4f4844a 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -25,6 +25,7 @@ from dataclasses import dataclass, field from datetime import datetime from contextlib import asynccontextmanager from functools import partial +from pprint import pformat from types import ModuleType from typing import ( Any, @@ -322,7 +323,8 @@ async def start_backfill( start, last_tsdb_dt, ) - dtrange = hist_period.range('seconds', frame_size_s) + dtrange = list(hist_period.range('seconds', frame_size_s)) + log.debug(f'New datetime index:\n{pformat(dtrange)}') for end_dt in dtrange: log.warning(f'Yielding next frame start {end_dt}') @@ -395,7 +397,7 @@ async def start_backfill( diff = end_dt - start_dt frame_time_diff_s = diff.seconds - expected_frame_size_s = frame_size_s + step_size_s + expected_frame_size_s = frame_size_s # + step_size_s if frame_time_diff_s > expected_frame_size_s: @@ -515,27 +517,40 @@ async def start_backfill( epochs = list(reversed(sorted(frames))) for epoch in epochs: + start = shm.array['time'][0] + last_shm_prepend_dt = pendulum.from_timestamp(start) + earliest_frame_queue_dt = pendulum.from_timestamp(epoch) diff = epoch - start if abs(diff) > step_size_s: - if earliest_end_dt < end_dt: + if earliest_end_dt < earliest_frame_queue_dt: # XXX: an expected gap was encountered (see # logic in ``get_ohlc_frame()``, so allow # this frame through to the storage layer. log.warning( - f'there is an expected history gap of {diff}s:' + f'Expected history gap of {diff}s:\n' + f'{earliest_frame_queue_dt} <- ' + f'{earliest_end_dt}' ) elif ( erlangs > 1 - and len(epochs) < erlangs ): # we don't yet have the next frame to push # so break back to the async request loop # while we wait for more async frame-results # to arrive. 
+ if len(frames) >= erlangs: + log.warning( + 'Frame count in async-queue is greater ' + 'then erlangs?\n' + 'There seems to be a gap between:\n' + f'{earliest_frame_queue_dt} <- ' + f'{last_shm_prepend_dt}' + ) + expect_end = pendulum.from_timestamp(start) expect_start = expect_end.subtract( seconds=frame_size_s) From 277ca290181a6f62ab46d67618b45a2a022cbcc9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 11:52:17 -0400 Subject: [PATCH 095/105] Always write missing history frames to tsdb (again) --- piker/data/feed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index e4f4844a..e4a998d5 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -249,7 +249,7 @@ async def start_backfill( last_tsdb_dt: Optional[datetime] = None, storage: Optional[Storage] = None, - write_tsdb: bool = False, + write_tsdb: bool = True, task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED, From 9ddfae44d245df9ac81931b71ab62fae4f1e226a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 13:32:56 -0400 Subject: [PATCH 096/105] Parametrize and deliver (relevant) mkts config in `start_ahab()` --- piker/cli/__init__.py | 31 ++++++++++++++++++++++++++----- piker/data/_ahab.py | 34 ++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 721b2123..7d58ff4d 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -1,7 +1,25 @@ -""" +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' CLI commons. -""" + +''' import os +from pprint import pformat import click import trio @@ -59,12 +77,15 @@ def pikerd(loglevel, host, tl, pdb, tsdb): from piker.data._ahab import start_ahab log.info('Spawning `marketstore` supervisor') - ctn_ready = await n.start( + ctn_ready, config, (cid, pid) = await n.start( start_ahab, 'marketstored', ) - await ctn_ready.wait() - log.info('`marketstore` container:{uid} up') + log.info( + f'`marketstored` up pid:{pid}\n' + f'container up cid:{cid} live with config:\n' + f'{pformat(config)}' + ) await trio.sleep_forever() diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 08da0fff..87f7ef59 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -1,5 +1,5 @@ # piker: trading gear for hackers -# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0) +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -21,7 +21,7 @@ Supervisor for docker with included specific-image service helpers. 
import os from typing import ( Optional, - # Any, + Any, ) from contextlib import asynccontextmanager as acm @@ -39,8 +39,13 @@ from .. import config log = get_logger(__name__) +_config = { + 'grpc_listen_port': 5995, + 'ws_listen_port': 5993, + 'log_level': 'debug', +} -_config = ''' +_yaml_config = ''' # piker's ``marketstore`` config. # mount this config using: @@ -49,9 +54,9 @@ _config = ''' # 5993:5993 alpacamarkets/marketstore:latest root_directory: data -listen_port: 5993 -grpc_listen_port: 5995 -log_level: debug +listen_port: {ws_listen_port} +grpc_listen_port: {grpc_listen_port} +log_level: {log_level} queryable: true stop_grace_period: 0 wal_rotate_interval: 5 @@ -76,7 +81,7 @@ triggers: # config: # filter: "nasdaq" -''' +'''.format(**_config) class DockerNotStarted(Exception): @@ -350,7 +355,12 @@ async def open_marketstored( async def start_ahab( service_name: str, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, + task_status: TaskStatus[ + tuple[ + trio.Event, + dict[str, Any], + ], + ] = trio.TASK_STATUS_IGNORED, ) -> None: ''' @@ -389,14 +399,18 @@ async def start_ahab( )[2] # named user's uid ) - task_status.started(cn_ready) - async with portal.open_context( open_marketstored, ) as (ctx, first): cid, pid = first + task_status.started(( + cn_ready, + _config, + (cid, pid), + )) + await trio.sleep_forever() # since we demoted root perms in this parent From e196e9d1a0894e4308acc23f393139ec0ad616cf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 14:38:38 -0400 Subject: [PATCH 097/105] Factor `marketstore` container specifics into `piker.data.marketstore` --- piker/cli/__init__.py | 7 ++- piker/data/_ahab.py | 117 +++++++------------------------------ piker/data/marketstore.py | 119 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 100 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 7d58ff4d..516a1b96 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -72,14 +72,15 @@ def pikerd(loglevel, host, tl, pdb, tsdb): trio.open_nursery() as n, ): if tsdb: - # TODO: - # async with maybe_open_marketstored(): - from piker.data._ahab import start_ahab + from piker.data.marketstore import start_marketstore + log.info('Spawning `marketstore` supervisor') ctn_ready, config, (cid, pid) = await n.start( start_ahab, 'marketstored', + start_marketstore, + ) log.info( f'`marketstored` up pid:{pid}\n' diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index 87f7ef59..e0f79be4 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -21,6 +21,7 @@ Supervisor for docker with included specific-image service helpers. import os from typing import ( Optional, + Callable, Any, ) from contextlib import asynccontextmanager as acm @@ -28,6 +29,7 @@ from contextlib import asynccontextmanager as acm import trio from trio_typing import TaskStatus import tractor +from tractor.msg import NamespacePath import docker import json from docker.models.containers import Container as DockerContainer @@ -39,50 +41,6 @@ from .. import config log = get_logger(__name__) -_config = { - 'grpc_listen_port': 5995, - 'ws_listen_port': 5993, - 'log_level': 'debug', -} - -_yaml_config = ''' -# piker's ``marketstore`` config. 
- -# mount this config using: -# sudo docker run --mount \ -# type=bind,source="$HOME/.config/piker/",target="/etc" -i -p \ -# 5993:5993 alpacamarkets/marketstore:latest - -root_directory: data -listen_port: {ws_listen_port} -grpc_listen_port: {grpc_listen_port} -log_level: {log_level} -queryable: true -stop_grace_period: 0 -wal_rotate_interval: 5 -stale_threshold: 5 -enable_add: true -enable_remove: false - -triggers: - - module: ondiskagg.so - on: "*/1Sec/OHLCV" - config: - # filter: "nasdaq" - destinations: - - 1Min - - 5Min - - 15Min - - 1H - - 1D - - - module: stream.so - on: '*/*/*' - # config: - # filter: "nasdaq" - -'''.format(**_config) - class DockerNotStarted(Exception): 'Prolly you dint start da daemon bruh' @@ -263,63 +221,22 @@ class Container: @tractor.context -async def open_marketstored( +async def open_ahabd( ctx: tractor.Context, + endpoint: str, # ns-pointer str-msg-type + **kwargs, ) -> None: - ''' - Start and supervise a marketstore instance with its config bind-mounted - in from the piker config directory on the system. - - The equivalent cli cmd to this code is: - - sudo docker run --mount \ - type=bind,source="$HOME/.config/piker/",target="/etc" -i -p \ - 5993:5993 alpacamarkets/marketstore:latest - - ''' get_console_log('info', name=__name__) async with open_docker() as client: - # create a mount from user's local piker config dir into container - config_dir_mnt = docker.types.Mount( - target='/etc', - source=config._config_dir, - type='bind', - ) - - # create a user config subdir where the marketstore - # backing filesystem database can be persisted. - persistent_data_dir = os.path.join( - config._config_dir, 'data', - ) - if not os.path.isdir(persistent_data_dir): - os.mkdir(persistent_data_dir) - - data_dir_mnt = docker.types.Mount( - target='/data', - source=persistent_data_dir, - type='bind', - ) - - dcntr: DockerContainer = client.containers.run( - 'alpacamarkets/marketstore:latest', - # do we need this for cmds? - # '-i', - - # '-p 5993:5993', - ports={ - '5993/tcp': 5993, # jsonrpc - '5995/tcp': 5995, # grpc - }, - mounts=[config_dir_mnt, data_dir_mnt], - detach=True, - # stop_signal='SIGINT', - init=True, - # remove=True, - ) + # TODO: eventually offer a config-oriented API to do the mounts, + # params, etc. passing to ``Containter.run()``? 
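
# --- aside: an illustrative sketch, not part of this patch ---
# The ``endpoint`` handled below arrives as a "namespace path" string
# and is resolved back to a callable via ``NamespacePath(...).load_ref()``.
# The general round-trip can be shown with just the stdlib (these helper
# names are made up; ``tractor.msg.NamespacePath`` is the actual utility
# used by this code):

from importlib import import_module

def to_ns_path(func) -> str:
    # serialize a function reference as a 'module.path:qualname' string
    return f'{func.__module__}:{func.__qualname__}'

def load_ns_path(path: str):
    # resolve such a string back to the referenced object
    modpath, _, name = path.partition(':')
    obj = import_module(modpath)
    for attr in name.split('.'):
        obj = getattr(obj, attr)
    return obj

assert load_ns_path(to_ns_path(import_module)) is import_module
# --- end aside ---
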
+ # call into endpoint for container config/init + ep_func = NamespacePath(endpoint).load_ref() + dcntr, cntr_config = ep_func(client) cntr = Container(dcntr) with trio.move_on_after(1): @@ -332,7 +249,11 @@ async def open_marketstored( 'Failed to start `marketstore` check logs deats' ) - await ctx.started((cntr.cntr.id, os.getpid())) + await ctx.started(( + cntr.cntr.id, + os.getpid(), + cntr_config, + )) try: @@ -355,6 +276,7 @@ async def open_marketstored( async def start_ahab( service_name: str, + endpoint: Callable[docker.DockerClient, DockerContainer], task_status: TaskStatus[ tuple[ trio.Event, @@ -400,14 +322,15 @@ async def start_ahab( ) async with portal.open_context( - open_marketstored, + open_ahabd, + endpoint=str(NamespacePath.from_ref(endpoint)), ) as (ctx, first): - cid, pid = first + cid, pid, cntr_config = first task_status.started(( cn_ready, - _config, + cntr_config, (cid, pid), )) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index 92d860da..fd0e9318 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -23,6 +23,7 @@ - todo: tick sequence stream-cloning for testing ''' +from __future__ import annotations from contextlib import asynccontextmanager as acm from datetime import datetime from pprint import pformat @@ -30,6 +31,7 @@ from typing import ( Any, Optional, Union, + TYPE_CHECKING, ) import time from math import isnan @@ -49,12 +51,129 @@ from anyio_marketstore import ( import pendulum import purerpc +if TYPE_CHECKING: + import docker + from ._ahab import DockerContainer + from .feed import maybe_open_feed from ..log import get_logger, get_console_log log = get_logger(__name__) + +# container level config +_config = { + 'grpc_listen_port': 5995, + 'ws_listen_port': 5993, + 'log_level': 'debug', +} + +_yaml_config = ''' +# piker's ``marketstore`` config. + +# mount this config using: +# sudo docker run --mount \ +# type=bind,source="$HOME/.config/piker/",target="/etc" -i -p \ +# 5993:5993 alpacamarkets/marketstore:latest + +root_directory: data +listen_port: {ws_listen_port} +grpc_listen_port: {grpc_listen_port} +log_level: {log_level} +queryable: true +stop_grace_period: 0 +wal_rotate_interval: 5 +stale_threshold: 5 +enable_add: true +enable_remove: false + +triggers: + - module: ondiskagg.so + on: "*/1Sec/OHLCV" + config: + # filter: "nasdaq" + destinations: + - 1Min + - 5Min + - 15Min + - 1H + - 1D + + - module: stream.so + on: '*/*/*' + # config: + # filter: "nasdaq" + +'''.format(**_config) + + +def start_marketstore( + client: docker.DockerClient, + + **kwargs, + +) -> tuple[DockerContainer, dict[str, Any]]: + ''' + Start and supervise a marketstore instance with its config bind-mounted + in from the piker config directory on the system. + + The equivalent cli cmd to this code is: + + sudo docker run --mount \ + type=bind,source="$HOME/.config/piker/",target="/etc" -i -p \ + 5993:5993 alpacamarkets/marketstore:latest + + ''' + import os + import docker + from .. import config + + get_console_log('info', name=__name__) + + # create a mount from user's local piker config dir into container + config_dir_mnt = docker.types.Mount( + target='/etc', + source=config._config_dir, + type='bind', + ) + + # create a user config subdir where the marketstore + # backing filesystem database can be persisted. 
+ persistent_data_dir = os.path.join( + config._config_dir, 'data', + ) + if not os.path.isdir(persistent_data_dir): + os.mkdir(persistent_data_dir) + + data_dir_mnt = docker.types.Mount( + target='/data', + source=persistent_data_dir, + type='bind', + ) + + dcntr: DockerContainer = client.containers.run( + 'alpacamarkets/marketstore:latest', + # do we need this for cmds? + # '-i', + + # '-p 5993:5993', + ports={ + '5993/tcp': 5993, # jsonrpc / ws? + '5995/tcp': 5995, # grpc + }, + mounts=[ + config_dir_mnt, + data_dir_mnt, + ], + detach=True, + # stop_signal='SIGINT', + init=True, + # remove=True, + ) + return dcntr, _config + + _tick_tbk_ids: tuple[str, str] = ('1Sec', 'TICK') _tick_tbk: str = '{}/' + '/'.join(_tick_tbk_ids) From 769e803695a996b3f4896bd164b5c7f128a131d6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 14:51:24 -0400 Subject: [PATCH 098/105] Write `mkts.yml` from template if one dne --- piker/data/marketstore.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/piker/data/marketstore.py b/piker/data/marketstore.py index fd0e9318..39fe1b70 100644 --- a/piker/data/marketstore.py +++ b/piker/data/marketstore.py @@ -131,6 +131,16 @@ def start_marketstore( get_console_log('info', name=__name__) + yml_file = os.path.join(config._config_dir, 'mkts.yml') + if not os.path.isfile(yml_file): + log.warning( + f'No `marketstore` config exists?: {yml_file}\n' + 'Generating new file from template:\n' + f'{_yaml_config}\n' + ) + with open(yml_file, 'w') as yf: + yf.write(_yaml_config) + # create a mount from user's local piker config dir into container config_dir_mnt = docker.types.Mount( target='/etc', From 083a3296e704196a601b9316657cee985363c41d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 14:54:37 -0400 Subject: [PATCH 099/105] Better formatted startup logging output --- piker/cli/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/piker/cli/__init__.py b/piker/cli/__init__.py index 516a1b96..e9512322 100644 --- a/piker/cli/__init__.py +++ b/piker/cli/__init__.py @@ -83,9 +83,10 @@ def pikerd(loglevel, host, tl, pdb, tsdb): ) log.info( - f'`marketstored` up pid:{pid}\n' - f'container up cid:{cid} live with config:\n' - f'{pformat(config)}' + f'`marketstore` up!\n' + f'`marketstored` pid: {pid}\n' + f'docker container id: {cid}\n' + f'config: {pformat(config)}' ) await trio.sleep_forever() From b1246446c2ef8706241be47af73525ec75a48329 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 15:20:00 -0400 Subject: [PATCH 100/105] Raise error on 'fatal' and 'error' log levels --- piker/data/_ahab.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/piker/data/_ahab.py b/piker/data/_ahab.py index e0f79be4..0f96ecaa 100644 --- a/piker/data/_ahab.py +++ b/piker/data/_ahab.py @@ -33,7 +33,10 @@ from tractor.msg import NamespacePath import docker import json from docker.models.containers import Container as DockerContainer -from docker.errors import DockerException, APIError +from docker.errors import ( + DockerException, + APIError, +) from requests.exceptions import ConnectionError, ReadTimeout from ..log import get_logger, get_console_log @@ -46,6 +49,10 @@ class DockerNotStarted(Exception): 'Prolly you dint start da daemon bruh' +class ContainerError(RuntimeError): + 'Error reported via app-container logging level' + + @acm async def open_docker( url: Optional[str] = None, @@ -148,6 +155,10 @@ class Container: getattr(log, level, log.error)(f'{msg}') + # 
print(f'level: {level}') + if level in ('error', 'fatal'): + raise ContainerError(msg) + if patt in msg: return True From 1657f51edc8be0d8aa3ff7c3bb94a3aea00692a4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 17:21:11 -0400 Subject: [PATCH 101/105] Manually fetch missing out-of-order history frames It seems once in a while a frame can get missed or dropped (at least with binance?) so in those cases, when the request erlangs is already at max, we just manually request the missing frame and presume things will work out XD Further, discard out of order frames that are "from the future" that somehow end up in the async queue once in a while? Not sure why this happens but it seems thus far just discarding them is nbd. --- piker/data/feed.py | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/piker/data/feed.py b/piker/data/feed.py index e4a998d5..e77052bf 100644 --- a/piker/data/feed.py +++ b/piker/data/feed.py @@ -397,7 +397,7 @@ async def start_backfill( diff = end_dt - start_dt frame_time_diff_s = diff.seconds - expected_frame_size_s = frame_size_s # + step_size_s + expected_frame_size_s = frame_size_s + step_size_s if frame_time_diff_s > expected_frame_size_s: @@ -522,8 +522,18 @@ async def start_backfill( last_shm_prepend_dt = pendulum.from_timestamp(start) earliest_frame_queue_dt = pendulum.from_timestamp(epoch) - diff = epoch - start - if abs(diff) > step_size_s: + diff = start - epoch + + if diff < 0: + log.warning( + 'Discarding out of order frame:\n' + f'{earliest_frame_queue_dt}' + ) + frames.pop(epoch) + continue + # await tractor.breakpoint() + + if diff > step_size_s: if earliest_end_dt < earliest_frame_queue_dt: # XXX: an expected gap was encountered (see @@ -548,8 +558,32 @@ async def start_backfill( 'then erlangs?\n' 'There seems to be a gap between:\n' f'{earliest_frame_queue_dt} <- ' + f'{last_shm_prepend_dt}\n' + 'Conducting manual call for frame ending: ' f'{last_shm_prepend_dt}' ) + ( + to_push, + start_dt, + end_dt, + ) = await get_ohlc_frame( + input_end_dt=last_shm_prepend_dt, + iter_dts_gen=idts, + ) + last_epoch = to_push['time'][-1] + diff = start - last_epoch + + if diff > step_size_s: + await tractor.breakpoint() + raise DataUnavailable( + 'An awkward frame was found:\n' + f'{start_dt} -> {end_dt}:\n{to_push}' + ) + + else: + frames[last_epoch] = ( + to_push, start_dt, end_dt) + break expect_end = pendulum.from_timestamp(start) expect_start = expect_end.subtract( From b910eceb3b617c8cb55b63cad0bd31e7d4c7e0d0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 May 2022 17:53:46 -0400 Subject: [PATCH 102/105] Add `ShmArray.ustruct()`: return an unstructured array copy We return a copy (since since a view doesn't seem to work..) of the (field filtered) shm array contents which is the same index-length as the source data. Further, fence off the resource tracker disable-hack into a helper routine. 
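
As a rough standalone illustration of the same numpy mechanics (toy data
and names, not code from this patch): field-filter a structured array,
then flatten it to a plain 2d float array via ``numpy.lib.recfunctions``,
which in practice copies since a zero-copy view doesn't generally work
out for a field subset.

    import numpy as np
    from numpy.lib import recfunctions as rfn

    ohlc = np.array(
        [(1, 10.0, 11.0), (2, 10.5, 11.5)],
        dtype=[('time', 'i8'), ('open', 'f8'), ('close', 'f8')],
    )

    # select a subset of fields, then cast to an unstructured 2d float
    # array; the row count ("index-length") matches the source array.
    unstruct = rfn.structured_to_unstructured(ohlc[['open', 'close']]).copy()
    assert unstruct.shape == (2, 2)
    assert unstruct.dtype == np.float64
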
--- piker/data/_sharedmem.py | 95 ++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index 6bc69eb4..fbdb351e 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -22,7 +22,6 @@ from __future__ import annotations from sys import byteorder from typing import Optional from multiprocessing.shared_memory import SharedMemory, _USE_POSIX -from multiprocessing import resource_tracker as mantracker if _USE_POSIX: from _posixshmem import shm_unlink @@ -30,6 +29,7 @@ if _USE_POSIX: import tractor import numpy as np from pydantic import BaseModel +from numpy.lib import recfunctions as rfn from ..log import get_logger from ._source import base_iohlc_dtype @@ -46,26 +46,33 @@ _default_size = 10 * _secs_in_day _rt_buffer_start = int(9*_secs_in_day) -# Tell the "resource tracker" thing to fuck off. -class ManTracker(mantracker.ResourceTracker): - def register(self, name, rtype): - pass +def cuckoff_mantracker(): - def unregister(self, name, rtype): - pass + from multiprocessing import resource_tracker as mantracker - def ensure_running(self): - pass + # Tell the "resource tracker" thing to fuck off. + class ManTracker(mantracker.ResourceTracker): + def register(self, name, rtype): + pass + + def unregister(self, name, rtype): + pass + + def ensure_running(self): + pass -# "know your land and know your prey" -# https://www.dailymotion.com/video/x6ozzco -mantracker._resource_tracker = ManTracker() -mantracker.register = mantracker._resource_tracker.register -mantracker.ensure_running = mantracker._resource_tracker.ensure_running -ensure_running = mantracker._resource_tracker.ensure_running -mantracker.unregister = mantracker._resource_tracker.unregister -mantracker.getfd = mantracker._resource_tracker.getfd + # "know your land and know your prey" + # https://www.dailymotion.com/video/x6ozzco + mantracker._resource_tracker = ManTracker() + mantracker.register = mantracker._resource_tracker.register + mantracker.ensure_running = mantracker._resource_tracker.ensure_running + ensure_running = mantracker._resource_tracker.ensure_running + mantracker.unregister = mantracker._resource_tracker.unregister + mantracker.getfd = mantracker._resource_tracker.getfd + + +cuckoff_mantracker() class SharedInt: @@ -191,7 +198,11 @@ class ShmArray: self._post_init: bool = False # pushing data does not write the index (aka primary key) - self._write_fields = list(shmarr.dtype.fields.keys())[1:] + dtype = shmarr.dtype + if dtype.fields: + self._write_fields = list(shmarr.dtype.fields.keys())[1:] + else: + self._write_fields = None # TODO: ringbuf api? @@ -237,6 +248,48 @@ class ShmArray: return a + def ustruct( + self, + fields: Optional[list[str]] = None, + + # type that all field values will be cast to + # in the returned view. + common_dtype: np.dtype = np.float, + + ) -> np.ndarray: + + array = self._array + + if fields: + selection = array[fields] + fcount = len(fields) + else: + selection = array + fcount = len(array.dtype.fields) + + # XXX: manual ``.view()`` attempt that also doesn't work. 
+ # uview = selection.view( + # dtype=' Date: Tue, 26 Apr 2022 09:29:09 -0400 Subject: [PATCH 103/105] Double up shm buffer size --- piker/data/_sharedmem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index fbdb351e..c830d051 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -41,7 +41,7 @@ log = get_logger(__name__) # how much is probably dependent on lifestyle _secs_in_day = int(60 * 60 * 24) # we try for 3 times but only on a run-every-other-day kinda week. -_default_size = 10 * _secs_in_day +_default_size = 20 * _secs_in_day # where to start the new data append index _rt_buffer_start = int(9*_secs_in_day) From 09431aad85ee98799fd4916ade4aa61b324bb452 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 27 Apr 2022 14:51:20 -0400 Subject: [PATCH 104/105] Add support for no `._first.value` update shm prepends --- piker/data/_sharedmem.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index c830d051..f1e89fca 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -40,10 +40,11 @@ log = get_logger(__name__) # how much is probably dependent on lifestyle _secs_in_day = int(60 * 60 * 24) -# we try for 3 times but only on a run-every-other-day kinda week. -_default_size = 20 * _secs_in_day +# we try for a buncha times, but only on a run-every-other-day kinda week. +_days_worth = 16 +_default_size = _days_worth * _secs_in_day # where to start the new data append index -_rt_buffer_start = int(9*_secs_in_day) +_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) def cuckoff_mantracker(): @@ -61,13 +62,12 @@ def cuckoff_mantracker(): def ensure_running(self): pass - # "know your land and know your prey" # https://www.dailymotion.com/video/x6ozzco mantracker._resource_tracker = ManTracker() mantracker.register = mantracker._resource_tracker.register mantracker.ensure_running = mantracker._resource_tracker.ensure_running - ensure_running = mantracker._resource_tracker.ensure_running + # ensure_running = mantracker._resource_tracker.ensure_running mantracker.unregister = mantracker._resource_tracker.unregister mantracker.getfd = mantracker._resource_tracker.getfd @@ -262,10 +262,10 @@ class ShmArray: if fields: selection = array[fields] - fcount = len(fields) + # fcount = len(fields) else: selection = array - fcount = len(array.dtype.fields) + # fcount = len(array.dtype.fields) # XXX: manual ``.view()`` attempt that also doesn't work. # uview = selection.view( @@ -308,6 +308,7 @@ class ShmArray: field_map: Optional[dict[str, str]] = None, prepend: bool = False, + update_first: bool = True, start: Optional[int] = None, ) -> int: @@ -320,10 +321,9 @@ class ShmArray: ''' length = len(data) - index = start if start is not None else self._last.value if prepend: - index = self._first.value - length + index = (start or self._first.value) - length if index < 0: raise ValueError( @@ -331,6 +331,9 @@ class ShmArray: f'You have passed {abs(index)} too many datums.' ) + else: + index = start if start is not None else self._last.value + end = index + length if field_map: @@ -348,12 +351,17 @@ class ShmArray: # tries to access ``.array`` (which due to the index # overlap will be empty). Pretty sure we've fixed it now # but leaving this here as a reminder. 
- if prepend: + if prepend and update_first: assert index < self._first.value - if index < self._first.value: + if ( + index < self._first.value + and update_first + ): + assert prepend, 'prepend=True not passed but index decreased?' self._first.value = index - else: + + elif not prepend: self._last.value = end self._post_init = True @@ -389,6 +397,7 @@ class ShmArray: f"Input array has unknown field(s): {only_in_theirs}" ) + # TODO: support "silent" prepends that don't update ._first.value? def prepend( self, data: np.ndarray, From b3f9c4f93d78f3c81f46dbcce2a873701e158492 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 May 2022 13:51:29 -0400 Subject: [PATCH 105/105] Only assert if input array actually has a size --- piker/data/_sharedmem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index f1e89fca..8848ec1c 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -351,7 +351,7 @@ class ShmArray: # tries to access ``.array`` (which due to the index # overlap will be empty). Pretty sure we've fixed it now # but leaving this here as a reminder. - if prepend and update_first: + if prepend and update_first and length: assert index < self._first.value if (