From 83bdca46a2b2a2ca6c71e0de4cc9273bfc426865 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 13 Dec 2023 18:29:06 -0500
Subject: [PATCH] Wrap null-gap detect and fill in async gen

Call it `iter_null_segs()` (for now?) and use it in the final
(sequential) stage of the `.history.start_backfill()` task-func. On
each iteration it delivers the absolute, frame-relative, and equivalent
time stamps for each detected null-segment, making it easy to do
piece-wise history queries for each.

Further,
- handle the edge case in `get_null_segs()` where there is only 1
  zeroed row value, in which case we deliver `absi_zsegs` as a single
  `[None, absi_zeros[0] + 1]` pair, and
- when this occurs `iter_null_segs()` delivers `None` for all the
  `start_`-related indices/timestamps since all `get_hist()` routines
  (delivered by `open_history_client()`) should handle it as a "get
  max history from this end_dt" type query,
- add a note about needing to do time gap handling where there's a gap
  in the timeseries-history that isn't actually IN the data-history.
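For reference, the consumption loop in `maybe_fill_null_segments()`
now reads roughly like the sketch below (where `get_hist` is whatever
frame-fetcher the backend's `open_history_client()` delivered):

    async for (
        absi_start, absi_end,  # abs shm-buffer indices
        fi_start, fi_end,  # frame-relative indices
        start_t, end_t,  # epoch float stamps
        start_dt, end_dt,  # equivalent `pendulum.DateTime`s
    ) in iter_null_segs(
        frame,
        timeframe=timeframe,
    ):
        # `start_dt=None` (the single-zeroed-row edge case) is
        # expected to be handled by the backend as a "get max
        # history from this end_dt" query.
        (
            array,
            next_start_dt,
            next_end_dt,
        ) = await get_hist(
            timeframe,
            start_dt=start_dt,
            end_dt=end_dt,
        )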
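To make the single-null-row edge case concrete, a (hypothetical) frame
whose only zeroed row sits at abs index 1000 now results in (assuming
the null-row is detected at all),

    absi_zsegs, absi_zeros, zero_t = get_null_segs(
        frame,
        period=timeframe,
    )
    assert absi_zsegs == [[None, 1001]]  # ie. [None, absi_zeros[0] + 1]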
---
 piker/data/history.py | 121 ++++++++++++++++++++----------------------
 piker/data/tsp.py     | 107 ++++++++++++++++++++++++++++++++-----
 2 files changed, 154 insertions(+), 74 deletions(-)

diff --git a/piker/data/history.py b/piker/data/history.py
index dbd7c26b..4b7737b0 100644
--- a/piker/data/history.py
+++ b/piker/data/history.py
@@ -59,9 +59,10 @@ from ._sampling import (
 from .tsp import (
     dedupe,
     get_null_segs,
+    iter_null_segs,
     sort_diff,
     Frame,
-    Seq,
+    # Seq,
 )
 from ..brokers._util import (
     DataUnavailable,
@@ -174,75 +175,71 @@ async def maybe_fill_null_segments(
 ) -> list[Frame]:

     frame: Frame = shm.array
-
-    null_segs: tuple | None = get_null_segs(
+    async for (
+        absi_start, absi_end,
+        fi_start, fi_end,
+        start_t, end_t,
+        start_dt, end_dt,
+    ) in iter_null_segs(
         frame,
-        period=timeframe,
-    )
-    if null_segs:
-        absi_pairs_zsegs: list[list[float, float]]
-        izeros: Seq
-        zero_t: Frame
+        timeframe=timeframe,
+    ):
+
+        # XXX NOTE: ?if we get a badly ordered timestamp
+        # pair, immediately stop backfilling?
+        if (
+            start_dt
+            and end_dt < start_dt
+        ):
+            break
+
         (
-            absi_pairs_zsegs,
-            izeros,
-            zero_t,
-        ) = null_segs
+            array,
+            next_start_dt,
+            next_end_dt,
+        ) = await get_hist(
+            timeframe,
+            start_dt=start_dt,
+            end_dt=end_dt,
+        )

-        absi_first: int = frame[0]['index']
-        for absi_start, absi_end in absi_pairs_zsegs:
-            # await tractor.pause()
-            fi_start = absi_start - absi_first
-            fi_end = absi_end - absi_first
-            start_row: Seq = frame[fi_start]
-            end_row: Seq = frame[fi_end]
+        # XXX TODO: pretty sure if i plot tsla, btcusdt.binance
+        # and mnq.cme.ib this causes a Qt crash XXDDD

-            start_t: float = start_row['time']
-            end_t: float = end_row['time']
+        # make sure we don't overrun the buffer start
+        len_to_push: int = min(absi_end, array.size)
+        to_push: np.ndarray = array[-len_to_push:]

-            start_dt = from_timestamp(start_t)
-            end_dt = from_timestamp(end_t)
+        await shm_push_in_between(
+            shm,
+            to_push,
+            prepend_index=absi_end,
+            update_start_on_prepend=False,
+        )

+        # TODO: UI side needs IPC event to update..
+        # - make sure the UI actually always handles
+        #   this update!
+        # - remember that in the display side, only refresh this
+        #   if the respective history is actually "in view".
+        # loop
+        await sampler_stream.send({
+            'broadcast_all': {

-            # if we get a badly ordered timestamp
-            # pair, immediately stop backfilling.
-            if end_dt < start_dt:
-                break
+                # XXX NOTE XXX: see the
+                # `.ui._display.increment_history_view()` if block
+                # that looks for this info to FORCE a hard viz
+                # redraw!
+                'backfilling': (mkt.fqme, timeframe),
+            },
+        })

-            (
-                array,
-                next_start_dt,
-                next_end_dt,
-            ) = await get_hist(
-                timeframe,
-                start_dt=start_dt,
-                end_dt=end_dt,
-            )
+        await tractor.pause()

-            # XXX TODO: pretty sure if i plot tsla, btcusdt.binance
-            # and mnq.cme.ib this causes a Qt crash XXDDD
-
-            # make sure we don't overrun the buffer start
-            len_to_push: int = min(absi_end, array.size)
-            to_push: np.ndarray = array[-len_to_push:]
-
-            await shm_push_in_between(
-                shm,
-                to_push,
-                prepend_index=absi_end,
-                update_start_on_prepend=False,
-            )
-
-            # TODO: UI side needs IPC event to update..
-            # - make sure the UI actually always handles
-            #   this update!
-            # - remember that in the display side, only refersh this
-            #   if the respective history is actually "in view".
-            # loop
-            await sampler_stream.send({
-                'broadcast_all': {
-                    'backfilling': (mkt.fqme, timeframe),
-                },
-            })
+        # TODO: iteratively step through any remaining time gaps?
+        # if (
+        #     next_end_dt not in frame[
+        # ):
+        #     pass

     # RECHECK for more null-gaps
     frame: Frame = shm.array

diff --git a/piker/data/tsp.py b/piker/data/tsp.py
index 34cc794a..8027908b 100644
--- a/piker/data/tsp.py
+++ b/piker/data/tsp.py
@@ -29,10 +29,17 @@ from math import (
     floor,
 )
 import time
-from typing import Literal
+from typing import (
+    Literal,
+    AsyncGenerator,
+)

 import numpy as np
 import polars as pl
+from pendulum import (
+    DateTime,
+    from_timestamp,
+)

 from ..toolz.profile import (
     Profiler,
@@ -223,7 +230,10 @@ def get_null_segs(
     col: str = 'time',

 ) -> tuple[
-    Seq,
+    # Seq,  # TODO: can we make it an array-type instead?
+    list[
+        list[int, int],
+    ],
     Seq,
     Frame
 ] | None:
@@ -285,13 +295,27 @@ def get_null_segs(
     # select out slice index pairs for each null-segment
     # portion detected throughout entire input frame.
+    # import pdbp; pdbp.set_trace()
+
+    # only one null-segment in entire frame?
     if not fi_zgaps.size:
+
+        # check for number of null rows
         # TODO: use ndarray for this!
-        absi_zsegs = [[
-            absi_zeros[0],  # - 1,  # - ifirst,
-            # TODO: need the + 1 or no?
-            absi_zeros[-1] + 1,  # - ifirst,
-        ]]
+        if absi_zeros.size > 1:
+            absi_zsegs = [[
+                absi_zeros[0],  # - 1,  # - ifirst,
+                # TODO: need the + 1 or no?
+                absi_zeros[-1] + 1,  # - ifirst,
+            ]]
+        else:
+            absi_zsegs = [[
+                # absi_zeros[0] + 1,
+                # see `get_hist()` in backend, should ALWAYS be
+                # able to handle a `start_dt=None`!
+                None,
+                absi_zeros[0] + 1,
+            ]]

     else:
         absi_zsegs.append([
             absi_zeros[0] - 1,  # - ifirst,
@@ -305,15 +329,12 @@ def get_null_segs(
     ) in enumerate(zip(
         fi_zgaps,
         fi_zseg_start_rows,
-        # fi_zgaps,
-        # start=1,
     )):
         assert (zseg_start_row == zero_t[fi]).all()
-
         absi: int = zseg_start_row['index'][0]
         # row = zero_t[fi]
         # absi_pre_zseg = row['index'][0] - 1
-        absi_pre_zseg = absi - 1
+        # absi_pre_zseg = absi - 1

         if i > 0:
             prev_zseg_row = zero_t[fi - 1]
@@ -330,7 +351,6 @@ def get_null_segs(
         assert end
         assert start < end

-    # import pdbp; pdbp.set_trace()
     return (
         absi_zsegs,  # start indices of null
         absi_zeros,
@@ -338,6 +358,69 @@ def get_null_segs(
     )


+async def iter_null_segs(
+    frame: Frame,
+    timeframe: float,
+) -> AsyncGenerator[
+    tuple[
+        int, int,
+        int, int,
+        float, float,
+        float, float,
+
+        # Seq,  # TODO: can we make it an array-type instead?
+        # list[
+        #     list[int, int],
+        # ],
+        # Seq,
+        # Frame
+    ],
+    None,
+]:
+    if null_segs := get_null_segs(
+        frame,
+        period=timeframe,
+    ):
+        absi_pairs_zsegs: list[list[float, float]]
+        izeros: Seq
+        zero_t: Frame
+        (
+            absi_pairs_zsegs,
+            izeros,
+            zero_t,
+        ) = null_segs
+
+        absi_first: int = frame[0]['index']
+        for (
+            absi_start,
+            absi_end,
+        ) in absi_pairs_zsegs:
+
+            fi_end: int = absi_end - absi_first
+            end_row: Seq = frame[fi_end]
+            end_t: float = end_row['time']
+            end_dt: DateTime = from_timestamp(end_t)
+
+            if absi_start is not None:
+                fi_start: int = absi_start - absi_first
+                start_row: Seq = frame[fi_start]
+                start_t: float = start_row['time']
+                start_dt: DateTime = from_timestamp(start_t)
+
+            else:
+                fi_start = None
+                start_row = None
+                start_t = None
+                start_dt = None
+
+            yield (
+                absi_start, absi_end,  # abs indices
+                fi_start, fi_end,  # relative "frame" indices
+                start_t, end_t,
+                start_dt, end_dt,
+            )
+
+
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',