Wrap null-gap detect and fill in async gen

Call it `iter_null_segs()` (for now?) and use it in the final
(sequential) stage of the `.history.start_backfill()` task-func. On
each iteration it delivers the absolute, frame-relative, and equivalent
timestamps pertaining to each detected null-segment, making it easy to
do piece-wise history queries for each.
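
A minimal consumption sketch (mirroring the new
`maybe_fill_null_segments()` loop in the diff below; the wrapper name
`fill_gaps` is hypothetical, `get_hist` is the backend routine already
in scope at the real call-site):

```python
# minimal consumption sketch of the new async-gen API, mirroring
# the `maybe_fill_null_segments()` loop in the diff below.
async def fill_gaps(frame, timeframe: float, get_hist) -> None:
    async for (
        absi_start, absi_end,  # absolute shm-buffer indices
        fi_start, fi_end,  # frame-relative indices
        start_t, end_t,  # epoch-float timestamps
        start_dt, end_dt,  # equivalent `pendulum.DateTime`s
    ) in iter_null_segs(
        frame,
        timeframe=timeframe,
    ):
        # NOTE: `start_dt` may be `None` meaning "deliver max
        # history ending at `end_dt`" (see the edge case below).
        array, next_start_dt, next_end_dt = await get_hist(
            timeframe,
            start_dt=start_dt,
            end_dt=end_dt,
        )
```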

Further,
- handle the edge case in `get_null_segs()` where there is only 1
  zeroed row value, in which case we deliver `absi_zsegs` as a single
  `[None, absi_zeros[0] + 1]` pair and,
  - when this occurs `iter_null_segs()` delivers `None` for all the
    `start_`-related indices/timestamps since all `get_hist()` routines
    (delivered by `open_history_client()`) should handle it as a "get
    max history from this end_dt" type query; see the hypothetical
    backend sketch after this list.
- add a note about needing to do time-gap handling where there's a gap
  in the timeseries-history that isn't actually IN the data-history.
distribute_dis
Tyler Goodlet 2023-12-13 18:29:06 -05:00
parent c129f5bb4a
commit 83bdca46a2
2 changed files with 154 additions and 74 deletions

View File

@@ -59,9 +59,10 @@ from ._sampling import (
 from .tsp import (
     dedupe,
     get_null_segs,
+    iter_null_segs,
     sort_diff,
     Frame,
-    Seq,
+    # Seq,
 )
 from ..brokers._util import (
     DataUnavailable,
@@ -174,75 +175,71 @@ async def maybe_fill_null_segments(
 ) -> list[Frame]:
     frame: Frame = shm.array
-
-    null_segs: tuple | None = get_null_segs(
-        frame,
-        period=timeframe,
-    )
-    if null_segs:
-        absi_pairs_zsegs: list[list[float, float]]
-        izeros: Seq
-        zero_t: Frame
-        (
-            absi_pairs_zsegs,
-            izeros,
-            zero_t,
-        ) = null_segs
-
-        absi_first: int = frame[0]['index']
-        for absi_start, absi_end in absi_pairs_zsegs:
-            # await tractor.pause()
-            fi_start = absi_start - absi_first
-            fi_end = absi_end - absi_first
-            start_row: Seq = frame[fi_start]
-            end_row: Seq = frame[fi_end]
-
-            start_t: float = start_row['time']
-            end_t: float = end_row['time']
-
-            start_dt = from_timestamp(start_t)
-            end_dt = from_timestamp(end_t)
-
-            # if we get a badly ordered timestamp
-            # pair, immediately stop backfilling.
-            if end_dt < start_dt:
-                break
-
-            (
-                array,
-                next_start_dt,
-                next_end_dt,
-            ) = await get_hist(
-                timeframe,
-                start_dt=start_dt,
-                end_dt=end_dt,
-            )
-
-            # XXX TODO: pretty sure if i plot tsla, btcusdt.binance
-            # and mnq.cme.ib this causes a Qt crash XXDDD
-
-            # make sure we don't overrun the buffer start
-            len_to_push: int = min(absi_end, array.size)
-            to_push: np.ndarray = array[-len_to_push:]
-            await shm_push_in_between(
-                shm,
-                to_push,
-                prepend_index=absi_end,
-                update_start_on_prepend=False,
-            )
-
-            # TODO: UI side needs IPC event to update..
-            # - make sure the UI actually always handles
-            #   this update!
-            # - remember that in the display side, only refersh this
-            #   if the respective history is actually "in view".
-            # loop
-            await sampler_stream.send({
-                'broadcast_all': {
-                    'backfilling': (mkt.fqme, timeframe),
-                },
-            })
-
-        # RECHECK for more null-gaps
-        frame: Frame = shm.array
+    async for (
+        absi_start, absi_end,
+        fi_start, fi_end,
+        start_t, end_t,
+        start_dt, end_dt,
+    ) in iter_null_segs(
+        frame,
+        timeframe=timeframe,
+    ):
+
+        # XXX NOTE: ?if we get a badly ordered timestamp
+        # pair, immediately stop backfilling?
+        if (
+            start_dt
+            and end_dt < start_dt
+        ):
+            break
+
+        (
+            array,
+            next_start_dt,
+            next_end_dt,
+        ) = await get_hist(
+            timeframe,
+            start_dt=start_dt,
+            end_dt=end_dt,
+        )
+
+        # XXX TODO: pretty sure if i plot tsla, btcusdt.binance
+        # and mnq.cme.ib this causes a Qt crash XXDDD
+
+        # make sure we don't overrun the buffer start
+        len_to_push: int = min(absi_end, array.size)
+        to_push: np.ndarray = array[-len_to_push:]
+        await shm_push_in_between(
+            shm,
+            to_push,
+            prepend_index=absi_end,
+            update_start_on_prepend=False,
+        )
+
+        # TODO: UI side needs IPC event to update..
+        # - make sure the UI actually always handles
+        #   this update!
+        # - remember that in the display side, only refersh this
+        #   if the respective history is actually "in view".
+        # loop
+        await sampler_stream.send({
+            'broadcast_all': {
+                # XXX NOTE XXX: see the
+                # `.ui._display.increment_history_view()` if block
+                # that looks for this info to FORCE a hard viz
+                # redraw!
+                'backfilling': (mkt.fqme, timeframe),
+            },
+        })
+
+        await tractor.pause()
+
+        # TODO: interatively step through any remaining time gaps?
+        # if (
+        #     next_end_dt not in frame[
+        # ):
+        #     pass
+
+    # RECHECK for more null-gaps
+    frame: Frame = shm.array
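
(Worth restating why the unchanged `min(absi_end, array.size)` guard
above exists: `shm_push_in_between()` writes backwards from
`prepend_index`, so only `absi_end` slots exist behind it. A tiny
worked example with made-up numbers:)

```python
# worked example of the buffer-start guard (hypothetical values):
absi_end = 300    # null-segment ends at absolute shm index 300
array_size = 500  # provider over-delivered 500 rows

# only 300 slots exist behind `prepend_index=absi_end`, so cap:
len_to_push = min(absi_end, array_size)  # -> 300
# to_push = array[-len_to_push:]  # push only the newest 300 rows
```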

View File

@@ -29,10 +29,17 @@ from math import (
     floor,
 )
 import time
-from typing import Literal
+from typing import (
+    Literal,
+    AsyncGenerator,
+)
 
 import numpy as np
 import polars as pl
+from pendulum import (
+    DateTime,
+    from_timestamp,
+)
 
 from ..toolz.profile import (
     Profiler,
@@ -223,7 +230,10 @@ def get_null_segs(
     col: str = 'time',
 
 ) -> tuple[
-    Seq,
+    # Seq,  # TODO: can we make it an array-type instead?
+    list[
+        list[int, int],
+    ],
     Seq,
     Frame
 ] | None:
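
For reference, unpacking the now-advertised triple looks like the
following (a sketch matching the pre-existing call-site; `frame` and
`timeframe` assumed in scope):

```python
# sketch: consume the advertised return shape of `get_null_segs()`
if null_segs := get_null_segs(frame, period=timeframe):
    (
        absi_zsegs,  # list[list[int, int]]: [start, end] abs-index pairs
        absi_zeros,  # Seq: abs index of every zeroed row
        zero_t,  # Frame: the zeroed rows themselves
    ) = null_segs
```
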
@@ -285,13 +295,27 @@ def get_null_segs(
     # select out slice index pairs for each null-segment
     # portion detected throughout entire input frame.
-    # import pdbp; pdbp.set_trace()
+    # only one null-segment in entire frame?
     if not fi_zgaps.size:
+        # check for number null rows
         # TODO: use ndarray for this!
-        absi_zsegs = [[
-            absi_zeros[0],  # - 1,  # - ifirst,
-            # TODO: need the + 1 or no?
-            absi_zeros[-1] + 1,  # - ifirst,
-        ]]
+        if absi_zeros.size > 1:
+            absi_zsegs = [[
+                absi_zeros[0],  # - 1,  # - ifirst,
+                # TODO: need the + 1 or no?
+                absi_zeros[-1] + 1,  # - ifirst,
+            ]]
+
+        else:
+            absi_zsegs = [[
+                # absi_zeros[0] + 1,
+                # see `get_hist()` in backend, should ALWAYS be
+                # able to handle a `start_dt=None`!
+                None,
+                absi_zeros[0] + 1,
+            ]]
+
     else:
         absi_zsegs.append([
             absi_zeros[0] - 1,  # - ifirst,
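
To make the new branching concrete, with hypothetical index values:

```python
# frame's only zeroed row sits at absolute index 100
# (i.e. `absi_zeros.size == 1`):
#   absi_zsegs == [[None, 101]]  # `None` start => "max history" query

# contiguous zeroed run over absolute indices 100..110
# (multiple zeros, but no interior `fi_zgaps`):
#   absi_zsegs == [[100, 111]]
```
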
@@ -305,15 +329,12 @@ def get_null_segs(
     ) in enumerate(zip(
         fi_zgaps,
         fi_zseg_start_rows,
-        # fi_zgaps,
-        # start=1,
     )):
         assert (zseg_start_row == zero_t[fi]).all()
         absi: int = zseg_start_row['index'][0]
 
         # row = zero_t[fi]
         # absi_pre_zseg = row['index'][0] - 1
-        absi_pre_zseg = absi - 1
+        # absi_pre_zseg = absi - 1
 
         if i > 0:
             prev_zseg_row = zero_t[fi - 1]
@@ -330,7 +351,6 @@ def get_null_segs(
         assert end
         assert start < end
 
-    # import pdbp; pdbp.set_trace()
     return (
         absi_zsegs,  # start indices of null
         absi_zeros,
@@ -338,6 +358,69 @@ def get_null_segs(
     )
 
 
+async def iter_null_segs(
+    frame: Frame,
+    timeframe: float,
+
+) -> AsyncGenerator[
+    tuple[
+        int, int,
+        int, int,
+        float, float,
+        float, float,
+
+        # Seq,  # TODO: can we make it an array-type instead?
+        # list[
+        #     list[int, int],
+        # ],
+        # Seq,
+        # Frame
+    ],
+    None,
+]:
+    if null_segs := get_null_segs(
+        frame,
+        period=timeframe,
+    ):
+        absi_pairs_zsegs: list[list[float, float]]
+        izeros: Seq
+        zero_t: Frame
+        (
+            absi_pairs_zsegs,
+            izeros,
+            zero_t,
+        ) = null_segs
+
+        absi_first: int = frame[0]['index']
+        for (
+            absi_start,
+            absi_end,
+        ) in absi_pairs_zsegs:
+
+            fi_end: int = absi_end - absi_first
+            end_row: Seq = frame[fi_end]
+            end_t: float = end_row['time']
+            end_dt: DateTime = from_timestamp(end_t)
+
+            if absi_start is not None:
+                fi_start: int = absi_start - absi_first
+                start_row: Seq = frame[fi_start]
+                start_t: float = start_row['time']
+                start_dt: DateTime = from_timestamp(start_t)
+
+            else:
+                fi_start = None
+                start_row = None
+                start_t = None
+                start_dt = None
+
+            yield (
+                absi_start, absi_end,  # abs indices
+                fi_start, fi_end,  # relative "frame" indices
+                start_t, end_t,
+                start_dt, end_dt,
+            )
+
+
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',