Wrap null-gap detect and fill in async gen

Call it `iter_null_segs()` (for now?) and use it in the final (sequential) stage of the `.history.start_backfill()` task-func. On each iteration it delivers absolute, frame-relative, and equivalent timestamps for each detected null-segment, making it easy to do piece-wise history queries for each. Further:
- handle the edge case in `get_null_segs()` where there is only 1 zeroed row value, in which case we deliver `absi_zsegs` as a single pair of the same index value, and
- when this occurs, `iter_null_segs()` delivers `None` for all the `start_`-related indices/timestamps, since all `get_hist()` routines (delivered by `open_history_client()`) should handle it as a "get max history from this end_dt" type query,
- add a note about needing to do time-gap handling where there's a gap in the timeseries-history that isn't actually IN the data-history.
parent c129f5bb4a
commit 83bdca46a2
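For orientation before the diff, here is a minimal sketch of the consumption pattern the new generator enables. The `iter_null_segs()` signature and the yielded tuple layout are taken from the diff below; the `fill_null_gaps` wrapper name and the exact call/return shape of `get_hist` are illustrative assumptions, not confirmed piker API:

```python
# Sketch only: how the final backfill stage can consume the new
# async generator. `get_hist` stands in for the per-backend query
# routine delivered by `open_history_client()`.
from piker.data.tsp import iter_null_segs  # assumed import path


async def fill_null_gaps(frame, timeframe: float, get_hist) -> None:
    async for (
        absi_start, absi_end,  # absolute (shm) indices
        fi_start, fi_end,      # frame-relative indices
        start_t, end_t,        # epoch-time floats
        start_dt, end_dt,      # `pendulum.DateTime` equivalents
    ) in iter_null_segs(
        frame,
        timeframe=timeframe,
    ):
        # single-zeroed-row edge case: every `start_*` field is
        # None, which backends should treat as "give me max
        # history ending at this end_dt".
        await get_hist(
            timeframe,
            start_dt=start_dt,  # may be None!
            end_dt=end_dt,
        )
```

Note how the `None` start fields fall straight through to `start_dt=None`, matching the "max history from this end_dt" semantics described above.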
```diff
@@ -59,9 +59,10 @@ from ._sampling import (
 from .tsp import (
     dedupe,
     get_null_segs,
+    iter_null_segs,
     sort_diff,
     Frame,
-    Seq,
+    # Seq,
 )
 from ..brokers._util import (
     DataUnavailable,
@@ -174,38 +175,22 @@ async def maybe_fill_null_segments(
 ) -> list[Frame]:
 
     frame: Frame = shm.array
 
-    null_segs: tuple | None = get_null_segs(
-        frame,
-        period=timeframe,
-    )
-    if null_segs:
-        absi_pairs_zsegs: list[list[float, float]]
-        izeros: Seq
-        zero_t: Frame
-        (
-            absi_pairs_zsegs,
-            izeros,
-            zero_t,
-        ) = null_segs
-
-        absi_first: int = frame[0]['index']
-        for absi_start, absi_end in absi_pairs_zsegs:
-            # await tractor.pause()
-            fi_start = absi_start - absi_first
-            fi_end = absi_end - absi_first
-            start_row: Seq = frame[fi_start]
-            end_row: Seq = frame[fi_end]
-
-            start_t: float = start_row['time']
-            end_t: float = end_row['time']
-
-            start_dt = from_timestamp(start_t)
-            end_dt = from_timestamp(end_t)
-
-            # if we get a badly ordered timestamp
-            # pair, immediately stop backfilling.
-            if end_dt < start_dt:
-                break
+    async for (
+        absi_start, absi_end,
+        fi_start, fi_end,
+        start_t, end_t,
+        start_dt, end_dt,
+    ) in iter_null_segs(
+        frame,
+        timeframe=timeframe,
+    ):
+        # XXX NOTE: ?if we get a badly ordered timestamp
+        # pair, immediately stop backfilling?
+        if (
+            start_dt
+            and end_dt < start_dt
+        ):
+            break
 
         (
@@ -231,7 +216,6 @@ async def maybe_fill_null_segments(
             prepend_index=absi_end,
             update_start_on_prepend=False,
         )
-
         # TODO: UI side needs IPC event to update..
         # - make sure the UI actually always handles
         # this update!
@@ -240,10 +224,23 @@ async def maybe_fill_null_segments(
     # loop
     await sampler_stream.send({
         'broadcast_all': {
+
+            # XXX NOTE XXX: see the
+            # `.ui._display.increment_history_view()` if block
+            # that looks for this info to FORCE a hard viz
+            # redraw!
             'backfilling': (mkt.fqme, timeframe),
         },
     })
 
+    await tractor.pause()
+
+    # TODO: iteratively step through any remaining time gaps?
+    # if (
+    #     next_end_dt not in frame[
+    # ):
+    #     pass
+
     # RECHECK for more null-gaps
     frame: Frame = shm.array
     null_segs: tuple | None = get_null_segs(
```
```diff
@@ -29,10 +29,17 @@ from math import (
     floor,
 )
 import time
-from typing import Literal
+from typing import (
+    Literal,
+    AsyncGenerator,
+)
 
 import numpy as np
 import polars as pl
+from pendulum import (
+    DateTime,
+    from_timestamp,
+)
 
 from ..toolz.profile import (
     Profiler,
@@ -223,7 +230,10 @@ def get_null_segs(
     col: str = 'time',
 
 ) -> tuple[
-    Seq,
+    # Seq,  # TODO: can we make it an array-type instead?
+    list[
+        list[int, int],
+    ],
     Seq,
     Frame
 ] | None:
@@ -285,13 +295,27 @@ def get_null_segs(
 
     # select out slice index pairs for each null-segment
     # portion detected throughout entire input frame.
+    # import pdbp; pdbp.set_trace()
+
+    # only one null-segment in entire frame?
     if not fi_zgaps.size:
+
+        # check for number null rows
         # TODO: use ndarray for this!
-        absi_zsegs = [[
-            absi_zeros[0],  # - 1,  # - ifirst,
-            # TODO: need the + 1 or no?
-            absi_zeros[-1] + 1,  # - ifirst,
-        ]]
+        if absi_zeros.size > 1:
+            absi_zsegs = [[
+                absi_zeros[0],  # - 1,  # - ifirst,
+                # TODO: need the + 1 or no?
+                absi_zeros[-1] + 1,  # - ifirst,
+            ]]
+        else:
+            absi_zsegs = [[
+                # absi_zeros[0] + 1,
+                # see `get_hist()` in backend, should ALWAYS be
+                # able to handle a `start_dt=None`!
+                None,
+                absi_zeros[0] + 1,
+            ]]
     else:
         absi_zsegs.append([
             absi_zeros[0] - 1,  # - ifirst,
```
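To make the new single-zeroed-row branch concrete, here is a small hand-worked case. The toy frame below is invented for illustration; only the `[[None, absi_zeros[0] + 1]]` pair shape comes from the change above:

```python
import numpy as np

# Toy frame: absolute shm indices 10..14 with exactly one
# zeroed 'time' row at absolute index 12.
frame = np.zeros(5, dtype=[('index', 'i8'), ('time', 'f8')])
frame['index'] = np.arange(10, 15)
frame['time'] = [1., 2., 0., 4., 5.]

absi_zeros = frame['index'][frame['time'] == 0]  # -> array([12])
assert absi_zeros.size == 1

# the new `else:` branch emits a "half-open" pair: no usable
# start index, end just past the lone zeroed row.
absi_zsegs = [[None, absi_zeros[0] + 1]]  # -> [[None, 13]]

# downstream, `iter_null_segs()` maps the `None` start through
# to `start_dt=None`, i.e. a "max history from end_dt" query.
```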
```diff
@@ -305,15 +329,12 @@ def get_null_segs(
     ) in enumerate(zip(
         fi_zgaps,
         fi_zseg_start_rows,
-        # fi_zgaps,
-        # start=1,
     )):
         assert (zseg_start_row == zero_t[fi]).all()
 
         absi: int = zseg_start_row['index'][0]
         # row = zero_t[fi]
         # absi_pre_zseg = row['index'][0] - 1
-        absi_pre_zseg = absi - 1
+        # absi_pre_zseg = absi - 1
 
         if i > 0:
             prev_zseg_row = zero_t[fi - 1]
@@ -330,7 +351,6 @@ def get_null_segs(
     assert end
     assert start < end
 
-    # import pdbp; pdbp.set_trace()
     return (
         absi_zsegs,  # start indices of null
         absi_zeros,
@@ -338,6 +358,69 @@ def get_null_segs(
     )
 
 
+async def iter_null_segs(
+    frame: Frame,
+    timeframe: float,
+) -> AsyncGenerator[
+    tuple[
+        int, int,
+        int, int,
+        float, float,
+        float, float,
+
+        # Seq,  # TODO: can we make it an array-type instead?
+        # list[
+        #     list[int, int],
+        # ],
+        # Seq,
+        # Frame
+    ],
+    None,
+]:
+    if null_segs := get_null_segs(
+        frame,
+        period=timeframe,
+    ):
+        absi_pairs_zsegs: list[list[float, float]]
+        izeros: Seq
+        zero_t: Frame
+        (
+            absi_pairs_zsegs,
+            izeros,
+            zero_t,
+        ) = null_segs
+
+        absi_first: int = frame[0]['index']
+        for (
+            absi_start,
+            absi_end,
+        ) in absi_pairs_zsegs:
+
+            fi_end: int = absi_end - absi_first
+            end_row: Seq = frame[fi_end]
+            end_t: float = end_row['time']
+            end_dt: DateTime = from_timestamp(end_t)
+
+            if absi_start is not None:
+                fi_start: int = absi_start - absi_first
+                start_row: Seq = frame[fi_start]
+                start_t: float = start_row['time']
+                start_dt: DateTime = from_timestamp(start_t)
+
+            else:
+                fi_start = None
+                start_row = None
+                start_t = None
+                start_dt = None
+
+            yield (
+                absi_start, absi_end,  # abs indices
+                fi_start, fi_end,  # relative "frame" indices
+                start_t, end_t,
+                start_dt, end_dt,
+            )
+
+
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',
```
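Finally, a hedged smoke-test sketch for the generator as committed. The `piker.data.tsp` import path is inferred from the `from .tsp import ...` hunk above, and using a structured ndarray as a stand-in for `Frame` is an assumption:

```python
import numpy as np
import trio

from piker.data.tsp import iter_null_segs  # assumed import path


async def main() -> None:
    # synthetic 1m-bar frame with a 3-row null-segment
    frame = np.zeros(10, dtype=[('index', 'i8'), ('time', 'f8')])
    frame['index'] = np.arange(100, 110)
    frame['time'] = (np.arange(10) + 1) * 60.
    frame['time'][4:7] = 0  # fake the zeroed gap rows

    async for (
        absi_start, absi_end,
        fi_start, fi_end,
        start_t, end_t,
        start_dt, end_dt,
    ) in iter_null_segs(frame, timeframe=60):
        print(f'null seg {absi_start} -> {absi_end}: {start_dt} -> {end_dt}')


trio.run(main)
```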