Wrap null-gap detect and fill in async gen

Call it `iter_null_segs()` (for now?) and use in the final (sequential)
stage of the `.history.start_backfill()` task-func. Delivers abs,
frame-relative, and equiv time stamps on each iteration pertaining to
each detected null-segment to make it easy to do piece-wise history
queries for each.

Further,
- handle edge case in `get_null_segs()` where there is only 1 zeroed
  row value, in which case we deliver `absi_zsegs` as a single pair of
  the same index value and,
  - when this occurs `iter_null_segs()` delivers `None` for all the
    `start_` related indices/timestamps since all `get_hist()` routines
    (delivered by `open_history_client()`) should handle it as being a
    "get max history from this end_dt" type query.
- add note about needing to do time gap handling where there's a gap in
  the timeseries-history that isn't actually IN the data-history.
distribute_dis
Tyler Goodlet 2023-12-13 18:29:06 -05:00
parent c129f5bb4a
commit 83bdca46a2
2 changed files with 154 additions and 74 deletions

View File

@ -59,9 +59,10 @@ from ._sampling import (
from .tsp import ( from .tsp import (
dedupe, dedupe,
get_null_segs, get_null_segs,
iter_null_segs,
sort_diff, sort_diff,
Frame, Frame,
Seq, # Seq,
) )
from ..brokers._util import ( from ..brokers._util import (
DataUnavailable, DataUnavailable,
@ -174,38 +175,22 @@ async def maybe_fill_null_segments(
) -> list[Frame]: ) -> list[Frame]:
frame: Frame = shm.array frame: Frame = shm.array
async for (
null_segs: tuple | None = get_null_segs( absi_start, absi_end,
fi_start, fi_end,
start_t, end_t,
start_dt, end_dt,
) in iter_null_segs(
frame, frame,
period=timeframe, timeframe=timeframe,
) ):
if null_segs:
absi_pairs_zsegs: list[list[float, float]]
izeros: Seq
zero_t: Frame
(
absi_pairs_zsegs,
izeros,
zero_t,
) = null_segs
absi_first: int = frame[0]['index'] # XXX NOTE: ?if we get a badly ordered timestamp
for absi_start, absi_end in absi_pairs_zsegs: # pair, immediately stop backfilling?
# await tractor.pause() if (
fi_start = absi_start - absi_first start_dt
fi_end = absi_end - absi_first and end_dt < start_dt
start_row: Seq = frame[fi_start] ):
end_row: Seq = frame[fi_end]
start_t: float = start_row['time']
end_t: float = end_row['time']
start_dt = from_timestamp(start_t)
end_dt = from_timestamp(end_t)
# if we get a badly ordered timestamp
# pair, immediately stop backfilling.
if end_dt < start_dt:
break break
( (
@ -231,7 +216,6 @@ async def maybe_fill_null_segments(
prepend_index=absi_end, prepend_index=absi_end,
update_start_on_prepend=False, update_start_on_prepend=False,
) )
# TODO: UI side needs IPC event to update.. # TODO: UI side needs IPC event to update..
# - make sure the UI actually always handles # - make sure the UI actually always handles
# this update! # this update!
@ -240,10 +224,23 @@ async def maybe_fill_null_segments(
# loop # loop
await sampler_stream.send({ await sampler_stream.send({
'broadcast_all': { 'broadcast_all': {
# XXX NOTE XXX: see the
# `.ui._display.increment_history_view()` if block
# that looks for this info to FORCE a hard viz
# redraw!
'backfilling': (mkt.fqme, timeframe), 'backfilling': (mkt.fqme, timeframe),
}, },
}) })
await tractor.pause()
    # TODO: iteratively step through any remaining time gaps? # TODO: iteratively step through any remaining time gaps?
# if (
# next_end_dt not in frame[
# ):
# pass
# RECHECK for more null-gaps # RECHECK for more null-gaps
frame: Frame = shm.array frame: Frame = shm.array
null_segs: tuple | None = get_null_segs( null_segs: tuple | None = get_null_segs(

View File

@ -29,10 +29,17 @@ from math import (
floor, floor,
) )
import time import time
from typing import Literal from typing import (
Literal,
AsyncGenerator,
)
import numpy as np import numpy as np
import polars as pl import polars as pl
from pendulum import (
DateTime,
from_timestamp,
)
from ..toolz.profile import ( from ..toolz.profile import (
Profiler, Profiler,
@ -223,7 +230,10 @@ def get_null_segs(
col: str = 'time', col: str = 'time',
) -> tuple[ ) -> tuple[
Seq, # Seq, # TODO: can we make it an array-type instead?
list[
list[int, int],
],
Seq, Seq,
Frame Frame
] | None: ] | None:
@ -285,13 +295,27 @@ def get_null_segs(
# select out slice index pairs for each null-segment # select out slice index pairs for each null-segment
# portion detected throughout entire input frame. # portion detected throughout entire input frame.
# import pdbp; pdbp.set_trace()
# only one null-segment in entire frame?
if not fi_zgaps.size: if not fi_zgaps.size:
# check for number null rows
# TODO: use ndarray for this! # TODO: use ndarray for this!
if absi_zeros.size > 1:
absi_zsegs = [[ absi_zsegs = [[
absi_zeros[0], # - 1, # - ifirst, absi_zeros[0], # - 1, # - ifirst,
# TODO: need the + 1 or no? # TODO: need the + 1 or no?
absi_zeros[-1] + 1, # - ifirst, absi_zeros[-1] + 1, # - ifirst,
]] ]]
else:
absi_zsegs = [[
# absi_zeros[0] + 1,
# see `get_hist()` in backend, should ALWAYS be
# able to handle a `start_dt=None`!
None,
absi_zeros[0] + 1,
]]
else: else:
absi_zsegs.append([ absi_zsegs.append([
absi_zeros[0] - 1, # - ifirst, absi_zeros[0] - 1, # - ifirst,
@ -305,15 +329,12 @@ def get_null_segs(
) in enumerate(zip( ) in enumerate(zip(
fi_zgaps, fi_zgaps,
fi_zseg_start_rows, fi_zseg_start_rows,
# fi_zgaps,
# start=1,
)): )):
assert (zseg_start_row == zero_t[fi]).all() assert (zseg_start_row == zero_t[fi]).all()
absi: int = zseg_start_row['index'][0] absi: int = zseg_start_row['index'][0]
# row = zero_t[fi] # row = zero_t[fi]
# absi_pre_zseg = row['index'][0] - 1 # absi_pre_zseg = row['index'][0] - 1
absi_pre_zseg = absi - 1 # absi_pre_zseg = absi - 1
if i > 0: if i > 0:
prev_zseg_row = zero_t[fi - 1] prev_zseg_row = zero_t[fi - 1]
@ -330,7 +351,6 @@ def get_null_segs(
assert end assert end
assert start < end assert start < end
# import pdbp; pdbp.set_trace()
return ( return (
absi_zsegs, # start indices of null absi_zsegs, # start indices of null
absi_zeros, absi_zeros,
@ -338,6 +358,69 @@ def get_null_segs(
) )
async def iter_null_segs(
    frame: Frame,
    timeframe: float,

) -> AsyncGenerator[
    tuple[
        # NOTE: all 4 `start_*` entries are `None` when the
        # null-seg detection found only a single zeroed row,
        # signalling a "get max history from this end_dt" query.
        int | None, int,  # absi_start, absi_end
        int | None, int,  # fi_start, fi_end
        float | None, float,  # start_t, end_t
        DateTime | None, DateTime,  # start_dt, end_dt
    ],
    None,
]:
    '''
    Detect null-segments in `frame` (via `get_null_segs()`) and
    deliver, for each segment, its absolute indices, frame-relative
    indices, epoch timestamps and equivalent `DateTime` stamps to
    make piece-wise history queries easy.

    Yields nothing if no null-segments are detected.

    '''
    null_segs: tuple | None = get_null_segs(
        frame,
        period=timeframe,
    )
    if null_segs is None:
        return

    absi_pairs_zsegs: list[list[float, float]]
    izeros: Seq
    zero_t: Frame
    (
        absi_pairs_zsegs,
        izeros,
        zero_t,
    ) = null_segs

    # offset mapping abs shm indices -> frame-relative indices.
    absi_first: int = frame[0]['index']
    for (
        absi_start,
        absi_end,
    ) in absi_pairs_zsegs:

        # the end of a null-seg is always a concrete row.
        fi_end: int = absi_end - absi_first
        end_row: Seq = frame[fi_end]
        end_t: float = end_row['time']
        end_dt: DateTime = from_timestamp(end_t)

        # default to the "no start" case (single zeroed row);
        # `get_hist()` routines should treat a `None` start as a
        # max-history-from-`end_dt` type query.
        fi_start: int | None = None
        start_row: Seq | None = None
        start_t: float | None = None
        start_dt: DateTime | None = None

        if absi_start is not None:
            fi_start = absi_start - absi_first
            start_row = frame[fi_start]
            start_t = start_row['time']
            start_dt = from_timestamp(start_t)

        yield (
            absi_start, absi_end,  # abs indices
            fi_start, fi_end,  # relative "frame" indices
            start_t, end_t,
            start_dt, end_dt,
        )
def with_dts( def with_dts(
df: pl.DataFrame, df: pl.DataFrame,
time_col: str = 'time', time_col: str = 'time',