Wrap null-gap detect and fill in async gen

Call it `iter_null_segs()` (for now?) and use it in the final (sequential) stage of the `.history.start_backfill()` task-func. On each iteration it delivers absolute, frame-relative, and equivalent timestamps for each detected null-segment, making it easy to do piece-wise history queries for each. Further:
- handle the edge case in `get_null_segs()` where there is only 1 zeroed row value, in which case we deliver `absi_zsegs` as a single pair of the same index value, and
- when this occurs, `iter_null_segs()` delivers `None` for all the `start_`-related indices/timestamps, since all `get_hist()` routines (delivered by `open_history_client()`) should handle it as a "get max history from this end_dt" type query,
- add a note about needing to do time-gap handling where there's a gap in the timeseries-history that isn't actually IN the data-history.
parent c129f5bb4a
commit 83bdca46a2
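For orientation before the diff, here is a minimal sketch of the consumption pattern the new generator enables. The `iter_null_segs()` signature and the yielded tuple layout are taken from the diff below; the `fill_null_gaps` wrapper name and the exact call/return shape of `get_hist` are illustrative assumptions, not confirmed piker API:

```python
# Sketch only: how the final backfill stage can consume the new
# async generator. `get_hist` stands in for the per-backend query
# routine delivered by `open_history_client()`.
from piker.data.tsp import iter_null_segs  # assumed import path


async def fill_null_gaps(frame, timeframe: float, get_hist) -> None:
    async for (
        absi_start, absi_end,  # absolute (shm) indices
        fi_start, fi_end,      # frame-relative indices
        start_t, end_t,        # epoch-time floats
        start_dt, end_dt,      # `pendulum.DateTime` equivalents
    ) in iter_null_segs(
        frame,
        timeframe=timeframe,
    ):
        # single-zeroed-row edge case: every `start_*` field is
        # None, which backends should treat as "give me max
        # history ending at this end_dt".
        await get_hist(
            timeframe,
            start_dt=start_dt,  # may be None!
            end_dt=end_dt,
        )
```

Note how the `None` start fields fall straight through to `start_dt=None`, matching the "max history from this end_dt" semantics described above.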
```diff
@@ -59,9 +59,10 @@ from ._sampling import (
 from .tsp import (
     dedupe,
     get_null_segs,
+    iter_null_segs,
     sort_diff,
     Frame,
-    Seq,
+    # Seq,
 )
 from ..brokers._util import (
     DataUnavailable,
@@ -174,38 +175,22 @@ async def maybe_fill_null_segments(
 ) -> list[Frame]:
 
     frame: Frame = shm.array
 
-    null_segs: tuple | None = get_null_segs(
-        frame,
-        period=timeframe,
-    )
-    if null_segs:
-        absi_pairs_zsegs: list[list[float, float]]
-        izeros: Seq
-        zero_t: Frame
-        (
-            absi_pairs_zsegs,
-            izeros,
-            zero_t,
-        ) = null_segs
-
-        absi_first: int = frame[0]['index']
-        for absi_start, absi_end in absi_pairs_zsegs:
-            # await tractor.pause()
-            fi_start = absi_start - absi_first
-            fi_end = absi_end - absi_first
-            start_row: Seq = frame[fi_start]
-            end_row: Seq = frame[fi_end]
-
-            start_t: float = start_row['time']
-            end_t: float = end_row['time']
-
-            start_dt = from_timestamp(start_t)
-            end_dt = from_timestamp(end_t)
-
-            # if we get a badly ordered timestamp
-            # pair, immediately stop backfilling.
-            if end_dt < start_dt:
-                break
+    async for (
+        absi_start, absi_end,
+        fi_start, fi_end,
+        start_t, end_t,
+        start_dt, end_dt,
+    ) in iter_null_segs(
+        frame,
+        timeframe=timeframe,
+    ):
+        # XXX NOTE: ?if we get a badly ordered timestamp
+        # pair, immediately stop backfilling?
+        if (
+            start_dt
+            and end_dt < start_dt
+        ):
+            break
 
         (
@@ -231,7 +216,6 @@ async def maybe_fill_null_segments(
             prepend_index=absi_end,
             update_start_on_prepend=False,
         )
-
         # TODO: UI side needs IPC event to update..
         # - make sure the UI actually always handles
         # this update!
@@ -240,10 +224,23 @@ async def maybe_fill_null_segments(
     # loop
     await sampler_stream.send({
         'broadcast_all': {
+
+            # XXX NOTE XXX: see the
+            # `.ui._display.increment_history_view()` if block
+            # that looks for this info to FORCE a hard viz
+            # redraw!
             'backfilling': (mkt.fqme, timeframe),
         },
     })
 
+    await tractor.pause()
+
+    # TODO: iteratively step through any remaining time gaps?
+    # if (
+    #     next_end_dt not in frame[
+    # ):
+    #     pass
+
     # RECHECK for more null-gaps
     frame: Frame = shm.array
     null_segs: tuple | None = get_null_segs(
```
```diff
@@ -29,10 +29,17 @@ from math import (
     floor,
 )
 import time
-from typing import Literal
+from typing import (
+    Literal,
+    AsyncGenerator,
+)
 
 import numpy as np
 import polars as pl
+from pendulum import (
+    DateTime,
+    from_timestamp,
+)
 
 from ..toolz.profile import (
     Profiler,
@@ -223,7 +230,10 @@ def get_null_segs(
     col: str = 'time',
 
 ) -> tuple[
-    Seq,
+    # Seq,  # TODO: can we make it an array-type instead?
+    list[
+        list[int, int],
+    ],
     Seq,
     Frame
 ] | None:
@@ -285,13 +295,27 @@ def get_null_segs(
 
     # select out slice index pairs for each null-segment
     # portion detected throughout entire input frame.
+    # import pdbp; pdbp.set_trace()
+
+    # only one null-segment in entire frame?
     if not fi_zgaps.size:
+
+        # check for number null rows
         # TODO: use ndarray for this!
-        absi_zsegs = [[
-            absi_zeros[0],  # - 1,  # - ifirst,
-            # TODO: need the + 1 or no?
-            absi_zeros[-1] + 1,  # - ifirst,
-        ]]
+        if absi_zeros.size > 1:
+            absi_zsegs = [[
+                absi_zeros[0],  # - 1,  # - ifirst,
+                # TODO: need the + 1 or no?
+                absi_zeros[-1] + 1,  # - ifirst,
+            ]]
+        else:
+            absi_zsegs = [[
+                # absi_zeros[0] + 1,
+                # see `get_hist()` in backend, should ALWAYS be
+                # able to handle a `start_dt=None`!
+                None,
+                absi_zeros[0] + 1,
+            ]]
     else:
         absi_zsegs.append([
             absi_zeros[0] - 1,  # - ifirst,
```
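To make the new single-zeroed-row branch concrete, here is a small hand-worked case. The toy frame below is invented for illustration; only the `[[None, absi_zeros[0] + 1]]` pair shape comes from the change above:

```python
import numpy as np

# Toy frame: absolute shm indices 10..14 with exactly one
# zeroed 'time' row at absolute index 12.
frame = np.zeros(5, dtype=[('index', 'i8'), ('time', 'f8')])
frame['index'] = np.arange(10, 15)
frame['time'] = [1., 2., 0., 4., 5.]

absi_zeros = frame['index'][frame['time'] == 0]  # -> array([12])
assert absi_zeros.size == 1

# the new `else:` branch emits a "half-open" pair: no usable
# start index, end just past the lone zeroed row.
absi_zsegs = [[None, absi_zeros[0] + 1]]  # -> [[None, 13]]

# downstream, `iter_null_segs()` maps the `None` start through
# to `start_dt=None`, i.e. a "max history from end_dt" query.
```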
```diff
@@ -305,15 +329,12 @@ def get_null_segs(
     ) in enumerate(zip(
         fi_zgaps,
         fi_zseg_start_rows,
-        # fi_zgaps,
-        # start=1,
     )):
         assert (zseg_start_row == zero_t[fi]).all()
 
         absi: int = zseg_start_row['index'][0]
         # row = zero_t[fi]
         # absi_pre_zseg = row['index'][0] - 1
-        absi_pre_zseg = absi - 1
+        # absi_pre_zseg = absi - 1
 
         if i > 0:
             prev_zseg_row = zero_t[fi - 1]
@@ -330,7 +351,6 @@ def get_null_segs(
     assert end
     assert start < end
 
-    # import pdbp; pdbp.set_trace()
     return (
         absi_zsegs,  # start indices of null
         absi_zeros,
@@ -338,6 +358,69 @@ def get_null_segs(
     )
 
 
+async def iter_null_segs(
+    frame: Frame,
+    timeframe: float,
+) -> AsyncGenerator[
+    tuple[
+        int, int,
+        int, int,
+        float, float,
+        float, float,
+
+        # Seq,  # TODO: can we make it an array-type instead?
+        # list[
+        #     list[int, int],
+        # ],
+        # Seq,
+        # Frame
+    ],
+    None,
+]:
+    if null_segs := get_null_segs(
+        frame,
+        period=timeframe,
+    ):
+        absi_pairs_zsegs: list[list[float, float]]
+        izeros: Seq
+        zero_t: Frame
+        (
+            absi_pairs_zsegs,
+            izeros,
+            zero_t,
+        ) = null_segs
+
+        absi_first: int = frame[0]['index']
+        for (
+            absi_start,
+            absi_end,
+        ) in absi_pairs_zsegs:
+
+            fi_end: int = absi_end - absi_first
+            end_row: Seq = frame[fi_end]
+            end_t: float = end_row['time']
+            end_dt: DateTime = from_timestamp(end_t)
+
+            if absi_start is not None:
+                fi_start: int = absi_start - absi_first
+                start_row: Seq = frame[fi_start]
+                start_t: float = start_row['time']
+                start_dt: DateTime = from_timestamp(start_t)
+
+            else:
+                fi_start = None
+                start_row = None
+                start_t = None
+                start_dt = None
+
+            yield (
+                absi_start, absi_end,  # abs indices
+                fi_start, fi_end,  # relative "frame" indices
+                start_t, end_t,
+                start_dt, end_dt,
+            )
+
+
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',
```
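Finally, a hedged smoke-test sketch for the generator as committed. The `piker.data.tsp` import path is inferred from the `from .tsp import ...` hunk above, and using a structured ndarray as a stand-in for `Frame` is an assumption:

```python
import numpy as np
import trio

from piker.data.tsp import iter_null_segs  # assumed import path


async def main() -> None:
    # synthetic 1m-bar frame with a 3-row null-segment
    frame = np.zeros(10, dtype=[('index', 'i8'), ('time', 'f8')])
    frame['index'] = np.arange(100, 110)
    frame['time'] = (np.arange(10) + 1) * 60.
    frame['time'][4:7] = 0  # fake the zeroed gap rows

    async for (
        absi_start, absi_end,
        fi_start, fi_end,
        start_t, end_t,
        start_dt, end_dt,
    ) in iter_null_segs(frame, timeframe=60):
        print(f'null seg {absi_start} -> {absi_end}: {start_dt} -> {end_dt}')


trio.run(main)
```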