Always reload shm data before annotating gaps, so they line up..

distribute_dis
Tyler Goodlet 2024-01-09 15:55:16 -05:00
parent 6959429af8
commit 52b349fe79
1 changed files with 54 additions and 45 deletions

View File

@ -286,6 +286,11 @@ async def markup_gaps(
prev_r: pl.DataFrame = wdts.filter( prev_r: pl.DataFrame = wdts.filter(
pl.col('index') == iend - 1 pl.col('index') == iend - 1
) )
# XXX: probably a gap in the (newly sorted or de-duplicated)
# dt-df, so we might need to re-index first..
if prev_r.is_empty():
await tractor.pause()
istart: int = prev_r['index'][0] istart: int = prev_r['index'][0]
# dt_start_t: float = dt_prev.timestamp() # dt_start_t: float = dt_prev.timestamp()
@ -403,41 +408,11 @@ def ldshm(
period_s: float = float(max(d1, d2, med)) period_s: float = float(max(d1, d2, med))
# over-write back to shm?
wdts: pl.DataFrame # with dts
deduped: pl.DataFrame # deduplicated dts
(
wdts,
deduped,
diff,
) = tsp.dedupe(
shm_df,
period=period_s,
)
null_segs: tuple = tsp.get_null_segs( null_segs: tuple = tsp.get_null_segs(
frame=shm.array, frame=shm.array,
period=period_s, period=period_s,
) )
# detect gaps from in expected (uniform OHLC) sample period
step_gaps: pl.DataFrame = tsp.detect_time_gaps(
wdts,
expect_period=period_s,
)
# TODO: by default we always want to mark these up
# with rects showing up/down gaps Bo
venue_gaps: pl.DataFrame = tsp.detect_time_gaps(
wdts,
expect_period=period_s,
# TODO: actually pull the exact duration
# expected for each venue operational period?
gap_dt_unit='days',
gap_thresh=1,
)
# TODO: call null-seg fixer somehow? # TODO: call null-seg fixer somehow?
if null_segs: if null_segs:
await tractor.pause() await tractor.pause()
@ -457,25 +432,47 @@ def ldshm(
# mkt=mkt, # mkt=mkt,
# )) # ))
# over-write back to shm?
wdts: pl.DataFrame # with dts
deduped: pl.DataFrame # deduplicated dts
(
wdts,
deduped,
diff,
) = tsp.dedupe(
shm_df,
period=period_s,
)
# detect gaps from in expected (uniform OHLC) sample period
step_gaps: pl.DataFrame = tsp.detect_time_gaps(
deduped,
expect_period=period_s,
)
# TODO: by default we always want to mark these up
# with rects showing up/down gaps Bo
venue_gaps: pl.DataFrame = tsp.detect_time_gaps(
deduped,
expect_period=period_s,
# TODO: actually pull the exact duration
# expected for each venue operational period?
gap_dt_unit='days',
gap_thresh=1,
)
# TODO: find the disjoint set of step gaps from # TODO: find the disjoint set of step gaps from
# venue (closure) set! # venue (closure) set!
# -[ ] do a set diff by checking for the unique # -[ ] do a set diff by checking for the unique
# gap set only in the step_gaps? # gap set only in the step_gaps?
if ( if (
not venue_gaps.is_empty() not venue_gaps.is_empty()
# and not step_gaps.is_empty() or (
): period_s < 60
do_markup_gaps: bool = True and not step_gaps.is_empty()
if do_markup_gaps:
aids: dict = await markup_gaps(
fqme,
period_s,
actl,
wdts,
step_gaps,
) )
assert aids ):
# write repaired ts to parquet-file? # write repaired ts to parquet-file?
if write_parquet: if write_parquet:
start: float = time.time() start: float = time.time()
@ -512,8 +509,6 @@ def ldshm(
shm._array.setflags( shm._array.setflags(
write=int(1), write=int(1),
) )
# last chance manual overwrites in REPL
await tractor.pause()
shm.push( shm.push(
new, new,
prepend=True, prepend=True,
@ -521,11 +516,25 @@ def ldshm(
update_first=False, # don't update ._first update_first=False, # don't update ._first
) )
do_markup_gaps: bool = True
if do_markup_gaps:
new_df: pl.DataFrame = tsp.np2pl(new)
aids: dict = await markup_gaps(
fqme,
period_s,
actl,
new_df,
step_gaps,
)
# last chance manual overwrites in REPL
await tractor.pause()
assert aids
else: else:
# allow interaction even when no ts problems. # allow interaction even when no ts problems.
await tractor.pause() await tractor.pause()
assert not diff # assert not diff
if shm_df is None: if shm_df is None: