Oof, fix no-tsdb-entry since needs full backfill case!
Got borked by the logic refactoring to get more concurrency going around tsdb vs. latest frame loads with nested nurseries. So, repair all that such that we can still backfill symbols previously not loaded as well as drop all the `_FeedsBus` instance passing to subtasks where it's definitely not needed. Toss in a pause point around sampler stream `'backfilling'` msgs as well since there seems to be a weird ctx-cancelled propagation going on when a feed client disconnects during backfill and this might be where the src `tractor.ContextCancelled` is getting bubbled from?
parent
b064a5f94d
commit
61e52213b2
|
@ -269,16 +269,20 @@ async def maybe_fill_null_segments(
|
||||||
# - remember that in the display side, only refresh this
|
# - remember that in the display side, only refresh this
|
||||||
# if the respective history is actually "in view".
|
# if the respective history is actually "in view".
|
||||||
# loop
|
# loop
|
||||||
await sampler_stream.send({
|
try:
|
||||||
'broadcast_all': {
|
await sampler_stream.send({
|
||||||
|
'broadcast_all': {
|
||||||
|
|
||||||
# XXX NOTE XXX: see the
|
# XXX NOTE XXX: see the
|
||||||
# `.ui._display.increment_history_view()` if block
|
# `.ui._display.increment_history_view()` if block
|
||||||
# that looks for this info to FORCE a hard viz
|
# that looks for this info to FORCE a hard viz
|
||||||
# redraw!
|
# redraw!
|
||||||
'backfilling': (mkt.fqme, timeframe),
|
'backfilling': (mkt.fqme, timeframe),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
except tractor.ContextCancelled:
|
||||||
|
# log.exception
|
||||||
|
await tractor.pause()
|
||||||
|
|
||||||
null_segs_detected.set()
|
null_segs_detected.set()
|
||||||
# RECHECK for more null-gaps
|
# RECHECK for more null-gaps
|
||||||
|
@ -354,7 +358,6 @@ async def maybe_fill_null_segments(
|
||||||
|
|
||||||
|
|
||||||
async def start_backfill(
|
async def start_backfill(
|
||||||
tn: trio.Nursery,
|
|
||||||
get_hist,
|
get_hist,
|
||||||
mod: ModuleType,
|
mod: ModuleType,
|
||||||
mkt: MktPair,
|
mkt: MktPair,
|
||||||
|
@ -408,7 +411,6 @@ async def start_backfill(
|
||||||
# settings above when the tsdb is detected as being empty.
|
# settings above when the tsdb is detected as being empty.
|
||||||
backfill_until_dt = backfill_from_dt.subtract(**period_duration)
|
backfill_until_dt = backfill_from_dt.subtract(**period_duration)
|
||||||
|
|
||||||
|
|
||||||
# STAGE NOTE: "backward history gap filling":
|
# STAGE NOTE: "backward history gap filling":
|
||||||
# - we push to the shm buffer until we have history back
|
# - we push to the shm buffer until we have history back
|
||||||
# until the latest entry loaded from the tsdb's table B)
|
# until the latest entry loaded from the tsdb's table B)
|
||||||
|
@ -752,6 +754,8 @@ async def back_load_from_tsdb(
|
||||||
|
|
||||||
|
|
||||||
async def push_latest_frame(
|
async def push_latest_frame(
|
||||||
|
# box-type only that should get packed with the datetime
|
||||||
|
# objects received for the latest history frame
|
||||||
dt_eps: list[DateTime, DateTime],
|
dt_eps: list[DateTime, DateTime],
|
||||||
shm: ShmArray,
|
shm: ShmArray,
|
||||||
get_hist: Callable[
|
get_hist: Callable[
|
||||||
|
@ -761,8 +765,11 @@ async def push_latest_frame(
|
||||||
timeframe: float,
|
timeframe: float,
|
||||||
config: dict,
|
config: dict,
|
||||||
|
|
||||||
task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED,
|
task_status: TaskStatus[
|
||||||
):
|
Exception | list[datetime, datetime]
|
||||||
|
] = trio.TASK_STATUS_IGNORED,
|
||||||
|
|
||||||
|
) -> list[datetime, datetime] | None:
|
||||||
# get latest query's worth of history all the way
|
# get latest query's worth of history all the way
|
||||||
# back to what is recorded in the tsdb
|
# back to what is recorded in the tsdb
|
||||||
try:
|
try:
|
||||||
|
@ -779,17 +786,19 @@ async def push_latest_frame(
|
||||||
mr_start_dt,
|
mr_start_dt,
|
||||||
mr_end_dt,
|
mr_end_dt,
|
||||||
])
|
])
|
||||||
|
task_status.started(dt_eps)
|
||||||
|
|
||||||
# XXX: timeframe not supported for backend (since
|
# XXX: timeframe not supported for backend (since
|
||||||
# above exception type), terminate immediately since
|
# above exception type), terminate immediately since
|
||||||
# there's no backfilling possible.
|
# there's no backfilling possible.
|
||||||
except DataUnavailable:
|
except DataUnavailable:
|
||||||
task_status.started()
|
task_status.started(None)
|
||||||
|
|
||||||
if timeframe > 1:
|
if timeframe > 1:
|
||||||
await tractor.pause()
|
await tractor.pause()
|
||||||
|
|
||||||
return
|
# prolly tf not supported
|
||||||
|
return None
|
||||||
|
|
||||||
# NOTE: on the first history, most recent history
|
# NOTE: on the first history, most recent history
|
||||||
# frame we PREPEND from the current shm ._last index
|
# frame we PREPEND from the current shm ._last index
|
||||||
|
@ -801,11 +810,16 @@ async def push_latest_frame(
|
||||||
prepend=True, # append on first frame
|
prepend=True, # append on first frame
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return dt_eps
|
||||||
|
|
||||||
|
|
||||||
async def load_tsdb_hist(
|
async def load_tsdb_hist(
|
||||||
storage: StorageClient,
|
storage: StorageClient,
|
||||||
mkt: MktPair,
|
mkt: MktPair,
|
||||||
timeframe: float,
|
timeframe: float,
|
||||||
|
|
||||||
|
task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED,
|
||||||
|
|
||||||
) -> tuple[
|
) -> tuple[
|
||||||
np.ndarray,
|
np.ndarray,
|
||||||
DateTime,
|
DateTime,
|
||||||
|
@ -909,25 +923,36 @@ async def tsdb_backfill(
|
||||||
# mem!
|
# mem!
|
||||||
backfill_gap_from_shm_index: int = shm._first.value + 1
|
backfill_gap_from_shm_index: int = shm._first.value + 1
|
||||||
|
|
||||||
# Prepend any tsdb history to the shm buffer which should
|
# Prepend any tsdb history into the rt-shm-buffer which
|
||||||
# now be full of the most recent history pulled from the
|
# should NOW be getting filled with the most recent history
|
||||||
# backend's last frame.
|
# pulled from the data-backend.
|
||||||
if (
|
if dt_eps:
|
||||||
dt_eps
|
# well then, unpack the latest (gap) backfilled frame dts
|
||||||
and tsdb_entry
|
|
||||||
):
|
|
||||||
# unpack both the latest (gap) backfilled frame dts
|
|
||||||
(
|
(
|
||||||
mr_start_dt,
|
mr_start_dt,
|
||||||
mr_end_dt,
|
mr_end_dt,
|
||||||
) = dt_eps
|
) = dt_eps
|
||||||
|
|
||||||
# AND the tsdb history from (offline) storage)
|
# NOTE: when there's no offline data, there's 2 cases:
|
||||||
(
|
# - data backend doesn't support timeframe/sample
|
||||||
tsdb_history,
|
# period (in which case `dt_eps` should be `None` and
|
||||||
first_tsdb_dt,
|
# we shouldn't be here!), or
|
||||||
last_tsdb_dt,
|
# - no prior history has been stored (yet) and we need
|
||||||
) = tsdb_entry
|
# todo full backfill of the history now.
|
||||||
|
if tsdb_entry is None:
|
||||||
|
# indicate to backfill task to fill the whole
|
||||||
|
# shm buffer as much as it can!
|
||||||
|
last_tsdb_dt = None
|
||||||
|
|
||||||
|
# there's existing tsdb history from (offline) storage
|
||||||
|
# so only backfill the gap between the
|
||||||
|
# most-recent-frame (mrf) and that latest sample.
|
||||||
|
else:
|
||||||
|
(
|
||||||
|
tsdb_history,
|
||||||
|
first_tsdb_dt,
|
||||||
|
last_tsdb_dt,
|
||||||
|
) = tsdb_entry
|
||||||
|
|
||||||
# if there is a gap to backfill from the first
|
# if there is a gap to backfill from the first
|
||||||
# history frame until the last datum loaded from the tsdb
|
# history frame until the last datum loaded from the tsdb
|
||||||
|
@ -937,7 +962,6 @@ async def tsdb_backfill(
|
||||||
bf_done = await tn.start(
|
bf_done = await tn.start(
|
||||||
partial(
|
partial(
|
||||||
start_backfill,
|
start_backfill,
|
||||||
tn=tn,
|
|
||||||
get_hist=get_hist,
|
get_hist=get_hist,
|
||||||
mod=mod,
|
mod=mod,
|
||||||
mkt=mkt,
|
mkt=mkt,
|
||||||
|
@ -954,95 +978,98 @@ async def tsdb_backfill(
|
||||||
write_tsdb=True,
|
write_tsdb=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
nulls_detected: trio.Event | None = None
|
||||||
|
if last_tsdb_dt is not None:
|
||||||
|
# calc the index from which the tsdb data should be
|
||||||
|
# prepended, presuming there is a gap between the
|
||||||
|
# latest frame (loaded/read above) and the latest
|
||||||
|
# sample loaded from the tsdb.
|
||||||
|
backfill_diff: Duration = mr_start_dt - last_tsdb_dt
|
||||||
|
offset_s: float = backfill_diff.in_seconds()
|
||||||
|
|
||||||
# calc the index from which the tsdb data should be
|
# XXX EDGE CASEs: the most recent frame overlaps with
|
||||||
# prepended, presuming there is a gap between the
|
# prior tsdb history!!
|
||||||
# latest frame (loaded/read above) and the latest
|
# - so the latest frame's start time is earlier then
|
||||||
# sample loaded from the tsdb.
|
# the tsdb's latest sample.
|
||||||
backfill_diff: Duration = mr_start_dt - last_tsdb_dt
|
# - alternatively this may also more generally occur
|
||||||
offset_s: float = backfill_diff.in_seconds()
|
# when the venue was closed (say over the weekend)
|
||||||
|
# causing a timeseries gap, AND the query frames size
|
||||||
|
# (eg. for ib's 1s we rx 2k datums ~= 33.33m) IS
|
||||||
|
# GREATER THAN the current venue-market's operating
|
||||||
|
# session (time) we will receive datums from BEFORE THE
|
||||||
|
# CLOSURE GAP and thus the `offset_s` value will be
|
||||||
|
# NEGATIVE! In this case we need to ensure we don't try
|
||||||
|
# to push datums that have already been recorded in the
|
||||||
|
# tsdb. In this case we instead only retrieve and push
|
||||||
|
# the series portion missing from the db's data set.
|
||||||
|
# if offset_s < 0:
|
||||||
|
# non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt
|
||||||
|
# non_overlap_offset_s: float = backfill_diff.in_seconds()
|
||||||
|
|
||||||
# XXX EDGE CASEs: the most recent frame overlaps with
|
offset_samples: int = round(offset_s / timeframe)
|
||||||
# prior tsdb history!!
|
|
||||||
# - so the latest frame's start time is earlier then
|
|
||||||
# the tsdb's latest sample.
|
|
||||||
# - alternatively this may also more generally occur
|
|
||||||
# when the venue was closed (say over the weekend)
|
|
||||||
# causing a timeseries gap, AND the query frames size
|
|
||||||
# (eg. for ib's 1s we rx 2k datums ~= 33.33m) IS
|
|
||||||
# GREATER THAN the current venue-market's operating
|
|
||||||
# session (time) we will receive datums from BEFORE THE
|
|
||||||
# CLOSURE GAP and thus the `offset_s` value will be
|
|
||||||
# NEGATIVE! In this case we need to ensure we don't try
|
|
||||||
# to push datums that have already been recorded in the
|
|
||||||
# tsdb. In this case we instead only retrieve and push
|
|
||||||
# the series portion missing from the db's data set.
|
|
||||||
# if offset_s < 0:
|
|
||||||
# non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt
|
|
||||||
# non_overlap_offset_s: float = backfill_diff.in_seconds()
|
|
||||||
|
|
||||||
offset_samples: int = round(offset_s / timeframe)
|
# TODO: see if there's faster multi-field reads:
|
||||||
|
# https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
|
||||||
|
# re-index with a `time` and index field
|
||||||
|
if offset_s > 0:
|
||||||
|
# NOTE XXX: ONLY when there is an actual gap
|
||||||
|
# between the earliest sample in the latest history
|
||||||
|
# frame do we want to NOT stick the latest tsdb
|
||||||
|
# history adjacent to that latest frame!
|
||||||
|
prepend_start = shm._first.value - offset_samples + 1
|
||||||
|
to_push = tsdb_history[-prepend_start:]
|
||||||
|
else:
|
||||||
|
# when there is overlap we want to remove the
|
||||||
|
# overlapping samples from the tsdb portion (taking
|
||||||
|
# instead the latest frame's values since THEY
|
||||||
|
# SHOULD BE THE SAME) and prepend DIRECTLY adjacent
|
||||||
|
# to the latest frame!
|
||||||
|
# TODO: assert the overlap segment array contains
|
||||||
|
# the same values!?!
|
||||||
|
prepend_start = shm._first.value
|
||||||
|
to_push = tsdb_history[-(shm._first.value):offset_samples - 1]
|
||||||
|
|
||||||
# TODO: see if there's faster multi-field reads:
|
# tsdb history is so far in the past we can't fit it in
|
||||||
# https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
|
# shm buffer space so simply don't load it!
|
||||||
# re-index with a `time` and index field
|
if prepend_start > 0:
|
||||||
if offset_s > 0:
|
shm.push(
|
||||||
# NOTE XXX: ONLY when there is an actual gap
|
to_push,
|
||||||
# between the earliest sample in the latest history
|
|
||||||
# frame do we want to NOT stick the latest tsdb
|
|
||||||
# history adjacent to that latest frame!
|
|
||||||
prepend_start = shm._first.value - offset_samples + 1
|
|
||||||
to_push = tsdb_history[-prepend_start:]
|
|
||||||
else:
|
|
||||||
# when there is overlap we want to remove the
|
|
||||||
# overlapping samples from the tsdb portion (taking
|
|
||||||
# instead the latest frame's values since THEY
|
|
||||||
# SHOULD BE THE SAME) and prepend DIRECTLY adjacent
|
|
||||||
# to the latest frame!
|
|
||||||
# TODO: assert the overlap segment array contains
|
|
||||||
# the same values!?!
|
|
||||||
prepend_start = shm._first.value
|
|
||||||
to_push = tsdb_history[-(shm._first.value):offset_samples - 1]
|
|
||||||
|
|
||||||
# tsdb history is so far in the past we can't fit it in
|
# insert the history pre a "days worth" of samples
|
||||||
# shm buffer space so simply don't load it!
|
# to leave some real-time buffer space at the end.
|
||||||
if prepend_start > 0:
|
prepend=True,
|
||||||
shm.push(
|
# update_first=False,
|
||||||
to_push,
|
start=prepend_start,
|
||||||
|
field_map=storemod.ohlc_key_map,
|
||||||
|
)
|
||||||
|
|
||||||
# insert the history pre a "days worth" of samples
|
log.info(f'Loaded {to_push.shape} datums from storage')
|
||||||
# to leave some real-time buffer space at the end.
|
|
||||||
prepend=True,
|
|
||||||
# update_first=False,
|
|
||||||
start=prepend_start,
|
|
||||||
field_map=storemod.ohlc_key_map,
|
|
||||||
)
|
|
||||||
|
|
||||||
log.info(f'Loaded {to_push.shape} datums from storage')
|
# NOTE: ASYNC-conduct tsdb timestamp gap detection and backfill any
|
||||||
|
# seemingly missing (null-time) segments..
|
||||||
|
# TODO: ideally these can never exist!
|
||||||
|
# -[ ] somehow it seems sometimes we're writing zero-ed
|
||||||
|
# segments to tsdbs during teardown?
|
||||||
|
# -[ ] can we ensure that the backfiller tasks do this
|
||||||
|
# work PREVENTATIVELY instead?
|
||||||
|
# -[ ] fill in non-zero epoch time values ALWAYS!
|
||||||
|
# await maybe_fill_null_segments(
|
||||||
|
nulls_detected: trio.Event = await tn.start(partial(
|
||||||
|
maybe_fill_null_segments,
|
||||||
|
|
||||||
# NOTE: ASYNC-conduct tsdb timestamp gap detection and backfill any
|
shm=shm,
|
||||||
# seemingly missing (null-time) segments..
|
timeframe=timeframe,
|
||||||
# TODO: ideally these can never exist!
|
get_hist=get_hist,
|
||||||
# -[ ] somehow it seems sometimes we're writing zero-ed
|
sampler_stream=sampler_stream,
|
||||||
# segments to tsdbs during teardown?
|
mkt=mkt,
|
||||||
# -[ ] can we ensure that the backfiller tasks do this
|
))
|
||||||
# work PREVENTATIVELY instead?
|
|
||||||
# -[ ] fill in non-zero epoch time values ALWAYS!
|
|
||||||
# await maybe_fill_null_segments(
|
|
||||||
nulls_detected: trio.Event = await tn.start(partial(
|
|
||||||
maybe_fill_null_segments,
|
|
||||||
|
|
||||||
shm=shm,
|
|
||||||
timeframe=timeframe,
|
|
||||||
get_hist=get_hist,
|
|
||||||
sampler_stream=sampler_stream,
|
|
||||||
mkt=mkt,
|
|
||||||
))
|
|
||||||
|
|
||||||
# 2nd nursery END
|
# 2nd nursery END
|
||||||
|
|
||||||
# TODO: who would want to?
|
# TODO: who would want to?
|
||||||
await nulls_detected.wait()
|
if nulls_detected:
|
||||||
|
await nulls_detected.wait()
|
||||||
|
|
||||||
await bf_done.wait()
|
await bf_done.wait()
|
||||||
# TODO: maybe start history anal and load missing "history
|
# TODO: maybe start history anal and load missing "history
|
||||||
# gaps" via backend..
|
# gaps" via backend..
|
||||||
|
@ -1087,7 +1114,6 @@ async def tsdb_backfill(
|
||||||
|
|
||||||
async def manage_history(
|
async def manage_history(
|
||||||
mod: ModuleType,
|
mod: ModuleType,
|
||||||
bus: _FeedsBus,
|
|
||||||
mkt: MktPair,
|
mkt: MktPair,
|
||||||
some_data_ready: trio.Event,
|
some_data_ready: trio.Event,
|
||||||
feed_is_live: trio.Event,
|
feed_is_live: trio.Event,
|
||||||
|
@ -1244,7 +1270,6 @@ async def manage_history(
|
||||||
tsdb_backfill,
|
tsdb_backfill,
|
||||||
mod=mod,
|
mod=mod,
|
||||||
storemod=storemod,
|
storemod=storemod,
|
||||||
# bus,
|
|
||||||
storage=client,
|
storage=client,
|
||||||
mkt=mkt,
|
mkt=mkt,
|
||||||
shm=tf2mem[timeframe],
|
shm=tf2mem[timeframe],
|
||||||
|
@ -1337,5 +1362,3 @@ def iter_dfs_from_shms(
|
||||||
shm,
|
shm,
|
||||||
df,
|
df,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -526,7 +526,7 @@ def with_dts(
|
||||||
pl.from_epoch(pl.col(time_col)).alias('dt'),
|
pl.from_epoch(pl.col(time_col)).alias('dt'),
|
||||||
]).with_columns([
|
]).with_columns([
|
||||||
pl.from_epoch(
|
pl.from_epoch(
|
||||||
pl.col(f'{time_col}_prev')
|
column=pl.col(f'{time_col}_prev'),
|
||||||
).alias('dt_prev'),
|
).alias('dt_prev'),
|
||||||
pl.col('dt').diff().alias('dt_diff'),
|
pl.col('dt').diff().alias('dt_diff'),
|
||||||
]) #.with_columns(
|
]) #.with_columns(
|
||||||
|
|
Loading…
Reference in New Issue