`.data.history`: run `.tsp.dedupe()` in backloader
In an effort to catch out-of-order and/or partial-frame-duplicated segments, add some `.tsp` calls throughout the backloader tasks including a call to the new `.sort_diff()` to catch the out-of-order history cases.distribute_dis
parent
e8bf4c6e04
commit
b95932ea09
|
@ -1,18 +1,19 @@
|
||||||
# piker: trading gear for hackers
|
# piker: trading gear for hackers
|
||||||
# Copyright (C) Tyler Goodlet (in stewardship for pikers)
|
# Copyright (C) Tyler Goodlet (in stewardship for pikers)
|
||||||
|
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or
|
||||||
# it under the terms of the GNU Affero General Public License as published by
|
# modify it under the terms of the GNU Affero General Public
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
# License as published by the Free Software Foundation, either
|
||||||
# (at your option) any later version.
|
# version 3 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
# This program is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
# GNU Affero General Public License for more details.
|
# Affero General Public License for more details.
|
||||||
|
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# License along with this program. If not, see
|
||||||
|
# <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Historical data business logic for load, backfill and tsdb storage.
|
Historical data business logic for load, backfill and tsdb storage.
|
||||||
|
@ -39,6 +40,7 @@ from pendulum import (
|
||||||
from_timestamp,
|
from_timestamp,
|
||||||
)
|
)
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
from ..accounting import (
|
from ..accounting import (
|
||||||
MktPair,
|
MktPair,
|
||||||
|
@ -54,6 +56,7 @@ from ._source import def_iohlcv_fields
|
||||||
from ._sampling import (
|
from ._sampling import (
|
||||||
open_sample_stream,
|
open_sample_stream,
|
||||||
)
|
)
|
||||||
|
from . import tsp
|
||||||
from ..brokers._util import (
|
from ..brokers._util import (
|
||||||
DataUnavailable,
|
DataUnavailable,
|
||||||
)
|
)
|
||||||
|
@ -197,7 +200,7 @@ async def start_backfill(
|
||||||
|
|
||||||
# do a decently sized backfill and load it into storage.
|
# do a decently sized backfill and load it into storage.
|
||||||
periods = {
|
periods = {
|
||||||
1: {'days': 6},
|
1: {'days': 2},
|
||||||
60: {'years': 6},
|
60: {'years': 6},
|
||||||
}
|
}
|
||||||
period_duration: int = periods[timeframe]
|
period_duration: int = periods[timeframe]
|
||||||
|
@ -246,13 +249,16 @@ async def start_backfill(
|
||||||
# broker says there never was or is no more history to pull
|
# broker says there never was or is no more history to pull
|
||||||
except DataUnavailable:
|
except DataUnavailable:
|
||||||
log.warning(
|
log.warning(
|
||||||
f'NO-MORE-DATA: backend {mod.name} halted history!?'
|
f'NO-MORE-DATA: backend {mod.name} halted history:\n'
|
||||||
|
f'{timeframe}@{mkt.fqme}'
|
||||||
)
|
)
|
||||||
|
|
||||||
# ugh, what's a better way?
|
# ugh, what's a better way?
|
||||||
# TODO: fwiw, we probably want a way to signal a throttle
|
# TODO: fwiw, we probably want a way to signal a throttle
|
||||||
# condition (eg. with ib) so that we can halt the
|
# condition (eg. with ib) so that we can halt the
|
||||||
# request loop until the condition is resolved?
|
# request loop until the condition is resolved?
|
||||||
|
if timeframe > 1:
|
||||||
|
await tractor.pause()
|
||||||
return
|
return
|
||||||
|
|
||||||
# TODO: drop this? see todo above..
|
# TODO: drop this? see todo above..
|
||||||
|
@ -300,9 +306,11 @@ async def start_backfill(
|
||||||
array,
|
array,
|
||||||
prepend_until_dt=backfill_until_dt,
|
prepend_until_dt=backfill_until_dt,
|
||||||
)
|
)
|
||||||
ln = len(to_push)
|
ln: int = len(to_push)
|
||||||
if ln:
|
if ln:
|
||||||
log.info(f'{ln} bars for {next_start_dt} -> {last_start_dt}')
|
log.info(
|
||||||
|
f'{ln} bars for {next_start_dt} -> {last_start_dt}'
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.warning(
|
log.warning(
|
||||||
|
@ -388,14 +396,29 @@ async def start_backfill(
|
||||||
without_src=True,
|
without_src=True,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
col_sym_key: str = mkt.get_fqme(delim_char='')
|
col_sym_key: str = mkt.get_fqme(
|
||||||
|
delim_char='',
|
||||||
|
)
|
||||||
|
|
||||||
# TODO: implement parquet append!?
|
|
||||||
await storage.write_ohlcv(
|
await storage.write_ohlcv(
|
||||||
col_sym_key,
|
col_sym_key,
|
||||||
shm.array,
|
shm.array,
|
||||||
timeframe,
|
timeframe,
|
||||||
)
|
)
|
||||||
|
df: pl.DataFrame = await storage.as_df(
|
||||||
|
fqme=mkt.fqme,
|
||||||
|
period=timeframe,
|
||||||
|
load_from_offline=False,
|
||||||
|
)
|
||||||
|
(
|
||||||
|
df,
|
||||||
|
gaps,
|
||||||
|
deduped,
|
||||||
|
diff,
|
||||||
|
) = tsp.dedupe(df)
|
||||||
|
if diff:
|
||||||
|
tsp.sort_diff(df)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# finally filled gap
|
# finally filled gap
|
||||||
log.info(
|
log.info(
|
||||||
|
@ -634,12 +657,19 @@ async def tsdb_backfill(
|
||||||
async with mod.open_history_client(
|
async with mod.open_history_client(
|
||||||
mkt,
|
mkt,
|
||||||
) as (get_hist, config):
|
) as (get_hist, config):
|
||||||
log.info(f'{mod} history client returned backfill config: {config}')
|
log.info(
|
||||||
|
f'`{mod}` history client returned backfill config:\n'
|
||||||
|
f'{config}\n'
|
||||||
|
)
|
||||||
|
|
||||||
# get latest query's worth of history all the way
|
# get latest query's worth of history all the way
|
||||||
# back to what is recorded in the tsdb
|
# back to what is recorded in the tsdb
|
||||||
try:
|
try:
|
||||||
array, mr_start_dt, mr_end_dt = await get_hist(
|
(
|
||||||
|
array,
|
||||||
|
mr_start_dt,
|
||||||
|
mr_end_dt,
|
||||||
|
) = await get_hist(
|
||||||
timeframe,
|
timeframe,
|
||||||
end_dt=None,
|
end_dt=None,
|
||||||
)
|
)
|
||||||
|
@ -649,6 +679,7 @@ async def tsdb_backfill(
|
||||||
# there's no backfilling possible.
|
# there's no backfilling possible.
|
||||||
except DataUnavailable:
|
except DataUnavailable:
|
||||||
task_status.started()
|
task_status.started()
|
||||||
|
await tractor.pause()
|
||||||
return
|
return
|
||||||
|
|
||||||
# TODO: fill in non-zero epoch time values ALWAYS!
|
# TODO: fill in non-zero epoch time values ALWAYS!
|
||||||
|
@ -699,9 +730,8 @@ async def tsdb_backfill(
|
||||||
)
|
)
|
||||||
except TimeseriesNotFound:
|
except TimeseriesNotFound:
|
||||||
log.warning(
|
log.warning(
|
||||||
f'No timeseries yet for {fqme}'
|
f'No timeseries yet for {timeframe}@{fqme}'
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
(
|
(
|
||||||
tsdb_history,
|
tsdb_history,
|
||||||
|
@ -784,25 +814,24 @@ async def tsdb_backfill(
|
||||||
f'timeframe of {timeframe} seconds..\n'
|
f'timeframe of {timeframe} seconds..\n'
|
||||||
'So yuh.. dun do dat brudder.'
|
'So yuh.. dun do dat brudder.'
|
||||||
)
|
)
|
||||||
|
|
||||||
# if there is a gap to backfill from the first
|
# if there is a gap to backfill from the first
|
||||||
# history frame until the last datum loaded from the tsdb
|
# history frame until the last datum loaded from the tsdb
|
||||||
# continue that now in the background
|
# continue that now in the background
|
||||||
bf_done = await tn.start(
|
bf_done = await tn.start(
|
||||||
partial(
|
partial(
|
||||||
start_backfill,
|
start_backfill,
|
||||||
get_hist,
|
get_hist=get_hist,
|
||||||
mod,
|
mod=mod,
|
||||||
mkt,
|
mkt=mkt,
|
||||||
shm,
|
shm=shm,
|
||||||
timeframe,
|
timeframe=timeframe,
|
||||||
|
|
||||||
backfill_from_shm_index=backfill_gap_from_shm_index,
|
backfill_from_shm_index=backfill_gap_from_shm_index,
|
||||||
backfill_from_dt=mr_start_dt,
|
backfill_from_dt=mr_start_dt,
|
||||||
|
|
||||||
sampler_stream=sampler_stream,
|
sampler_stream=sampler_stream,
|
||||||
|
|
||||||
backfill_until_dt=last_tsdb_dt,
|
backfill_until_dt=last_tsdb_dt,
|
||||||
storage=storage,
|
storage=storage,
|
||||||
|
write_tsdb=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -824,8 +853,11 @@ async def tsdb_backfill(
|
||||||
finally:
|
finally:
|
||||||
return
|
return
|
||||||
|
|
||||||
# IF we need to continue backloading incrementally from the
|
# XXX NOTE: this is legacy from when we were using
|
||||||
# tsdb client..
|
# marketstore and we needed to continue backloading
|
||||||
|
# incrementally from the tsdb client.. (bc it couldn't
|
||||||
|
# handle a single large query with gRPC for some
|
||||||
|
# reason.. classic goolag pos)
|
||||||
tn.start_soon(
|
tn.start_soon(
|
||||||
back_load_from_tsdb,
|
back_load_from_tsdb,
|
||||||
|
|
||||||
|
@ -994,19 +1026,18 @@ async def manage_history(
|
||||||
log.info(f'Connected to sampler stream: {sample_stream}')
|
log.info(f'Connected to sampler stream: {sample_stream}')
|
||||||
|
|
||||||
for timeframe in [60, 1]:
|
for timeframe in [60, 1]:
|
||||||
await tn.start(
|
await tn.start(partial(
|
||||||
tsdb_backfill,
|
tsdb_backfill,
|
||||||
mod,
|
mod=mod,
|
||||||
storemod,
|
storemod=storemod,
|
||||||
tn,
|
tn=tn,
|
||||||
# bus,
|
# bus,
|
||||||
client,
|
storage=client,
|
||||||
mkt,
|
mkt=mkt,
|
||||||
tf2mem[timeframe],
|
shm=tf2mem[timeframe],
|
||||||
timeframe,
|
timeframe=timeframe,
|
||||||
|
sampler_stream=sample_stream,
|
||||||
sample_stream,
|
))
|
||||||
)
|
|
||||||
|
|
||||||
# indicate to caller that feed can be delivered to
|
# indicate to caller that feed can be delivered to
|
||||||
# remote requesting client since we've loaded history
|
# remote requesting client since we've loaded history
|
||||||
|
|
Loading…
Reference in New Issue