Create `piker.tsp` "time series processing" subpkg

Move `.data.history` -> `.tsp.__init__.py` for now as the main pkg-mod
and `.data.tsp` -> `.tsp._anal` (for analysis). Obviously follow-up
commits will change the surrounding codebase (imports) to match.

branch: distribute_dis
parent: d5d68f75ea
commit: 4568c55f17
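
In file terms, the two renames stated above amount to (paths assumed
from the `piker` repo layout):

    piker/data/history.py -> piker/tsp/__init__.py   # main pkg-mod
    piker/data/tsp.py     -> piker/tsp/_anal.py      # ts analysis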
piker/tsp/__init__.py (renamed from piker/data/history.py)

@@ -32,6 +32,7 @@ from __future__ import annotations
 from datetime import datetime
 from functools import partial
 from pathlib import Path
+from pprint import pformat
 from types import ModuleType
 from typing import (
     Callable,
@@ -53,25 +54,64 @@ import polars as pl
 from ..accounting import (
     MktPair,
 )
-from ._util import (
+from ..data._util import (
     log,
 )
-from ._sharedmem import (
+from ..data._sharedmem import (
     maybe_open_shm_array,
     ShmArray,
 )
-from ._source import def_iohlcv_fields
-from ._sampling import (
+from ..data._source import def_iohlcv_fields
+from ..data._sampling import (
     open_sample_stream,
 )
-from .tsp import (
-    dedupe,
+from ._anal import (
     get_null_segs,
     iter_null_segs,
-    sort_diff,
     Frame,
-    # Seq,
+    Seq,
+
+    # codec-ish
+    np2pl,
+    pl2np,
+
+    # `numpy` only
+    slice_from_time,
+
+    # `polars` specific
+    dedupe,
+    with_dts,
+    detect_time_gaps,
+    sort_diff,
+
+    # TODO:
+    detect_price_gaps
+)
+
+__all__: list[str] = [
+    'dedupe',
+    'get_null_segs',
+    'iter_null_segs',
+    'sort_diff',
+    'slice_from_time',
+    'Frame',
+    'Seq',
+
+    'np2pl',
+    'pl2np',
+
+    'slice_from_time',
+
+    'with_dts',
+    'detect_time_gaps',
+    'sort_diff',
+
+    # TODO:
+    'detect_price_gaps'
+]
+
+
+# TODO: break up all this shite into submods!
 from ..brokers._util import (
     DataUnavailable,
 )
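
The `__all__` re-export above means downstream callers should be able
to pull the analysis routines straight off the new subpkg; a
hypothetical consumer snippet (not part of this diff):

```python
from piker.tsp import (
    dedupe,         # `polars` OHLC de-duplication
    get_null_segs,  # locate zeroed/null time segments
    np2pl,          # `numpy` struct-array -> `polars` frame codec
)
```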
@@ -252,21 +292,54 @@ async def maybe_fill_null_segments(
         and
         len(null_segs[-1])
     ):
-        await tractor.pause()
-        array = shm.array
-        zeros = array[array['low'] == 0]
-
-        # always backfill gaps with the earliest (price) datum's
+        (
+            iabs_slices,
+            iabs_zero_rows,
+            zero_t,
+        ) = null_segs
+        log.warning(
+            f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n'
+            f'{pformat(iabs_slices)}'
+        )
+
+        # TODO: always backfill gaps with the earliest (price) datum's
         # value to avoid the y-ranger including zeros and completely
         # stretching the y-axis..
-        if 0 < zeros.size:
-            zeros[[
-                'open',
-                'high',
-                'low',
-                'close',
-            ]] = shm._array[zeros['index'][0] - 1]['close']
+        # array: np.ndarray = shm.array
+        # zeros = array[array['low'] == 0]
+        ohlc_fields: list[str] = [
+            'open',
+            'high',
+            'low',
+            'close',
+        ]
+
+        for istart, istop in iabs_slices:
+
+            # get view into buffer for null-segment
+            gap: np.ndarray = shm._array[istart:istop]
+
+            # copy the oldest OHLC samples forward
+            gap[ohlc_fields] = shm._array[istart]['close']
+
+            start_t: float = shm._array[istart]['time']
+            t_diff: float = (istop - istart)*timeframe
+            gap['time'] = np.arange(
+                start=start_t,
+                stop=start_t + t_diff,
+                step=timeframe,
+            )
+
+            await sampler_stream.send({
+                'broadcast_all': {
+
+                    # XXX NOTE XXX: see the
+                    # `.ui._display.increment_history_view()` if block
+                    # that looks for this info to FORCE a hard viz
+                    # redraw!
+                    'backfilling': (mkt.fqme, timeframe),
+                },
+            })
+
     # TODO: interatively step through any remaining
     # time-gaps/null-segments and spawn piecewise backfiller
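
The new loop replaces the old single "fill from the first zero row"
branch: it walks every detected null segment, forward-fills the OHLC
fields from the segment's first (still valid) sample, and
re-synthesizes monotonic timestamps. A minimal standalone sketch of
that technique on a toy struct-array (standing in for the real
`ShmArray._array` buffer):

```python
import numpy as np

# toy OHLCV struct-array standing in for `shm._array`
dt = np.dtype([
    ('time', 'f8'),
    ('open', 'f8'), ('high', 'f8'), ('low', 'f8'), ('close', 'f8'),
])
arr = np.zeros(6, dtype=dt)
arr['time'] = np.arange(0., 60., 10.)
arr[['open', 'high', 'low', 'close']] = 1.
timeframe: float = 10.

# zero-out rows 2:4 to fake a null segment (zeroed time + prices)
arr[2:4] = 0

# per `get_null_segs()`, each (istart, istop) slice includes the last
# valid row at `istart` (note the max(absi_zeros[0] - 1, 0) clamping
# below), so the first row of the view holds real data to copy forward
istart, istop = 1, 4
gap: np.ndarray = arr[istart:istop]  # view into the buffer

# copy the oldest sample's close price over all OHLC fields
gap[['open', 'high', 'low', 'close']] = arr[istart]['close']

# regenerate monotonic timestamps across the gap
start_t: float = arr[istart]['time']
gap['time'] = np.arange(
    start=start_t,
    stop=start_t + (istop - istart)*timeframe,
    step=timeframe,
)

assert (arr['low'] != 0).all()  # no zeros left for the y-ranger
```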
@@ -277,10 +350,7 @@ async def maybe_fill_null_segments(
    # parallel possible no matter the backend?
    # -[ ] fill algo: do queries in alternating "latest, then
    #      earliest, then latest.. etc?"
-   # if (
-   #     next_end_dt not in frame[
-   # ):
-   #     pass
+   # await tractor.pause()


 async def start_backfill(
@@ -1252,8 +1322,8 @@ def iter_dfs_from_shms(
         assert not opened
         ohlcv = shm.array

-        from ..data import tsp
-        df: pl.DataFrame = tsp.np2pl(ohlcv)
+        from ._anal import np2pl
+        df: pl.DataFrame = np2pl(ohlcv)

         yield (
             shmfile,
piker/tsp/_anal.py (renamed from piker/data/tsp.py)

@@ -319,9 +319,8 @@ def get_null_segs(
     if num_gaps < 1:
         if absi_zeros.size > 1:
             absi_zsegs = [[
-                # see `get_hist()` in backend, should ALWAYS be
-                # able to handle a `start_dt=None`!
-                # None,
+                # TODO: maybe mk these max()/min() limits func
+                # consts instead of called more then once?
                 max(
                     absi_zeros[0] - 1,
                     0,
@@ -359,7 +358,10 @@ def get_null_segs(
         # corresponding to the first zero-segment's row, we add it
         # manually here.
         absi_zsegs.append([
-            absi_zeros[0] - 1,
+            max(
+                absi_zeros[0] - 1,
+                0,
+            ),
             None,
         ])

@@ -400,14 +402,18 @@ def get_null_segs(

     else:
         if 0 < num_gaps < 2:
-            absi_zsegs[-1][1] = absi_zeros[-1] + 1
+            absi_zsegs[-1][1] = min(
+                absi_zeros[-1] + 1,
+                frame['index'][-1],
+            )

     iabs_first: int = frame['index'][0]
     for start, end in absi_zsegs:

         ts_start: float = times[start - iabs_first]
         ts_end: float = times[end - iabs_first]
         if (
-            ts_start == 0
+            (ts_start == 0 and not start == 0)
             or
             ts_end == 0
         ):
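
Both changes above are index clamping: the first zero-segment's start
is floored at 0 via `max()`, and the final segment's end is capped at
the frame's last absolute index via `min()`. A toy demonstration with
assumed values (not taken from the diff):

```python
# hypothetical absolute-index values, for illustration only
absi_zeros_first: int = 0     # zero-run starts at the very first row
absi_zeros_last: int = 1012   # last zeroed row
last_frame_index: int = 1010  # frame['index'][-1]

# floor the seg-start so a leading zero-run can't index row -1
start: int = max(absi_zeros_first - 1, 0)

# cap the seg-end so it can't run past the end of the frame
end: int = min(absi_zeros_last + 1, last_frame_index)

assert (start, end) == (0, 1010)
```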
@@ -451,11 +457,13 @@ def iter_null_segs(
     ],
     None,
 ]:
-    if null_segs is None:
-        null_segs: tuple = get_null_segs(
+    if not (
+        null_segs := get_null_segs(
             frame,
             period=timeframe,
         )
+    ):
+        return

     absi_pairs_zsegs: list[list[float, float]]
     izeros: Seq
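
The guard is rewritten with an assignment expression so that a falsy
(empty/`None`) result from `get_null_segs()` now early-returns from
the generator instead of continuing with nothing to unpack. A minimal
sketch of the idiom, with a toy `find_segs()` standing in for
`get_null_segs()`:

```python
from typing import Iterator

def find_segs(data: list[int]) -> list[int] | None:
    # stand-in for `get_null_segs()`: index every zeroed entry
    segs: list[int] = [i for i, x in enumerate(data) if x == 0]
    return segs or None

def iter_segs(data: list[int]) -> Iterator[int]:
    # walrus-guard: bind and test in one expression, bailing out
    # early when nothing was found
    if not (segs := find_segs(data)):
        return
    yield from segs

assert list(iter_segs([1, 0, 2, 0])) == [1, 3]
assert list(iter_segs([1, 2, 3])) == []
```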
@@ -502,6 +510,7 @@ def iter_null_segs(
     )


+# TODO: move to ._pl_anal
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',
@@ -525,19 +534,6 @@ def with_dts(
     # )


-def dedup_dt(
-    df: pl.DataFrame,
-) -> pl.DataFrame:
-    '''
-    Drop duplicate date-time rows (normally from an OHLC frame).
-
-    '''
-    return df.unique(
-        subset=['dt'],
-        maintain_order=True,
-    )
-
-
 t_unit: Literal = Literal[
     'days',
     'hours',
@@ -651,7 +647,11 @@ def dedupe(src_df: pl.DataFrame) -> tuple[
     )

     # remove duplicated datetime samples/sections
-    deduped: pl.DataFrame = dedup_dt(df)
+    deduped: pl.DataFrame = df.unique(
+        subset=['dt'],
+        maintain_order=True,
+    )

     deduped_gaps = detect_time_gaps(deduped)

     diff: int = (
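
Since `dedup_dt()` was a single-call wrapper, it gets dropped and its
`polars` call inlined at the one call-site in `dedupe()`:
`maintain_order=True` keeps row order stable while duplicate datetime
rows are pruned. A toy repro (column name `dt` assumed to match the
`with_dts()` output):

```python
import polars as pl

df = pl.DataFrame({
    'dt': ['2023-12-01 00:00', '2023-12-01 00:00', '2023-12-01 00:01'],
    'close': [1.0, 1.1, 1.2],
})

# drop duplicate date-time rows, preserving original row order
deduped: pl.DataFrame = df.unique(
    subset=['dt'],
    maintain_order=True,
)
assert deduped.height == 2
```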