Create `piker.tsp` "time series processing" subpkg

Move `.data.history` -> `.tsp.__init__.py` for now as the main pkg-mod, and `.data.tsp` -> `.tsp._anal` (for analysis). Obviously follow-up commits will change the surrounding codebase (imports) to match.
parent: d5d68f75ea
commit: 4568c55f17
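For orientation, a minimal sketch of what the move means for importing code; the package paths below are taken from the description above, and the specific names pulled in are illustrative only:

    # old layout (pre-commit): history/backfill code lived in
    # `piker.data.history` and the analysis routines in `piker.data.tsp`,
    # e.g.:
    # from piker.data.tsp import dedupe, slice_from_time

    # new layout (this commit): `piker/tsp/__init__.py` is the former
    # `piker/data/history.py` and re-exports the analysis names from
    # `piker/tsp/_anal.py` (the former `piker/data/tsp.py`) via `__all__`,
    # so downstream code can import from the subpkg root:
    from piker.tsp import (
        dedupe,
        slice_from_time,
    )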
The hunks below are against the history/backfill module, now `piker/tsp/__init__.py` per the move above (formerly `piker/data/history.py`):

@@ -32,6 +32,7 @@ from __future__ import annotations
 from datetime import datetime
 from functools import partial
 from pathlib import Path
 from pprint import pformat
 from types import ModuleType
 from typing import (
     Callable,
@@ -53,25 +54,64 @@ import polars as pl
 from ..accounting import (
     MktPair,
 )
-from ._util import (
+from ..data._util import (
     log,
 )
-from ._sharedmem import (
+from ..data._sharedmem import (
     maybe_open_shm_array,
     ShmArray,
 )
-from ._source import def_iohlcv_fields
-from ._sampling import (
+from ..data._source import def_iohlcv_fields
+from ..data._sampling import (
     open_sample_stream,
 )
-from .tsp import (
-    dedupe,
+from ._anal import (
+
     get_null_segs,
     iter_null_segs,
     sort_diff,
     Frame,
-    # Seq,
+    Seq,
+
+    # codec-ish
+    np2pl,
+    pl2np,
+
+    # `numpy` only
+    slice_from_time,
+
+    # `polars` specific
+    dedupe,
+    with_dts,
+    detect_time_gaps,
+    sort_diff,
+
+    # TODO:
+    detect_price_gaps
 )
+
+__all__: list[str] = [
+    'dedupe',
+    'get_null_segs',
+    'iter_null_segs',
+    'sort_diff',
+    'slice_from_time',
+    'Frame',
+    'Seq',
+
+    'np2pl',
+    'pl2np',
+
+    'slice_from_time',
+
+    'with_dts',
+    'detect_time_gaps',
+    'sort_diff',
+
+    # TODO:
+    'detect_price_gaps'
+]
+

 # TODO: break up all this shite into submods!
 from ..brokers._util import (
     DataUnavailable,
 )
@@ -252,21 +292,54 @@ async def maybe_fill_null_segments(
         and
         len(null_segs[-1])
     ):
         await tractor.pause()
         (
             iabs_slices,
             iabs_zero_rows,
             zero_t,
         ) = null_segs
         log.warning(
             f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n'
             f'{pformat(iabs_slices)}'
         )

         array = shm.array
         zeros = array[array['low'] == 0]

-        # always backfill gaps with the earliest (price) datum's
+        # TODO: always backfill gaps with the earliest (price) datum's
         # value to avoid the y-ranger including zeros and completely
         # stretching the y-axis..
-        if 0 < zeros.size:
-            zeros[[
+        # array: np.ndarray = shm.array
+        # zeros = array[array['low'] == 0]
+        ohlc_fields: list[str] = [
             'open',
             'high',
             'low',
             'close',
-            ]] = shm._array[zeros['index'][0] - 1]['close']
+        ]
+
+        for istart, istop in iabs_slices:
+
+            # get view into buffer for null-segment
+            gap: np.ndarray = shm._array[istart:istop]
+
+            # copy the oldest OHLC samples forward
+            gap[ohlc_fields] = shm._array[istart]['close']
+
+            start_t: float = shm._array[istart]['time']
+            t_diff: float = (istop - istart)*timeframe
+            gap['time'] = np.arange(
+                start=start_t,
+                stop=start_t + t_diff,
+                step=timeframe,
+            )
+
+            await sampler_stream.send({
+                'broadcast_all': {
+
+                    # XXX NOTE XXX: see the
+                    # `.ui._display.increment_history_view()` if block
+                    # that looks for this info to FORCE a hard viz
+                    # redraw!
+                    'backfilling': (mkt.fqme, timeframe),
+                },
+            })

         # TODO: interatively step through any remaining
         # time-gaps/null-segments and spawn piecewise backfiller
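The new fill loop above flat-lines each null segment at a known close price and regenerates its epoch-time column. A standalone sketch of the same idea on a plain `numpy` structured array (the buffer, field values and exact slice indexing here are made up for illustration, not piker's conventions):

    import numpy as np

    timeframe: float = 1.0  # hypothetical sample period in seconds

    # toy OHLC buffer: rows 2..3 are a "null segment" (all zeros)
    dt = np.dtype([(f, 'f8') for f in ('time', 'open', 'high', 'low', 'close')])
    buf = np.zeros(6, dtype=dt)
    buf['time'] = np.arange(6, dtype='f8')
    for f in ('open', 'high', 'low', 'close'):
        buf[f] = 10.0
    buf[2:4] = 0  # simulate the detected gap

    # flat-line the segment at the last valid close and rebuild its
    # time column, analogous to the loop in the hunk above
    istart, istop = 1, 4  # slice starts one row before the first zero
    gap = buf[istart:istop]  # a view into the buffer, not a copy
    fill: float = float(buf[istart]['close'])
    for f in ('open', 'high', 'low', 'close'):
        gap[f] = fill
    gap['time'] = np.arange(
        start=buf[istart]['time'],
        stop=buf[istart]['time'] + (istop - istart) * timeframe,
        step=timeframe,
    )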
@@ -277,10 +350,7 @@ async def maybe_fill_null_segments(
     # parallel possible no matter the backend?
     # -[ ] fill algo: do queries in alternating "latest, then
     #    earliest, then latest.. etc?"
-    # if (
-    #     next_end_dt not in frame[
-    # ):
-    #     pass
+    # await tractor.pause()


 async def start_backfill(
@@ -1252,8 +1322,8 @@ def iter_dfs_from_shms(
         assert not opened
         ohlcv = shm.array

-        from ..data import tsp
-        df: pl.DataFrame = tsp.np2pl(ohlcv)
+        from ._anal import np2pl
+        df: pl.DataFrame = np2pl(ohlcv)

         yield (
             shmfile,
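The shm buffer is a `numpy` structured array, so `np2pl` above amounts to a numpy-to-polars frame conversion. A rough equivalent using only public `polars` API, purely as a sketch and not necessarily how `np2pl` is implemented:

    import numpy as np
    import polars as pl

    def np2pl_sketch(arr: np.ndarray) -> pl.DataFrame:
        # build one polars column per structured-array field
        return pl.DataFrame({
            name: arr[name]
            for name in arr.dtype.names
        })

    ohlcv = np.zeros(3, dtype=[('time', 'f8'), ('close', 'f8')])
    df: pl.DataFrame = np2pl_sketch(ohlcv)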
The remaining hunks touch the analysis routines, now `piker/tsp/_anal.py` per the move above (formerly `piker/data/tsp.py`):

@@ -319,9 +319,8 @@ def get_null_segs(
     if num_gaps < 1:
         if absi_zeros.size > 1:
             absi_zsegs = [[
-                # see `get_hist()` in backend, should ALWAYS be
-                # able to handle a `start_dt=None`!
-                # None,
+                # TODO: maybe mk these max()/min() limits func
+                # consts instead of called more then once?
                 max(
                     absi_zeros[0] - 1,
                     0,
@@ -359,7 +358,10 @@ def get_null_segs(
     # corresponding to the first zero-segment's row, we add it
     # manually here.
     absi_zsegs.append([
-        absi_zeros[0] - 1,
+        max(
+            absi_zeros[0] - 1,
+            0,
+        ),
         None,
     ])

@@ -400,14 +402,18 @@ def get_null_segs(

     else:
         if 0 < num_gaps < 2:
-            absi_zsegs[-1][1] = absi_zeros[-1] + 1
+            absi_zsegs[-1][1] = min(
+                absi_zeros[-1] + 1,
+                frame['index'][-1],
+            )

     iabs_first: int = frame['index'][0]
     for start, end in absi_zsegs:

         ts_start: float = times[start - iabs_first]
         ts_end: float = times[end - iabs_first]
         if (
-            ts_start == 0
+            (ts_start == 0 and not start == 0)
             or
             ts_end == 0
         ):
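The `min()` added here (like the earlier `max()` guards) clamps a computed segment boundary into the frame's valid absolute-index range; a tiny standalone illustration with made-up index values:

    import numpy as np

    # hypothetical absolute index column spanning 100..109
    index = np.arange(100, 110)

    # one-past the last zero row, clamped so it can't run off the frame end
    last_zero_absi = 109
    seg_end = min(last_zero_absi + 1, int(index[-1]))
    assert seg_end == 109

    # one-before the first zero row, clamped so it can't go negative
    first_zero_absi = 0
    seg_start = max(first_zero_absi - 1, 0)
    assert seg_start == 0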
@@ -451,11 +457,13 @@ def iter_null_segs(
     ],
     None,
 ]:
-    if null_segs is None:
-        null_segs: tuple = get_null_segs(
+    if not (
+        null_segs := get_null_segs(
             frame,
             period=timeframe,
         )
+    ):
+        return

     absi_pairs_zsegs: list[list[float, float]]
     izeros: Seq
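The rewrite above folds the lookup and the empty-check into one walrus expression so the generator can bail out early. A minimal sketch of the pattern (the `find_gaps` helper is hypothetical, not piker code):

    from typing import Iterator

    def find_gaps(data: list[int]) -> list[int] | None:
        # stand-in for `get_null_segs()`: return None when nothing is found
        gaps = [i for i, x in enumerate(data) if x == 0]
        return gaps or None

    def iter_gaps(data: list[int]) -> Iterator[int]:
        # assign and test in one expression; return early when falsy
        if not (gaps := find_gaps(data)):
            return
        for i in gaps:
            yield i

    assert list(iter_gaps([1, 0, 2, 0])) == [1, 3]
    assert list(iter_gaps([1, 2, 3])) == []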
@@ -502,6 +510,7 @@ def iter_null_segs(
     )


+# TODO: move to ._pl_anal
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',
@@ -525,19 +534,6 @@ def with_dts(
     # )


-def dedup_dt(
-    df: pl.DataFrame,
-) -> pl.DataFrame:
-    '''
-    Drop duplicate date-time rows (normally from an OHLC frame).
-
-    '''
-    return df.unique(
-        subset=['dt'],
-        maintain_order=True,
-    )
-
-
 t_unit: Literal = Literal[
     'days',
     'hours',
@@ -651,7 +647,11 @@ def dedupe(src_df: pl.DataFrame) -> tuple[
     )

     # remove duplicated datetime samples/sections
-    deduped: pl.DataFrame = dedup_dt(df)
+    deduped: pl.DataFrame = df.unique(
+        subset=['dt'],
+        maintain_order=True,
+    )

     deduped_gaps = detect_time_gaps(deduped)

     diff: int = (
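With `dedup_dt()` removed above, the de-duplication is just an inline `DataFrame.unique()` call keyed on the parsed datetime column; a small self-contained `polars` example (the frame here is made up):

    import polars as pl

    df = pl.DataFrame({
        'dt': ['2024-01-01', '2024-01-01', '2024-01-02'],
        'close': [10.0, 10.0, 11.0],
    })

    # drop rows with a duplicate 'dt' value, keeping the original row order
    deduped: pl.DataFrame = df.unique(
        subset=['dt'],
        maintain_order=True,
    )
    assert deduped.height == 2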