Factor gap annotating into new `markup_gaps()`

Since we definitely want to mark up gaps that are both data errors and
just plain old venue closures (time gaps), generalize the `gaps:
pl.DataFrame` loop into a func and call it from the `ldshm` cmd for now.

Some other tweaks to `store ldshm`:
- add `np.median()` fallback when detecting the (ohlc) ts sample rate.
- use new `tsp.dedupe()` signature.
- differentiate between sample-period size gaps and venue closure gaps
  by calling `tsp.detect_time_gaps()` with diff size thresholds.
- add todo for backfilling null-segs when detected.
distribute_dis
Tyler Goodlet 2024-01-04 11:01:21 -05:00
parent 05f874001a
commit 6959429af8
1 changed files with 197 additions and 94 deletions

View File

@ -24,8 +24,13 @@ from __future__ import annotations
# AsyncExitStack, # AsyncExitStack,
# ) # )
from pathlib import Path from pathlib import Path
from math import copysign
import time import time
from types import ModuleType from types import ModuleType
from typing import (
Any,
TYPE_CHECKING,
)
import polars as pl import polars as pl
import numpy as np import numpy as np
@ -42,15 +47,17 @@ from piker.data import (
ShmArray, ShmArray,
) )
from piker import tsp from piker import tsp
from . import ( from piker.data._formatters import BGM
log, from . import log
)
from . import ( from . import (
__tsdbs__, __tsdbs__,
open_storage_client, open_storage_client,
StorageClient, StorageClient,
) )
if TYPE_CHECKING:
from piker.ui._remote_ctl import AnnotCtl
store = typer.Typer() store = typer.Typer()
@ -203,10 +210,12 @@ def anal(
deduped: pl.DataFrame # deduplicated dts deduped: pl.DataFrame # deduplicated dts
( (
df, df,
gaps,
deduped, deduped,
diff, diff,
) = tsp.dedupe(shm_df) ) = tsp.dedupe(
shm_df,
period=period,
)
write_edits: bool = True write_edits: bool = True
if ( if (
@ -217,7 +226,6 @@ def anal(
) )
): ):
await tractor.pause() await tractor.pause()
await client.write_ohlcv( await client.write_ohlcv(
fqme, fqme,
ohlcv=deduped, ohlcv=deduped,
@ -229,10 +237,117 @@ def anal(
# is there something more minimal but nearly as # is there something more minimal but nearly as
# functional as ipython? # functional as ipython?
await tractor.pause() await tractor.pause()
assert not null_segs
trio.run(main) trio.run(main)
async def markup_gaps(
    fqme: str,
    timeframe: float,
    actl: AnnotCtl,
    wdts: pl.DataFrame,
    gaps: pl.DataFrame,

) -> dict[int, dict]:
    '''
    Remote annotate time-gaps in a dt-fielded ts (normally OHLC)
    with rectangles.

    Parameters
    ----------
    fqme: fully-qualified market endpoint name for the target chart.
    timeframe: sample period (seconds) of the annotated series.
    actl: remote annotation-control proxy used to draw/redraw rects.
    wdts: full (dt-fielded) frame the gaps were detected within.
    gaps: frame of detected gap rows, one row per time-gap.

    Returns a map of annotation-id -> the kwargs used to create each
    rect, allowing callers to later reference/modify the annots.

    '''
    aids: dict[int, dict] = {}
    for i in range(gaps.height):
        row: pl.DataFrame = gaps[i]

        # the gap's RIGHT-most bar's OPEN value
        # at that time (sample) step.
        iend: int = row['index'][0]
        # dt: datetime = row['dt'][0]
        # dt_prev: datetime = row['dt_prev'][0]
        # dt_end_t: float = dt.timestamp()

        # TODO: can we eventually remove this
        # once we figure out why the epoch cols
        # don't match?
        # TODO: FIX HOW/WHY these aren't matching
        # and are instead off by 4hours (EST
        # vs. UTC?!?!)
        # end_t: float = row['time']
        # assert (
        #     dt.timestamp()
        #     ==
        #     end_t
        # )

        # the gap's LEFT-most bar's CLOSE value
        # at that time (sample) step.
        prev_r: pl.DataFrame = wdts.filter(
            pl.col('index') == iend - 1
        )
        istart: int = prev_r['index'][0]
        # dt_start_t: float = dt_prev.timestamp()
        # start_t: float = prev_r['time']
        # assert (
        #     dt_start_t
        #     ==
        #     start_t
        # )

        # TODO: implement px-col width measure
        # and ensure at least as many px-cols
        # shown per rect as configured by user.
        # gap_w: float = abs((iend - istart))
        # if gap_w < 6:
        #     margin: float = 6
        #     iend += margin
        #     istart -= margin

        # horizontal index-units padding added around the gap's edge
        # bars so the rect extends visibly past them.
        # NOTE(review): presumably `BGM` is a bar-graphics width
        # metric from `piker.data._formatters` — confirm.
        rect_gap: float = BGM*3/8

        opn: float = row['open'][0]
        ro: tuple[float, float] = (
            # dt_end_t,
            iend + rect_gap + 1,
            opn,
        )
        cls: float = prev_r['close'][0]
        lc: tuple[float, float] = (
            # dt_start_t,
            istart - rect_gap,  # + 1 ,
            cls,
        )

        # color by gap direction: prev-close below next-open
        # (diff < 0, an up-gap) maps to green, the inverse to red.
        diff: float = cls - opn
        sgn: int = int(copysign(1, diff))
        color: str = {
            -1: 'buy_green',
            1: 'sell_red',
        }[sgn]

        rect_kwargs: dict[str, Any] = dict(
            fqme=fqme,
            timeframe=timeframe,
            start_pos=lc,
            end_pos=ro,
            color=color,
        )
        aid: int = await actl.add_rect(**rect_kwargs)
        assert aid
        aids[aid] = rect_kwargs

    # tell chart to redraw all its
    # graphics view layers Bo
    await actl.redraw(
        fqme=fqme,
        timeframe=timeframe,
    )
    return aids
@store.command() @store.command()
def ldshm( def ldshm(
fqme: str, fqme: str,
@ -249,7 +364,6 @@ def ldshm(
async def main(): async def main():
from piker.ui._remote_ctl import ( from piker.ui._remote_ctl import (
open_annot_ctl, open_annot_ctl,
AnnotCtl,
) )
actl: AnnotCtl actl: AnnotCtl
mod: ModuleType mod: ModuleType
@ -274,111 +388,97 @@ def ldshm(
shm_df, shm_df,
) in tsp.iter_dfs_from_shms(fqme): ) in tsp.iter_dfs_from_shms(fqme):
# compute ohlc properties for naming
times: np.ndarray = shm.array['time'] times: np.ndarray = shm.array['time']
period_s: float = float(times[-1] - times[-2]) d1: float = float(times[-1] - times[-2])
if period_s < 1.: d2: float = float(times[-2] - times[-3])
med: float = np.median(np.diff(times))
if (
d1 < 1.
and d2 < 1.
and med < 1.
):
raise ValueError( raise ValueError(
f'Something is wrong with time period for {shm}:\n{times}' f'Something is wrong with time period for {shm}:\n{times}'
) )
period_s: float = float(max(d1, d2, med))
# over-write back to shm? # over-write back to shm?
df: pl.DataFrame # with dts wdts: pl.DataFrame # with dts
deduped: pl.DataFrame # deduplicated dts deduped: pl.DataFrame # deduplicated dts
( (
df, wdts,
gaps,
deduped, deduped,
diff, diff,
) = tsp.dedupe(shm_df) ) = tsp.dedupe(
shm_df,
period=period_s,
)
null_segs: tuple = tsp.get_null_segs( null_segs: tuple = tsp.get_null_segs(
frame=shm.array, frame=shm.array,
period=period_s, period=period_s,
) )
needs_correction: bool = ( # detect gaps from in expected (uniform OHLC) sample period
not gaps.is_empty() step_gaps: pl.DataFrame = tsp.detect_time_gaps(
or null_segs wdts,
expect_period=period_s,
) )
# TODO: maybe only optionally enter this depending
# on some CLI flags and/or gap detection?
if needs_correction:
for i in range(gaps.height):
row: pl.DataFrame = gaps[i]
# TODO: can we eventually remove this # TODO: by default we always want to mark these up
# once we figure out why the epoch cols # with rects showing up/down gaps Bo
# don't match? venue_gaps: pl.DataFrame = tsp.detect_time_gaps(
iend: int = row['index'][0] wdts,
# dt: datetime = row['dt'][0] expect_period=period_s,
# dt_prev: datetime = row['dt_prev'][0]
# the gap's right-most bar's OPEN value # TODO: actually pull the exact duration
# at that time (sample) step. # expected for each venue operational period?
# dt_end_t: float = dt.timestamp() gap_dt_unit='days',
gap_thresh=1,
)
# TODO: FIX HOW/WHY these aren't matching # TODO: call null-seg fixer somehow?
# and are instead off by 4hours (EST if null_segs:
# vs. UTC?!?!) await tractor.pause()
# end_t: float = row['time'] # async with (
# assert ( # trio.open_nursery() as tn,
# dt.timestamp() # mod.open_history_client(
# == # mkt,
# end_t # ) as (get_hist, config),
# ) # ):
# nulls_detected: trio.Event = await tn.start(partial(
# tsp.maybe_fill_null_segments,
# the gap's left-most bar's CLOSE value # shm=shm,
# at that time (sample) step. # timeframe=timeframe,
prev_r: pl.DataFrame = df.filter( # get_hist=get_hist,
pl.col('index') == iend - 1 # sampler_stream=sampler_stream,
# mkt=mkt,
# ))
# TODO: find the disjoint set of step gaps from
# venue (closure) set!
# -[ ] do a set diff by checking for the unique
# gap set only in the step_gaps?
if (
not venue_gaps.is_empty()
# and not step_gaps.is_empty()
):
do_markup_gaps: bool = True
if do_markup_gaps:
aids: dict = await markup_gaps(
fqme,
period_s,
actl,
wdts,
step_gaps,
) )
istart: int = prev_r['index'][0] assert aids
# dt_start_t: float = dt_prev.timestamp()
# start_t: float = prev_r['time'] # write repaired ts to parquet-file?
# assert ( if write_parquet:
# dt_start_t start: float = time.time()
# ==
# start_t
# )
# TODO: implement px-col width measure
# and ensure at least as many px-cols
# shown per rect as configured by user.
gap_w: float = abs((iend - istart))
if gap_w < 6:
margin: float = 6
iend += margin
istart -= margin
ro: tuple[float, float] = (
# dt_end_t,
iend,
row['open'][0],
)
lc: tuple[float, float] = (
# dt_start_t,
istart,
prev_r['close'][0],
)
# async with actl.open_rect(
# ) as aid:
aid: int = await actl.add_rect(
fqme=fqme,
timeframe=period_s,
start_pos=lc,
end_pos=ro,
)
assert aid
# write to parquet file?
if (
write_parquet
):
# write to fs
start = time.time()
path: Path = await client.write_ohlcv( path: Path = await client.write_ohlcv(
fqme, fqme,
ohlcv=deduped, ohlcv=deduped,
@ -390,7 +490,7 @@ def ldshm(
) )
# read back from fs # read back from fs
start = time.time() start: float = time.time()
read_df: pl.DataFrame = pl.read_parquet(path) read_df: pl.DataFrame = pl.read_parquet(path)
read_delay: float = round( read_delay: float = round(
time.time() - start, time.time() - start,
@ -412,6 +512,8 @@ def ldshm(
shm._array.setflags( shm._array.setflags(
write=int(1), write=int(1),
) )
# last chance manual overwrites in REPL
await tractor.pause()
shm.push( shm.push(
new, new,
prepend=True, prepend=True,
@ -419,8 +521,6 @@ def ldshm(
update_first=False, # don't update ._first update_first=False, # don't update ._first
) )
await tractor.pause()
assert diff
else: else:
# allow interaction even when no ts problems. # allow interaction even when no ts problems.
@ -428,8 +528,11 @@ def ldshm(
assert not diff assert not diff
if df is None: if shm_df is None:
log.error(f'No matching shm buffers for {fqme} ?') log.error(
f'No matching shm buffers for {fqme} ?'
)
trio.run(main) trio.run(main)