Factor gap annotating into new `markup_gaps()`
Since we definitely want to markup gaps that are both data-errors and just plain old venue closures (time gaps), generalize the `gaps: pl.DataFrame` loop in a func and call it from the `ldshm` cmd for now. Some other tweaks to `store ldshm`: - add `np.median()` failover when detecting (ohlc) ts sample rate. - use new `tsp.dedupe()` signature. - differentiate between sample-period size gaps and venue closure gaps by calling `tsp.detect_time_gaps()` with diff size thresholds. - add todo for backfilling null-segs when detected.distribute_dis
parent
05f874001a
commit
6959429af8
|
@ -24,8 +24,13 @@ from __future__ import annotations
|
||||||
# AsyncExitStack,
|
# AsyncExitStack,
|
||||||
# )
|
# )
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from math import copysign
|
||||||
import time
|
import time
|
||||||
from types import ModuleType
|
from types import ModuleType
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
TYPE_CHECKING,
|
||||||
|
)
|
||||||
|
|
||||||
import polars as pl
|
import polars as pl
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -42,15 +47,17 @@ from piker.data import (
|
||||||
ShmArray,
|
ShmArray,
|
||||||
)
|
)
|
||||||
from piker import tsp
|
from piker import tsp
|
||||||
from . import (
|
from piker.data._formatters import BGM
|
||||||
log,
|
from . import log
|
||||||
)
|
|
||||||
from . import (
|
from . import (
|
||||||
__tsdbs__,
|
__tsdbs__,
|
||||||
open_storage_client,
|
open_storage_client,
|
||||||
StorageClient,
|
StorageClient,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from piker.ui._remote_ctl import AnnotCtl
|
||||||
|
|
||||||
|
|
||||||
store = typer.Typer()
|
store = typer.Typer()
|
||||||
|
|
||||||
|
@ -203,10 +210,12 @@ def anal(
|
||||||
deduped: pl.DataFrame # deduplicated dts
|
deduped: pl.DataFrame # deduplicated dts
|
||||||
(
|
(
|
||||||
df,
|
df,
|
||||||
gaps,
|
|
||||||
deduped,
|
deduped,
|
||||||
diff,
|
diff,
|
||||||
) = tsp.dedupe(shm_df)
|
) = tsp.dedupe(
|
||||||
|
shm_df,
|
||||||
|
period=period,
|
||||||
|
)
|
||||||
|
|
||||||
write_edits: bool = True
|
write_edits: bool = True
|
||||||
if (
|
if (
|
||||||
|
@ -217,7 +226,6 @@ def anal(
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
await tractor.pause()
|
await tractor.pause()
|
||||||
|
|
||||||
await client.write_ohlcv(
|
await client.write_ohlcv(
|
||||||
fqme,
|
fqme,
|
||||||
ohlcv=deduped,
|
ohlcv=deduped,
|
||||||
|
@ -229,10 +237,117 @@ def anal(
|
||||||
# is there something more minimal but nearly as
|
# is there something more minimal but nearly as
|
||||||
# functional as ipython?
|
# functional as ipython?
|
||||||
await tractor.pause()
|
await tractor.pause()
|
||||||
|
assert not null_segs
|
||||||
|
|
||||||
trio.run(main)
|
trio.run(main)
|
||||||
|
|
||||||
|
|
||||||
|
async def markup_gaps(
|
||||||
|
fqme: str,
|
||||||
|
timeframe: float,
|
||||||
|
actl: AnnotCtl,
|
||||||
|
wdts: pl.DataFrame,
|
||||||
|
gaps: pl.DataFrame,
|
||||||
|
|
||||||
|
) -> dict[int, dict]:
|
||||||
|
'''
|
||||||
|
Remote annotate time-gaps in a dt-fielded ts (normally OHLC)
|
||||||
|
with rectangles.
|
||||||
|
|
||||||
|
'''
|
||||||
|
aids: dict[int] = {}
|
||||||
|
for i in range(gaps.height):
|
||||||
|
|
||||||
|
row: pl.DataFrame = gaps[i]
|
||||||
|
|
||||||
|
# the gap's RIGHT-most bar's OPEN value
|
||||||
|
# at that time (sample) step.
|
||||||
|
iend: int = row['index'][0]
|
||||||
|
# dt: datetime = row['dt'][0]
|
||||||
|
# dt_prev: datetime = row['dt_prev'][0]
|
||||||
|
# dt_end_t: float = dt.timestamp()
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: can we eventually remove this
|
||||||
|
# once we figure out why the epoch cols
|
||||||
|
# don't match?
|
||||||
|
# TODO: FIX HOW/WHY these aren't matching
|
||||||
|
# and are instead off by 4hours (EST
|
||||||
|
# vs. UTC?!?!)
|
||||||
|
# end_t: float = row['time']
|
||||||
|
# assert (
|
||||||
|
# dt.timestamp()
|
||||||
|
# ==
|
||||||
|
# end_t
|
||||||
|
# )
|
||||||
|
|
||||||
|
# the gap's LEFT-most bar's CLOSE value
|
||||||
|
# at that time (sample) step.
|
||||||
|
prev_r: pl.DataFrame = wdts.filter(
|
||||||
|
pl.col('index') == iend - 1
|
||||||
|
)
|
||||||
|
istart: int = prev_r['index'][0]
|
||||||
|
# dt_start_t: float = dt_prev.timestamp()
|
||||||
|
|
||||||
|
# start_t: float = prev_r['time']
|
||||||
|
# assert (
|
||||||
|
# dt_start_t
|
||||||
|
# ==
|
||||||
|
# start_t
|
||||||
|
# )
|
||||||
|
|
||||||
|
# TODO: implement px-col width measure
|
||||||
|
# and ensure at least as many px-cols
|
||||||
|
# shown per rect as configured by user.
|
||||||
|
# gap_w: float = abs((iend - istart))
|
||||||
|
# if gap_w < 6:
|
||||||
|
# margin: float = 6
|
||||||
|
# iend += margin
|
||||||
|
# istart -= margin
|
||||||
|
|
||||||
|
rect_gap: float = BGM*3/8
|
||||||
|
opn: float = row['open'][0]
|
||||||
|
ro: tuple[float, float] = (
|
||||||
|
# dt_end_t,
|
||||||
|
iend + rect_gap + 1,
|
||||||
|
opn,
|
||||||
|
)
|
||||||
|
cls: float = prev_r['close'][0]
|
||||||
|
lc: tuple[float, float] = (
|
||||||
|
# dt_start_t,
|
||||||
|
istart - rect_gap, # + 1 ,
|
||||||
|
cls,
|
||||||
|
)
|
||||||
|
|
||||||
|
color: str = 'dad_blue'
|
||||||
|
diff: float = cls - opn
|
||||||
|
sgn: float = copysign(1, diff)
|
||||||
|
color: str = {
|
||||||
|
-1: 'buy_green',
|
||||||
|
1: 'sell_red',
|
||||||
|
}[sgn]
|
||||||
|
|
||||||
|
rect_kwargs: dict[str, Any] = dict(
|
||||||
|
fqme=fqme,
|
||||||
|
timeframe=timeframe,
|
||||||
|
start_pos=lc,
|
||||||
|
end_pos=ro,
|
||||||
|
color=color,
|
||||||
|
)
|
||||||
|
|
||||||
|
aid: int = await actl.add_rect(**rect_kwargs)
|
||||||
|
assert aid
|
||||||
|
aids[aid] = rect_kwargs
|
||||||
|
|
||||||
|
# tell chart to redraw all its
|
||||||
|
# graphics view layers Bo
|
||||||
|
await actl.redraw(
|
||||||
|
fqme=fqme,
|
||||||
|
timeframe=timeframe,
|
||||||
|
)
|
||||||
|
return aids
|
||||||
|
|
||||||
|
|
||||||
@store.command()
|
@store.command()
|
||||||
def ldshm(
|
def ldshm(
|
||||||
fqme: str,
|
fqme: str,
|
||||||
|
@ -249,7 +364,6 @@ def ldshm(
|
||||||
async def main():
|
async def main():
|
||||||
from piker.ui._remote_ctl import (
|
from piker.ui._remote_ctl import (
|
||||||
open_annot_ctl,
|
open_annot_ctl,
|
||||||
AnnotCtl,
|
|
||||||
)
|
)
|
||||||
actl: AnnotCtl
|
actl: AnnotCtl
|
||||||
mod: ModuleType
|
mod: ModuleType
|
||||||
|
@ -274,111 +388,97 @@ def ldshm(
|
||||||
shm_df,
|
shm_df,
|
||||||
) in tsp.iter_dfs_from_shms(fqme):
|
) in tsp.iter_dfs_from_shms(fqme):
|
||||||
|
|
||||||
# compute ohlc properties for naming
|
|
||||||
times: np.ndarray = shm.array['time']
|
times: np.ndarray = shm.array['time']
|
||||||
period_s: float = float(times[-1] - times[-2])
|
d1: float = float(times[-1] - times[-2])
|
||||||
if period_s < 1.:
|
d2: float = float(times[-2] - times[-3])
|
||||||
|
med: float = np.median(np.diff(times))
|
||||||
|
if (
|
||||||
|
d1 < 1.
|
||||||
|
and d2 < 1.
|
||||||
|
and med < 1.
|
||||||
|
):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f'Something is wrong with time period for {shm}:\n{times}'
|
f'Something is wrong with time period for {shm}:\n{times}'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
period_s: float = float(max(d1, d2, med))
|
||||||
|
|
||||||
# over-write back to shm?
|
# over-write back to shm?
|
||||||
df: pl.DataFrame # with dts
|
wdts: pl.DataFrame # with dts
|
||||||
deduped: pl.DataFrame # deduplicated dts
|
deduped: pl.DataFrame # deduplicated dts
|
||||||
(
|
(
|
||||||
df,
|
wdts,
|
||||||
gaps,
|
|
||||||
deduped,
|
deduped,
|
||||||
diff,
|
diff,
|
||||||
) = tsp.dedupe(shm_df)
|
) = tsp.dedupe(
|
||||||
|
shm_df,
|
||||||
|
period=period_s,
|
||||||
|
)
|
||||||
|
|
||||||
null_segs: tuple = tsp.get_null_segs(
|
null_segs: tuple = tsp.get_null_segs(
|
||||||
frame=shm.array,
|
frame=shm.array,
|
||||||
period=period_s,
|
period=period_s,
|
||||||
)
|
)
|
||||||
|
|
||||||
needs_correction: bool = (
|
# detect gaps from in expected (uniform OHLC) sample period
|
||||||
not gaps.is_empty()
|
step_gaps: pl.DataFrame = tsp.detect_time_gaps(
|
||||||
or null_segs
|
wdts,
|
||||||
|
expect_period=period_s,
|
||||||
)
|
)
|
||||||
# TODO: maybe only optionally enter this depending
|
|
||||||
# on some CLI flags and/or gap detection?
|
|
||||||
if needs_correction:
|
|
||||||
for i in range(gaps.height):
|
|
||||||
row: pl.DataFrame = gaps[i]
|
|
||||||
|
|
||||||
# TODO: can we eventually remove this
|
# TODO: by default we always want to mark these up
|
||||||
# once we figure out why the epoch cols
|
# with rects showing up/down gaps Bo
|
||||||
# don't match?
|
venue_gaps: pl.DataFrame = tsp.detect_time_gaps(
|
||||||
iend: int = row['index'][0]
|
wdts,
|
||||||
# dt: datetime = row['dt'][0]
|
expect_period=period_s,
|
||||||
# dt_prev: datetime = row['dt_prev'][0]
|
|
||||||
|
|
||||||
# the gap's right-most bar's OPEN value
|
# TODO: actually pull the exact duration
|
||||||
# at that time (sample) step.
|
# expected for each venue operational period?
|
||||||
# dt_end_t: float = dt.timestamp()
|
gap_dt_unit='days',
|
||||||
|
gap_thresh=1,
|
||||||
|
)
|
||||||
|
|
||||||
# TODO: FIX HOW/WHY these aren't matching
|
# TODO: call null-seg fixer somehow?
|
||||||
# and are instead off by 4hours (EST
|
if null_segs:
|
||||||
# vs. UTC?!?!)
|
await tractor.pause()
|
||||||
# end_t: float = row['time']
|
# async with (
|
||||||
# assert (
|
# trio.open_nursery() as tn,
|
||||||
# dt.timestamp()
|
# mod.open_history_client(
|
||||||
# ==
|
# mkt,
|
||||||
# end_t
|
# ) as (get_hist, config),
|
||||||
# )
|
# ):
|
||||||
|
# nulls_detected: trio.Event = await tn.start(partial(
|
||||||
|
# tsp.maybe_fill_null_segments,
|
||||||
|
|
||||||
# the gap's left-most bar's CLOSE value
|
# shm=shm,
|
||||||
# at that time (sample) step.
|
# timeframe=timeframe,
|
||||||
prev_r: pl.DataFrame = df.filter(
|
# get_hist=get_hist,
|
||||||
pl.col('index') == iend - 1
|
# sampler_stream=sampler_stream,
|
||||||
|
# mkt=mkt,
|
||||||
|
# ))
|
||||||
|
|
||||||
|
# TODO: find the disjoint set of step gaps from
|
||||||
|
# venue (closure) set!
|
||||||
|
# -[ ] do a set diff by checking for the unique
|
||||||
|
# gap set only in the step_gaps?
|
||||||
|
if (
|
||||||
|
not venue_gaps.is_empty()
|
||||||
|
# and not step_gaps.is_empty()
|
||||||
|
):
|
||||||
|
do_markup_gaps: bool = True
|
||||||
|
if do_markup_gaps:
|
||||||
|
aids: dict = await markup_gaps(
|
||||||
|
fqme,
|
||||||
|
period_s,
|
||||||
|
actl,
|
||||||
|
wdts,
|
||||||
|
step_gaps,
|
||||||
)
|
)
|
||||||
istart: int = prev_r['index'][0]
|
assert aids
|
||||||
# dt_start_t: float = dt_prev.timestamp()
|
|
||||||
|
|
||||||
# start_t: float = prev_r['time']
|
# write repaired ts to parquet-file?
|
||||||
# assert (
|
if write_parquet:
|
||||||
# dt_start_t
|
start: float = time.time()
|
||||||
# ==
|
|
||||||
# start_t
|
|
||||||
# )
|
|
||||||
|
|
||||||
# TODO: implement px-col width measure
|
|
||||||
# and ensure at least as many px-cols
|
|
||||||
# shown per rect as configured by user.
|
|
||||||
gap_w: float = abs((iend - istart))
|
|
||||||
if gap_w < 6:
|
|
||||||
margin: float = 6
|
|
||||||
iend += margin
|
|
||||||
istart -= margin
|
|
||||||
|
|
||||||
ro: tuple[float, float] = (
|
|
||||||
# dt_end_t,
|
|
||||||
iend,
|
|
||||||
row['open'][0],
|
|
||||||
)
|
|
||||||
lc: tuple[float, float] = (
|
|
||||||
# dt_start_t,
|
|
||||||
istart,
|
|
||||||
prev_r['close'][0],
|
|
||||||
)
|
|
||||||
|
|
||||||
# async with actl.open_rect(
|
|
||||||
# ) as aid:
|
|
||||||
aid: int = await actl.add_rect(
|
|
||||||
fqme=fqme,
|
|
||||||
timeframe=period_s,
|
|
||||||
start_pos=lc,
|
|
||||||
end_pos=ro,
|
|
||||||
)
|
|
||||||
assert aid
|
|
||||||
|
|
||||||
# write to parquet file?
|
|
||||||
if (
|
|
||||||
write_parquet
|
|
||||||
):
|
|
||||||
# write to fs
|
|
||||||
start = time.time()
|
|
||||||
path: Path = await client.write_ohlcv(
|
path: Path = await client.write_ohlcv(
|
||||||
fqme,
|
fqme,
|
||||||
ohlcv=deduped,
|
ohlcv=deduped,
|
||||||
|
@ -390,7 +490,7 @@ def ldshm(
|
||||||
)
|
)
|
||||||
|
|
||||||
# read back from fs
|
# read back from fs
|
||||||
start = time.time()
|
start: float = time.time()
|
||||||
read_df: pl.DataFrame = pl.read_parquet(path)
|
read_df: pl.DataFrame = pl.read_parquet(path)
|
||||||
read_delay: float = round(
|
read_delay: float = round(
|
||||||
time.time() - start,
|
time.time() - start,
|
||||||
|
@ -412,6 +512,8 @@ def ldshm(
|
||||||
shm._array.setflags(
|
shm._array.setflags(
|
||||||
write=int(1),
|
write=int(1),
|
||||||
)
|
)
|
||||||
|
# last chance manual overwrites in REPL
|
||||||
|
await tractor.pause()
|
||||||
shm.push(
|
shm.push(
|
||||||
new,
|
new,
|
||||||
prepend=True,
|
prepend=True,
|
||||||
|
@ -419,8 +521,6 @@ def ldshm(
|
||||||
update_first=False, # don't update ._first
|
update_first=False, # don't update ._first
|
||||||
)
|
)
|
||||||
|
|
||||||
await tractor.pause()
|
|
||||||
assert diff
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# allow interaction even when no ts problems.
|
# allow interaction even when no ts problems.
|
||||||
|
@ -428,8 +528,11 @@ def ldshm(
|
||||||
assert not diff
|
assert not diff
|
||||||
|
|
||||||
|
|
||||||
if df is None:
|
if shm_df is None:
|
||||||
log.error(f'No matching shm buffers for {fqme} ?')
|
log.error(
|
||||||
|
f'No matching shm buffers for {fqme} ?'
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
trio.run(main)
|
trio.run(main)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue