Add `.data._timeseries` utility mod
Org all the new (time) gap detection routines here and also move in the `slice_from_time()` epoch -> index converter routine from `._pathops` B)
parent 54f8a615fc
commit f25248c871
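For context, a minimal usage sketch of the relocated converter (the array contents and epoch values below are invented for illustration, not part of this diff):

# hypothetical usage sketch of the relocated `slice_from_time()`;
# the data values are made up for illustration only.
import numpy as np
from piker.data._timeseries import slice_from_time

# a uniformly 1s-sampled series with an epoch 'time' field
arr = np.zeros(100, dtype=[('time', 'f8'), ('close', 'f8')])
arr['time'] = np.arange(1_000_000, 1_000_100)

read_slc = slice_from_time(
    arr,
    start_t=1_000_010,
    stop_t=1_000_020,
    step=1,  # sampler period step-diff in seconds
)
window = arr[read_slc]  # rows spanning the requested time range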
@@ -1,5 +1,5 @@
 # piker: trading gear for hackers
-# Copyright (C) 2018-present Tyler Goodlet (in stewardship of piker0)
+# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers)

 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@@ -289,158 +289,3 @@ def ohlc_flatten(
         num=len(flat),
     )
     return x, flat
-
-
-def slice_from_time(
-    arr: np.ndarray,
-    start_t: float,
-    stop_t: float,
-    step: float,  # sampler period step-diff
-
-) -> slice:
-    '''
-    Calculate array indices mapped from a time range and return them in
-    a slice.
-
-    Given an input array with an epoch `'time'` series entry, calculate
-    the indices which span the time range and return them in a slice.
-    Presume each `'time'` step increment is uniform; when the time stamp
-    series contains gaps (i.e. the uniform presumption is untrue) fall
-    back to ``np.searchsorted()`` binary search to look up the
-    appropriate index.
-
-    '''
-    profiler = Profiler(
-        msg='slice_from_time()',
-        disabled=not pg_profile_enabled(),
-        ms_threshold=ms_slower_then,
-    )
-
-    times = arr['time']
-    t_first = floor(times[0])
-    t_last = ceil(times[-1])
-
-    # the greatest index we can return which slices to the
-    # end of the input array.
-    read_i_max = arr.shape[0]
-
-    # compute (presumed) uniform-time-step index offsets
-    i_start_t = floor(start_t)
-    read_i_start = floor((i_start_t - t_first) // step) - 1
-
-    i_stop_t = ceil(stop_t)
-
-    # XXX: edge case -> always set stop index to last in array whenever
-    # the input stop time is detected to be greater than the equiv time
-    # stamp at that last entry.
-    if i_stop_t >= t_last:
-        read_i_stop = read_i_max
-    else:
-        read_i_stop = ceil((i_stop_t - t_first) // step) + 1
-
-    # always clip outputs to array support:
-    # - never allow a start index < the 0 index,
-    # - never allow a stop index > the read array len.
-    read_i_start = min(
-        max(0, read_i_start),
-        read_i_max - 1,
-    )
-    read_i_stop = max(
-        0,
-        min(read_i_stop, read_i_max),
-    )
-
-    # check for a larger-than-latest calculated index for the given
-    # start time, in which case we do a binary search for the correct
-    # index.
-    # NOTE: this is usually the result of a time series with time gaps
-    # where it is expected that each index step maps to a uniform step
-    # in the time stamp series.
-    t_iv_start = times[read_i_start]
-    if (
-        t_iv_start > i_start_t
-    ):
-        # do a binary search for the best index mapping to ``start_t``
-        # given we measured an overshoot using the uniform-time-step
-        # calculation from above.
-
-        # TODO: once we start caching these per source-array,
-        # we can just overwrite ``read_i_start`` directly.
-        new_read_i_start = np.searchsorted(
-            times,
-            i_start_t,
-            side='left',
-        )
-
-        # TODO: minimize binary search work as much as possible:
-        # - cache these remap values which compensate for gaps in the
-        #   uniform time step basis where we calc a later start
-        #   index for the given input ``start_t``.
-        # - can we shorten the input search sequence by heuristic?
-        #   up_to_arith_start = index[:read_i_start]
-
-        if (
-            new_read_i_start <= read_i_start
-        ):
-            # t_diff = t_iv_start - start_t
-            # print(
-            #     f"WE'RE CUTTING OUT TIME - STEP:{step}\n"
-            #     f'start_t:{start_t} -> 0index start_t:{t_iv_start}\n'
-            #     f'diff: {t_diff}\n'
-            #     f'REMAPPED START i: {read_i_start} -> {new_read_i_start}\n'
-            # )
-            read_i_start = new_read_i_start
-
-    t_iv_stop = times[read_i_stop - 1]
-    if (
-        t_iv_stop > i_stop_t
-    ):
-        # t_diff = stop_t - t_iv_stop
-        # print(
-        #     f"WE'RE CUTTING OUT TIME - STEP:{step}\n"
-        #     f'calced iv stop:{t_iv_stop} -> stop_t:{stop_t}\n'
-        #     f'diff: {t_diff}\n'
-        #     # f'SHOULD REMAP STOP: {read_i_start} -> {new_read_i_start}\n'
-        # )
-        new_read_i_stop = np.searchsorted(
-            times[read_i_start:],
-            # times,
-            i_stop_t,
-            side='right',
-        )
-
-        if (
-            new_read_i_stop <= read_i_stop
-        ):
-            read_i_stop = read_i_start + new_read_i_stop + 1
-
-    # sanity checks for range size
-    # samples = (i_stop_t - i_start_t) // step
-    # index_diff = read_i_stop - read_i_start + 1
-    # if index_diff > (samples + 3):
-    #     breakpoint()
-
-    # read-relative indexes: gives a slice where `shm.array[read_slc]`
-    # will be the data spanning the input time range `start_t` ->
-    # `stop_t`
-    read_slc = slice(
-        int(read_i_start),
-        int(read_i_stop),
-    )
-
-    profiler(
-        'slicing complete'
-        # f'{start_t} -> {abs_slc.start} | {read_slc.start}\n'
-        # f'{stop_t} -> {abs_slc.stop} | {read_slc.stop}\n'
-    )
-
-    # NOTE: if the caller needs absolute buffer indices they can
-    # slice the buffer abs index like so:
-    # index = arr['index']
-    # abs_indx = index[read_slc]
-    # abs_slc = slice(
-    #     int(abs_indx[0]),
-    #     int(abs_indx[-1]),
-    # )
-
-    return read_slc
@@ -0,0 +1,309 @@
+# piker: trading gear for hackers
+# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers)

+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.

+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.

+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.

+'''
+Financial time series processing utilities, usually
+pertaining to OHLCV-style sampled data.
+
+Routines are generally implemented in either ``numpy`` or ``polars`` B)
+
+'''
+from __future__ import annotations
+from typing import Literal
+from math import (
+    ceil,
+    floor,
+)
+
+import numpy as np
+import polars as pl
+
+from ._sharedmem import ShmArray
+from .._profile import (
+    Profiler,
+    pg_profile_enabled,
+    ms_slower_then,
+)
+
+
+def slice_from_time(
+    arr: np.ndarray,
+    start_t: float,
+    stop_t: float,
+    step: float,  # sampler period step-diff
+
+) -> slice:
+    '''
+    Calculate array indices mapped from a time range and return them in
+    a slice.
+
+    Given an input array with an epoch `'time'` series entry, calculate
+    the indices which span the time range and return them in a slice.
+    Presume each `'time'` step increment is uniform; when the time stamp
+    series contains gaps (i.e. the uniform presumption is untrue) fall
+    back to ``np.searchsorted()`` binary search to look up the
+    appropriate index.
+
+    '''
+    profiler = Profiler(
+        msg='slice_from_time()',
+        disabled=not pg_profile_enabled(),
+        ms_threshold=ms_slower_then,
+    )
+
+    times = arr['time']
+    t_first = floor(times[0])
+    t_last = ceil(times[-1])
+
+    # the greatest index we can return which slices to the
+    # end of the input array.
+    read_i_max = arr.shape[0]
+
+    # compute (presumed) uniform-time-step index offsets
+    i_start_t = floor(start_t)
+    read_i_start = floor((i_start_t - t_first) // step) - 1
+
+    i_stop_t = ceil(stop_t)
+
+    # XXX: edge case -> always set stop index to last in array whenever
+    # the input stop time is detected to be greater than the equiv time
+    # stamp at that last entry.
+    if i_stop_t >= t_last:
+        read_i_stop = read_i_max
+    else:
+        read_i_stop = ceil((i_stop_t - t_first) // step) + 1
+
+    # always clip outputs to array support:
+    # - never allow a start index < the 0 index,
+    # - never allow a stop index > the read array len.
+    read_i_start = min(
+        max(0, read_i_start),
+        read_i_max - 1,
+    )
+    read_i_stop = max(
+        0,
+        min(read_i_stop, read_i_max),
+    )
+
+    # check for a larger-than-latest calculated index for the given
+    # start time, in which case we do a binary search for the correct
+    # index.
+    # NOTE: this is usually the result of a time series with time gaps
+    # where it is expected that each index step maps to a uniform step
+    # in the time stamp series.
+    t_iv_start = times[read_i_start]
+    if (
+        t_iv_start > i_start_t
+    ):
+        # do a binary search for the best index mapping to ``start_t``
+        # given we measured an overshoot using the uniform-time-step
+        # calculation from above.
+
+        # TODO: once we start caching these per source-array,
+        # we can just overwrite ``read_i_start`` directly.
+        new_read_i_start = np.searchsorted(
+            times,
+            i_start_t,
+            side='left',
+        )
+
+        # TODO: minimize binary search work as much as possible:
+        # - cache these remap values which compensate for gaps in the
+        #   uniform time step basis where we calc a later start
+        #   index for the given input ``start_t``.
+        # - can we shorten the input search sequence by heuristic?
+        #   up_to_arith_start = index[:read_i_start]
+
+        if (
+            new_read_i_start <= read_i_start
+        ):
+            # t_diff = t_iv_start - start_t
+            # print(
+            #     f"WE'RE CUTTING OUT TIME - STEP:{step}\n"
+            #     f'start_t:{start_t} -> 0index start_t:{t_iv_start}\n'
+            #     f'diff: {t_diff}\n'
+            #     f'REMAPPED START i: {read_i_start} -> {new_read_i_start}\n'
+            # )
+            read_i_start = new_read_i_start
+
+    t_iv_stop = times[read_i_stop - 1]
+    if (
+        t_iv_stop > i_stop_t
+    ):
+        # t_diff = stop_t - t_iv_stop
+        # print(
+        #     f"WE'RE CUTTING OUT TIME - STEP:{step}\n"
+        #     f'calced iv stop:{t_iv_stop} -> stop_t:{stop_t}\n'
+        #     f'diff: {t_diff}\n'
+        #     # f'SHOULD REMAP STOP: {read_i_start} -> {new_read_i_start}\n'
+        # )
+        new_read_i_stop = np.searchsorted(
+            times[read_i_start:],
+            # times,
+            i_stop_t,
+            side='right',
+        )
+
+        if (
+            new_read_i_stop <= read_i_stop
+        ):
+            read_i_stop = read_i_start + new_read_i_stop + 1
+
+    # sanity checks for range size
+    # samples = (i_stop_t - i_start_t) // step
+    # index_diff = read_i_stop - read_i_start + 1
+    # if index_diff > (samples + 3):
+    #     breakpoint()
+
+    # read-relative indexes: gives a slice where `shm.array[read_slc]`
+    # will be the data spanning the input time range `start_t` ->
+    # `stop_t`
+    read_slc = slice(
+        int(read_i_start),
+        int(read_i_stop),
+    )
+
+    profiler(
+        'slicing complete'
+        # f'{start_t} -> {abs_slc.start} | {read_slc.start}\n'
+        # f'{stop_t} -> {abs_slc.stop} | {read_slc.stop}\n'
+    )
+
+    # NOTE: if the caller needs absolute buffer indices they can
+    # slice the buffer abs index like so:
+    # index = arr['index']
+    # abs_indx = index[read_slc]
+    # abs_slc = slice(
+    #     int(abs_indx[0]),
+    #     int(abs_indx[-1]),
+    # )
+
+    return read_slc
+
+
+def detect_null_time_gap(
+    shm: ShmArray,
+
+) -> tuple[int, float, float, int] | None:
+    '''
+    Detect if there are any zero-epoch stamped rows in the input
+    ``ShmArray`` and if so return the index/time bounds around the
+    null segment as ``(i_start, t_start, t_end, i_end)``.
+
+    '''
+    zero_pred: np.ndarray = shm.array['time'] == 0
+    zero_t: np.ndarray = shm.array[zero_pred]
+    if zero_t.size:
+        istart, iend = zero_t['index'][[0, -1]]
+        # read the (non-null) time stamps just outside the gap
+        start, end = shm._array['time'][
+            [istart - 2, iend + 2]
+        ]
+        return istart - 2, start, end, iend + 2
+
+    return None
+
+
+t_unit = Literal[
+    'days',
+    'hours',
+    'minutes',
+    'seconds',
+    'milliseconds',
+    'microseconds',
+    'nanoseconds',
+]
+
+
+def with_dts(
+    df: pl.DataFrame,
+    time_col: str = 'time',
+) -> pl.DataFrame:
+    '''
+    Insert datetime (cast) columns to a (presumably) OHLC sampled
+    time series with an epoch-time column keyed by ``time_col``.
+
+    '''
+    return df.with_columns([
+        pl.col(time_col).shift(1).suffix('_prev'),
+        pl.col(time_col).diff().alias('s_diff'),
+        pl.from_epoch(pl.col(time_col)).alias('dt'),
+    ]).with_columns([
+        pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'),
+        pl.col('dt').diff().alias('dt_diff'),
+    ])  # .with_columns(
+    #     pl.col('dt').diff().dt.days().alias('days_dt_diff'),
+    # )
+
+
+def detect_time_gaps(
+    df: pl.DataFrame,
+
+    time_col: str = 'time',
+    # epoch sampling step diff
+    expect_period: float = 60,
+
+    # datetime diff unit and gap value
+    # crypto mkts
+    # gap_dt_unit: t_unit = 'minutes',
+    # gap_thresh: int = 1,
+
+    # legacy stock mkts
+    gap_dt_unit: t_unit = 'days',
+    gap_thresh: int = 2,
+
+) -> pl.DataFrame:
+    '''
+    Filter to OHLC datums which contain sample step gaps.
+
+    E.g. legacy markets which have venue close gaps and/or
+    actual missing data segments.
+
+    '''
+    dt_gap_col: str = f'{gap_dt_unit}_diff'
+    return with_dts(
+        df
+    ).filter(
+        pl.col('s_diff').abs() > expect_period
+    ).with_columns(
+        getattr(
+            pl.col('dt_diff').dt,
+            gap_dt_unit,  # NOTE: must be a valid ``Expr.dt.<name>``
+        )().alias(dt_gap_col)
+    ).filter(
+        pl.col(dt_gap_col).abs() > gap_thresh
+    )
+
+
+def detect_price_gaps(
+    df: pl.DataFrame,
+    gt_multiplier: float = 2.,
+    price_fields: list[str] = ['high', 'low'],
+
+) -> pl.DataFrame:
+    '''
+    Detect gaps in clearing price over an OHLC series.
+
+    Two types of gap generally exist: up gaps and down gaps.
+
+    - UP gap: when any next sample's lo price is strictly greater
+      than the current sample's hi price.
+
+    - DOWN gap: when any next sample's hi price is strictly
+      less than the current sample's lo price.
+
+    '''
+    # return df.filter(
+    #     pl.col('high') - ) > expect_period,
+    # ).select([
+    #     pl.dt.datetime(pl.col(time_col).shift(1)).suffix('_previous'),
+    #     pl.all(),
+    # ]).select([
+    #     pl.all(),
+    #     (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'),
+    # ])
+    ...
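As a quick illustration of the new polars-based gap detector, consider a hypothetical 1min frame with a weekend-sized hole (the data values below are invented, not from the commit):

# hypothetical sketch: flag a venue-closure style gap in a 1min series
# via the new `detect_time_gaps()` helper; data is made up.
import polars as pl
from piker.data._timeseries import detect_time_gaps

df = pl.DataFrame({
    'time': [0, 60, 120, 120 + 3 * 86400],  # ~3 day hole after 120s
    'close': [1.0, 1.1, 1.2, 1.3],
})

# with the legacy-stock-market defaults this keeps only rows whose
# datetime diff from the previous sample exceeds 2 days
gaps = detect_time_gaps(
    df,
    expect_period=60,    # 1min sampling
    gap_dt_unit='days',
    gap_thresh=2,
)
assert gaps.height == 1  # only the post-weekend row is flagged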
@@ -54,9 +54,6 @@ from contextlib import asynccontextmanager as acm
 from datetime import datetime
 from pathlib import Path
 import time
-from typing import (
-    Literal,
-)

 # from bidict import bidict
 # import tractor
@@ -374,104 +371,3 @@ async def get_client(
     client = NativeStorageClient(datadir)
     client.index_files()
     yield client
-
-
-def with_dts(
-    df: pl.DataFrame,
-    time_col: str = 'time',
-) -> pl.DataFrame:
-    '''
-    Insert datetime (cast) columns to a (presumably) OHLC sampled
-    time series with an epoch-time column keyed by ``time_col``.
-
-    '''
-    return df.with_columns([
-        pl.col(time_col).shift(1).suffix('_prev'),
-        pl.col(time_col).diff().alias('s_diff'),
-        pl.from_epoch(pl.col(time_col)).alias('dt'),
-    ]).with_columns([
-        pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'),
-        pl.col('dt').diff().alias('dt_diff'),
-    ])  # .with_columns(
-    #     pl.col('dt').diff().dt.days().alias('days_dt_diff'),
-    # )
-
-
-t_unit = Literal[
-    'days',
-    'hours',
-    'minutes',
-    'seconds',
-    'milliseconds',
-    'microseconds',
-    'nanoseconds',
-]
-
-
-def detect_time_gaps(
-    df: pl.DataFrame,
-
-    time_col: str = 'time',
-    # epoch sampling step diff
-    expect_period: float = 60,
-
-    # datetime diff unit and gap value
-    # crypto mkts
-    # gap_dt_unit: t_unit = 'minutes',
-    # gap_thresh: int = 1,
-
-    # legacy stock mkts
-    gap_dt_unit: t_unit = 'days',
-    gap_thresh: int = 2,
-
-) -> pl.DataFrame:
-    '''
-    Filter to OHLC datums which contain sample step gaps.
-
-    E.g. legacy markets which have venue close gaps and/or
-    actual missing data segments.
-
-    '''
-    dt_gap_col: str = f'{gap_dt_unit}_diff'
-    return with_dts(
-        df
-    ).filter(
-        pl.col('s_diff').abs() > expect_period
-    ).with_columns(
-        getattr(
-            pl.col('dt_diff').dt,
-            gap_dt_unit,  # NOTE: must be a valid ``Expr.dt.<name>``
-        )().alias(dt_gap_col)
-    ).filter(
-        pl.col(dt_gap_col).abs() > gap_thresh
-    )
-
-
-def detect_price_gaps(
-    df: pl.DataFrame,
-    gt_multiplier: float = 2.,
-    price_fields: list[str] = ['high', 'low'],
-
-) -> pl.DataFrame:
-    '''
-    Detect gaps in clearing price over an OHLC series.
-
-    Two types of gap generally exist: up gaps and down gaps.
-
-    - UP gap: when any next sample's lo price is strictly greater
-      than the current sample's hi price.
-
-    - DOWN gap: when any next sample's hi price is strictly
-      less than the current sample's lo price.
-
-    '''
-    # return df.filter(
-    #     pl.col('high') - ) > expect_period,
-    # ).select([
-    #     pl.dt.datetime(pl.col(time_col).shift(1)).suffix('_previous'),
-    #     pl.all(),
-    # ]).select([
-    #     pl.all(),
-    #     (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'),
-    # ])
-    ...
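Note that `detect_price_gaps()` is moved as a stub (its body is just `...`). One plausible polars rendering of the UP/DOWN gap rules from its docstring, purely as my own sketch and not the commit's implementation, might be:

# sketch only: a hypothetical `price_gaps()` implementing the
# docstring's UP/DOWN gap rules; not part of this commit.
import polars as pl

def price_gaps(df: pl.DataFrame) -> pl.DataFrame:
    return df.with_columns([
        pl.col('high').shift(1).alias('prev_high'),
        pl.col('low').shift(1).alias('prev_low'),
    ]).filter(
        # UP gap: this sample's low strictly above the prior high
        (pl.col('low') > pl.col('prev_high'))
        # DOWN gap: this sample's high strictly below the prior low
        | (pl.col('high') < pl.col('prev_low'))
    )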
@@ -49,7 +49,7 @@ from ..data._formatters import (
     OHLCBarsAsCurveFmtr,  # OHLC converted to line
     StepCurveFmtr,  # "step" curve (like for vlm)
 )
-from ..data._pathops import (
+from ..data._timeseries import (
     slice_from_time,
 )
 from ._ohlc import (
@@ -30,7 +30,7 @@ import pendulum
 import pyqtgraph as pg

 from ..data.types import Struct
-from ..data._pathops import slice_from_time
+from ..data._timeseries import slice_from_time
 from ..log import get_logger
 from .._profile import Profiler