Generalize time-gap detector to accept unit and threshold
parent
0dcfcea6ee
commit
2dbcecdac7
|
@ -54,6 +54,9 @@ from contextlib import asynccontextmanager as acm
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import time
|
import time
|
||||||
|
from typing import (
|
||||||
|
Literal,
|
||||||
|
)
|
||||||
|
|
||||||
# from bidict import bidict
|
# from bidict import bidict
|
||||||
# import tractor
|
# import tractor
|
||||||
|
@ -388,15 +391,38 @@ def with_dts(
|
||||||
pl.from_epoch(pl.col(time_col)).alias('dt'),
|
pl.from_epoch(pl.col(time_col)).alias('dt'),
|
||||||
]).with_columns([
|
]).with_columns([
|
||||||
pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'),
|
pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'),
|
||||||
]).with_columns(
|
pl.col('dt').diff().alias('dt_diff'),
|
||||||
(pl.col('dt') - pl.col('dt_prev')).alias('dt_diff'),
|
]) #.with_columns(
|
||||||
)
|
# pl.col('dt').diff().dt.days().alias('days_dt_diff'),
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
# Names of the duration-component accessors that may be requested as a
# time-gap unit. Each entry is looked up by name on a polars
# ``Expr.dt`` namespace via ``getattr()``, so every entry must spell a
# real ``Expr.dt.<name>`` attribute exactly.
#
# NOTE: bound with ``=`` so that ``t_unit`` exists as a usable type
# alias; a bare ``t_unit: Literal[...]`` annotation only declares a
# variable and never assigns the name.
t_unit = Literal[
    'days',
    'hours',
    'minutes',
    'seconds',
    'milliseconds',  # was misspelled 'miliseconds', not a valid accessor
    'microseconds',
    'nanoseconds',
]
|
||||||
|
|
||||||
|
|
||||||
def detect_time_gaps(
|
def detect_time_gaps(
|
||||||
df: pl.DataFrame,
|
df: pl.DataFrame,
|
||||||
expect_period: float = 60,
|
|
||||||
time_col: str = 'time',
|
time_col: str = 'time',
|
||||||
|
# epoch sampling step diff
|
||||||
|
expect_period: float = 60,
|
||||||
|
|
||||||
|
# datetime diff unit and gap value
|
||||||
|
# crypto mkts
|
||||||
|
# gap_dt_unit: t_unit = 'minutes',
|
||||||
|
# gap_thresh: int = 1,
|
||||||
|
|
||||||
|
# legacy stock mkts
|
||||||
|
gap_dt_unit: t_unit = 'days',
|
||||||
|
gap_thresh: int = 2,
|
||||||
|
|
||||||
) -> pl.DataFrame:
|
) -> pl.DataFrame:
|
||||||
'''
|
'''
|
||||||
|
@ -406,7 +432,19 @@ def detect_time_gaps(
|
||||||
actual missing data segments.
|
actual missing data segments.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
return with_dts(df).filter(pl.col('s_diff') > expect_period)
|
dt_gap_col: str = f'{gap_dt_unit}_diff'
|
||||||
|
return with_dts(
|
||||||
|
df
|
||||||
|
).filter(
|
||||||
|
pl.col('s_diff').abs() > expect_period
|
||||||
|
).with_columns(
|
||||||
|
getattr(
|
||||||
|
pl.col('dt_diff').dt,
|
||||||
|
gap_dt_unit, # NOTE: must be valid ``Expr.dt.<name>``
|
||||||
|
)().alias(dt_gap_col)
|
||||||
|
).filter(
|
||||||
|
pl.col(dt_gap_col).abs() > gap_thresh
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def detect_price_gaps(
|
def detect_price_gaps(
|
||||||
|
|
Loading…
Reference in New Issue