Drop the `gap_dt_unit: str` column
We don't need it in `detect_time_gaps()` since straight-up datetime diffs in `polars` already have a humanized `str` representation, and with higher precision, like '2d 1h 24m 1s' B)
account_tests
parent
f1289ccce2
commit
2c6ae5d994
|
@ -18,7 +18,8 @@
|
|||
Financial time series processing utilities usually
|
||||
pertaining to OHLCV style sampled data.
|
||||
|
||||
Routines are generally implemented in either ``numpy`` or ``polars`` B)
|
||||
Routines are generally implemented in either ``numpy`` or
|
||||
``polars`` B)
|
||||
|
||||
'''
|
||||
from __future__ import annotations
|
||||
|
@ -269,9 +270,14 @@ def detect_time_gaps(
|
|||
# gap_dt_unit: t_unit = 'minutes',
|
||||
# gap_thresh: int = 1,
|
||||
|
||||
# legacy stock mkts
|
||||
# NOTE: legacy stock mkts have venue operating hours
|
||||
# and thus gaps normally no more than 1-2 days at
|
||||
# a time.
|
||||
# XXX -> must be valid ``polars.Expr.dt.<name>``
|
||||
# TODO: allow passing in a frame of operating hours
|
||||
# durations/ranges for faster legit gap checks.
|
||||
gap_dt_unit: t_unit = 'days',
|
||||
gap_thresh: int = 2,
|
||||
gap_thresh: int = 1,
|
||||
|
||||
) -> pl.DataFrame:
|
||||
'''
|
||||
|
@ -281,18 +287,17 @@ def detect_time_gaps(
|
|||
actual missing data segments.
|
||||
|
||||
'''
|
||||
dt_gap_col: str = f'{gap_dt_unit}_diff'
|
||||
return with_dts(
|
||||
df
|
||||
).filter(
|
||||
pl.col('s_diff').abs() > expect_period
|
||||
).with_columns(
|
||||
getattr(
|
||||
pl.col('dt_diff').dt,
|
||||
gap_dt_unit, # NOTE: must be valid ``Expr.dt.<name>``
|
||||
)().alias(dt_gap_col)
|
||||
).filter(
|
||||
pl.col(dt_gap_col).abs() > gap_thresh
|
||||
return (
|
||||
with_dts(df)
|
||||
.filter(
|
||||
pl.col('s_diff').abs() > expect_period
|
||||
)
|
||||
.filter(
|
||||
getattr(
|
||||
pl.col('dt_diff').dt,
|
||||
gap_dt_unit,
|
||||
)().abs() > gap_thresh
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue