Drop the `gap_dt_unit: str` column

We don't need it in `detect_time_gaps()` since straight-up datetime
diffs in `polars` already have a humanized `str` representation, and
with higher precision, e.g. '2d 1h 24m 1s' B)
account_tests
Tyler Goodlet 2023-07-26 15:37:59 -04:00
parent f1289ccce2
commit 2c6ae5d994
1 changed files with 20 additions and 15 deletions

View File

@ -18,7 +18,8 @@
Financial time series processing utilities usually Financial time series processing utilities usually
pertaining to OHLCV style sampled data. pertaining to OHLCV style sampled data.
Routines are generally implemented in either ``numpy`` or ``polars`` B) Routines are generally implemented in either ``numpy`` or
``polars`` B)
''' '''
from __future__ import annotations from __future__ import annotations
@ -269,9 +270,14 @@ def detect_time_gaps(
# gap_dt_unit: t_unit = 'minutes', # gap_dt_unit: t_unit = 'minutes',
# gap_thresh: int = 1, # gap_thresh: int = 1,
# legacy stock mkts # NOTE: legacy stock mkts have venue operating hours
# and thus gaps normally no more than 1-2 days at
# a time.
# XXX -> must be valid ``polars.Expr.dt.<name>``
# TODO: allow passing in a frame of operating hours
# durations/ranges for faster legit gap checks.
gap_dt_unit: t_unit = 'days', gap_dt_unit: t_unit = 'days',
gap_thresh: int = 2, gap_thresh: int = 1,
) -> pl.DataFrame: ) -> pl.DataFrame:
''' '''
@ -281,18 +287,17 @@ def detect_time_gaps(
actual missing data segments. actual missing data segments.
''' '''
dt_gap_col: str = f'{gap_dt_unit}_diff' return (
return with_dts( with_dts(df)
df .filter(
).filter(
pl.col('s_diff').abs() > expect_period pl.col('s_diff').abs() > expect_period
).with_columns( )
.filter(
getattr( getattr(
pl.col('dt_diff').dt, pl.col('dt_diff').dt,
gap_dt_unit, # NOTE: must be valid ``Expr.dt.<name>`` gap_dt_unit,
)().alias(dt_gap_col) )().abs() > gap_thresh
).filter( )
pl.col(dt_gap_col).abs() > gap_thresh
) )