Drop the `gap_dt_unit: str` column

We don't need it in `detect_time_gaps()` since straight-up datetime
diffs in `polars` already have a humanized `str` representation, and
with higher precision, e.g. '2d 1h 24m 1s' B)
account_tests
Tyler Goodlet 2023-07-26 15:37:59 -04:00
parent f1289ccce2
commit 2c6ae5d994
1 changed files with 20 additions and 15 deletions

View File

@ -18,7 +18,8 @@
Financial time series processing utilities usually
pertaining to OHLCV style sampled data.
Routines are generally implemented in either ``numpy`` or ``polars`` B)
Routines are generally implemented in either ``numpy`` or
``polars`` B)
'''
from __future__ import annotations
@ -269,9 +270,14 @@ def detect_time_gaps(
# gap_dt_unit: t_unit = 'minutes',
# gap_thresh: int = 1,
# legacy stock mkts
# NOTE: legacy stock mkts have venue operating hours
# and thus gaps normally no more than 1-2 days at
# a time.
# XXX -> must be valid ``polars.Expr.dt.<name>``
# TODO: allow passing in a frame of operating hours
# durations/ranges for faster legit gap checks.
gap_dt_unit: t_unit = 'days',
gap_thresh: int = 2,
gap_thresh: int = 1,
) -> pl.DataFrame:
'''
@ -281,18 +287,17 @@ def detect_time_gaps(
actual missing data segments.
'''
dt_gap_col: str = f'{gap_dt_unit}_diff'
return with_dts(
df
).filter(
return (
with_dts(df)
.filter(
pl.col('s_diff').abs() > expect_period
).with_columns(
)
.filter(
getattr(
pl.col('dt_diff').dt,
gap_dt_unit, # NOTE: must be valid ``Expr.dt.<name>``
)().alias(dt_gap_col)
).filter(
pl.col(dt_gap_col).abs() > gap_thresh
gap_dt_unit,
)().abs() > gap_thresh
)
)