From 2c6ae5d994e6caaea9724b1d28208994c19f4373 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Jul 2023 15:37:59 -0400 Subject: [PATCH] Drop the `gap_dt_unit: str` column We don't need it in `detect_time_gaps()` since doing straight up datetime diffs in `polars` already has a humanized `str` representation but with higher precision like '2d 1h 24m 1s' B) --- piker/data/_timeseries.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/piker/data/_timeseries.py b/piker/data/_timeseries.py index 2d73c263..7055854b 100644 --- a/piker/data/_timeseries.py +++ b/piker/data/_timeseries.py @@ -18,7 +18,8 @@ Financial time series processing utilities usually pertaining to OHLCV style sampled data. -Routines are generally implemented in either ``numpy`` or ``polars`` B) +Routines are generally implemented in either ``numpy`` or +``polars`` B) ''' from __future__ import annotations @@ -269,9 +270,14 @@ def detect_time_gaps( # gap_dt_unit: t_unit = 'minutes', # gap_thresh: int = 1, - # legacy stock mkts + # NOTE: legacy stock mkts have venue operating hours + # and thus gaps normally no more than 1-2 days at + # a time. + # XXX -> must be valid ``polars.Expr.dt.`` + # TODO: allow passing in a frame of operating hours + # durations/ranges for faster legit gap checks. gap_dt_unit: t_unit = 'days', - gap_thresh: int = 2, + gap_thresh: int = 1, ) -> pl.DataFrame: ''' @@ -281,18 +287,17 @@ def detect_time_gaps( actual missing data segments. ''' - dt_gap_col: str = f'{gap_dt_unit}_diff' - return with_dts( - df - ).filter( - pl.col('s_diff').abs() > expect_period - ).with_columns( - getattr( - pl.col('dt_diff').dt, - gap_dt_unit, # NOTE: must be valid ``Expr.dt.`` - )().alias(dt_gap_col) - ).filter( - pl.col(dt_gap_col).abs() > gap_thresh + return ( + with_dts(df) + .filter( + pl.col('s_diff').abs() > expect_period + ) + .filter( + getattr( + pl.col('dt_diff').dt, + gap_dt_unit, + )().abs() > gap_thresh + ) )