From b6d2550f33ec734bc65e292c5bd2d7ce86470a90 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 8 Dec 2023 14:38:27 -0500
Subject: [PATCH] Add datetime col de-duplicator

---
 piker/data/_timeseries.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/piker/data/_timeseries.py b/piker/data/_timeseries.py
index cc84f763..6da534b4 100644
--- a/piker/data/_timeseries.py
+++ b/piker/data/_timeseries.py
@@ -263,6 +263,19 @@ def with_dts(
     # )
 
 
+def dedup_dt(
+    df: pl.DataFrame,
+) -> pl.DataFrame:
+    '''
+    Drop duplicate-`dt` rows, keeping row order (normally an OHLC frame).
+
+    '''
+    return df.unique(
+        subset=['dt'],
+        maintain_order=True,
+    )
+
+
 def detect_time_gaps(
     df: pl.DataFrame,
 
@@ -294,10 +307,12 @@ def detect_time_gaps(
     '''
     return (
         with_dts(df)
+        # First, keep rows whose absolute seconds step exceeds `expect_period`
         .filter(
             pl.col('s_diff').abs() > expect_period
         )
         .filter(
+            # Second, by a step size in the `gap_dt_unit` dt-unit
             getattr(
                 pl.col('dt_diff').dt,
                 gap_dt_unit,