Add datetime col de-duplicator

distribute_dis
Tyler Goodlet 2023-12-08 14:38:27 -05:00
parent b9af6176c5
commit b6d2550f33
1 changed file with 15 additions and 0 deletions

View File

@ -263,6 +263,19 @@ def with_dts(
# )
def dedup_dt(
    df: pl.DataFrame,
) -> pl.DataFrame:
    '''
    De-duplicate a frame (typically OHLC samples) on its date-time
    column.

    Rows are compared solely on the `'dt'` column and the call asks
    polars to keep the surviving rows in their original relative
    order (`maintain_order=True`).

    NOTE(review): which row of a duplicate group survives is left to
    polars' default `keep` policy since none is passed here - confirm
    that is acceptable for callers.

    '''
    # only the datetime column takes part in the uniqueness check
    dt_key: list[str] = ['dt']
    deduped = df.unique(
        subset=dt_key,
        maintain_order=True,
    )
    return deduped
def detect_time_gaps(
df: pl.DataFrame,
@ -294,10 +307,12 @@ def detect_time_gaps(
'''
return (
with_dts(df)
# First by a seconds unit step size
.filter(
pl.col('s_diff').abs() > expect_period
)
.filter(
# Second by an arbitrary dt-unit step size
getattr(
pl.col('dt_diff').dt,
gap_dt_unit,