Add datetime col de-duplicator

distribute_dis
Tyler Goodlet 2023-12-08 14:38:27 -05:00
parent b9af6176c5
commit b6d2550f33
1 changed files with 15 additions and 0 deletions

View File

@ -263,6 +263,19 @@ def with_dts(
# ) # )
def dedup_dt(
    df: pl.DataFrame,
) -> pl.DataFrame:
    '''
    De-duplicate a frame on its `'dt'` column (normally an OHLC
    frame carrying repeated date-time stamps).

    Rows sharing the same `'dt'` value are collapsed to a single row
    and the remaining rows keep their original relative order
    (`maintain_order=True`). Which row of a duplicate group survives
    is left to the default `keep` policy of `pl.DataFrame.unique()`.

    '''
    dt_key = ['dt']
    deduped = df.unique(
        subset=dt_key,
        maintain_order=True,
    )
    return deduped
def detect_time_gaps( def detect_time_gaps(
df: pl.DataFrame, df: pl.DataFrame,
@ -294,10 +307,12 @@ def detect_time_gaps(
''' '''
return ( return (
with_dts(df) with_dts(df)
# First by a seconds unit step size
.filter( .filter(
pl.col('s_diff').abs() > expect_period pl.col('s_diff').abs() > expect_period
) )
.filter( .filter(
# Second by an arbitrary dt-unit step size
getattr( getattr(
pl.col('dt_diff').dt, pl.col('dt_diff').dt,
gap_dt_unit, gap_dt_unit,