From 5702e422d8c2a7efad2227c5cc574db9db05f3aa Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Dec 2023 10:40:08 -0500 Subject: [PATCH] Drop gap detection from `dedupe()`, expect caller to handle it --- piker/tsp/_anal.py | 41 ++++++++--------------------------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/piker/tsp/_anal.py b/piker/tsp/_anal.py index 7ffdef2a..c34a0c3a 100644 --- a/piker/tsp/_anal.py +++ b/piker/tsp/_anal.py @@ -380,10 +380,6 @@ def get_null_segs( None, # backfilled on next iter ]) - # row = zero_t[fi] - # absi_pre_zseg = row['index'][0] - 1 - # absi_pre_zseg = absi - 1 - # final iter case, backfill FINAL end iabs! if (i + 1) == fi_zgaps.size: absi_zsegs[-1][1] = absi_zeros[-1] + 1 @@ -623,8 +619,9 @@ def detect_price_gaps( def dedupe( src_df: pl.DataFrame, - sort: bool = True, + time_gaps: pl.DataFrame | None = None, + sort: bool = True, period: float = 60, ) -> tuple[ @@ -641,49 +638,27 @@ def dedupe( ''' wdts: pl.DataFrame = with_dts(src_df) - src_gaps: pl.DataFrame = detect_time_gaps( - wdts, - expect_period=period, - gap_dt_unit=None if period < 60 else 'days', - ) - # if no gaps detected just return carbon copies - # and no len diff. - if src_gaps.is_empty(): - return ( - wdts, - src_gaps, - wdts, - 0, - ) + # maybe sort on any time field + if sort: + wdts = wdts.sort(by='time') + # TODO: detect out-of-order segments which were corrected! + # -[ ] report in log msg + # -[ ] possibly return segment sections which were moved? # remove duplicated datetime samples/sections deduped: pl.DataFrame = wdts.unique( subset=['dt'], maintain_order=True, ) - if sort: - deduped = deduped.sort(by='time') - - deduped_gaps: pl.DataFrame = detect_time_gaps( - deduped, - expect_period=period, - gap_dt_unit=None if period < 60 else 'days', - ) diff: int = ( wdts.height - deduped.height ) - log.warning( - f'TIME GAPs FOUND:\n' - # f'{gaps}\n' - f'deduped Gaps found:\n{deduped_gaps}' - ) return ( wdts, - deduped_gaps, deduped, diff, )