Add gap detection into the `store ldshm` cmd

2023-07-26 15:45:55 -04:00 · 2023-07-26 15:45:55 -04:00 · 385561276b
parent d94ab9d5b2
commit 385561276b
1 changed files with 21 additions and 7 deletions
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@ -150,6 +150,7 @@ def anal(
            open_piker_runtime(
                'tsdb_polars_anal',
                # enable_modules=['piker.service._ahab']
                debug_mode=True,
            ),
            open_storage_client() as (mod, client),
        ):
@ -168,10 +169,10 @@ def anal(
            src_df = await client.as_df(fqme, period)
            from piker.data import _timeseries as tsmod
-            df = tsmod.with_dts(src_df)
+            df: pl.DataFrame = tsmod.with_dts(src_df)
            gaps: pl.DataFrame = tsmod.detect_time_gaps(df)
-            if gaps:
+            if not gaps.is_empty():
                print(f'Gaps found:\n{gaps}')
            # TODO: something better with tab completion..
@ -216,7 +217,13 @@ def iter_dfs_from_shms(fqme: str) -> Generator[
        # lookup array buffer size based on file suffix
        # being either .rt or .hist
-        size: int = sizes[shmfile.name.rsplit('.')[-1]]
+        key: str = shmfile.name.rsplit('.')[-1]
        # skip FSP buffers for now..
        if key not in sizes:
            continue
        size: int = sizes[key]
        # attach to any shm buffer, load array into polars df,
        # write to local parquet file.
@ -271,23 +278,30 @@ def ldshm(
            open_piker_runtime(
                'polars_boi',
                enable_modules=['piker.data._sharedmem'],
                debug_mode=True,
            ),
        ):
            df: pl.DataFrame | None = None
-            for shmfile, shm, df in iter_dfs_from_shms(fqme):
+            for shmfile, shm, src_df in iter_dfs_from_shms(fqme):
                # compute ohlc properties for naming
                times: np.ndarray = shm.array['time']
                secs: float = times[-1] - times[-2]
                if secs < 1.:
                    breakpoint()
                    raise ValueError(
                        f'Something is wrong with time period for {shm}:\n{times}'
                    )
                from piker.data import _timeseries as tsmod
                df: pl.DataFrame = tsmod.with_dts(src_df)
                gaps: pl.DataFrame = tsmod.detect_time_gaps(df)
                # TODO: maybe only optionally enter this depending
                # on some CLI flags and/or gap detection?
                if (
                    not gaps.is_empty()
                    or secs > 2
                ):
                    await tractor.pause()
                # write to parquet file?