From a86573b5a20a54bac7c45e2aac6e96365d675f47 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet <jgbt@protonmail.com>
Date: Tue, 26 Dec 2023 17:14:26 -0500
Subject: [PATCH] Fix .parquet filenaming..

Apparently `.storage.nativedb.mk_ohlcv_shm_keyed_filepath()` was always
kinda broken if you passed in a `period: float` (an actual non-`int`
value) since it was interpolated as-is into the format string. Fixed it
to strictly cast to `int()` before str-ifying so that you don't get
a weird `60.0s.parquet` suffix in there..

Further this rejigs the `store ldshm` gap correction-annotation loop to,
- use `StorageClient.write_ohlcv()` instead of hackily re-implementing
  it.. now that problem from above is fixed!
- use a `needs_correction: bool` var to determine if gap markup and
  de-duplicated data should be pushed to the shm buffer,
- go back to using `AnnotCtl.add_rect()` for all detected gaps such that
  they all persist (and thus are shown together) until the client
  disconnects.
---
 piker/storage/cli.py      | 248 ++++++++++++++++++++++----------------
 piker/storage/nativedb.py |  13 +-
 2 files changed, 151 insertions(+), 110 deletions(-)

diff --git a/piker/storage/cli.py b/piker/storage/cli.py
index 1d998558..8c6d67ea 100644
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@@ -20,8 +20,12 @@ Storage middle-ware CLIs.
 """
 from __future__ import annotations
 # from datetime import datetime
+# from contextlib import (
+#     AsyncExitStack,
+# )
 from pathlib import Path
 import time
+from types import ModuleType
 
 import polars as pl
 import numpy as np
@@ -34,7 +38,6 @@ import typer
 
 from piker.service import open_piker_runtime
 from piker.cli import cli
-from piker.config import get_conf_dir
 from piker.data import (
     ShmArray,
 )
@@ -45,6 +48,7 @@ from . import (
 from . import (
     __tsdbs__,
     open_storage_client,
+    StorageClient,
 )
 
 
@@ -232,7 +236,8 @@ def anal(
 @store.command()
 def ldshm(
     fqme: str,
-    write_parquet: bool = False,
+    write_parquet: bool = True,
+    reload_parquet_to_shm: bool = True,
 
 ) -> None:
     '''
@@ -242,15 +247,32 @@ def ldshm(
 
     '''
     async def main():
+        from piker.ui._remote_ctl import (
+            open_annot_ctl,
+            AnnotCtl,
+        )
+        actl: AnnotCtl
+        mod: ModuleType
+        client: StorageClient
         async with (
             open_piker_runtime(
                 'polars_boi',
                 enable_modules=['piker.data._sharedmem'],
                 debug_mode=True,
             ),
+            open_storage_client() as (
+                mod,
+                client,
+            ),
+            open_annot_ctl() as actl,
         ):
-            df: pl.DataFrame | None = None
-            for shmfile, shm, shm_df in tsp.iter_dfs_from_shms(fqme):
+            shm_df: pl.DataFrame | None = None
+            for (
+                shmfile,
+                shm,
+                # parquet_path,
+                shm_df,
+            ) in tsp.iter_dfs_from_shms(fqme):
 
                 # compute ohlc properties for naming
                 times: np.ndarray = shm.array['time']
@@ -275,122 +297,136 @@ def ldshm(
                     period=period_s,
                 )
 
-                # TODO: maybe only optionally enter this depending
-                # on some CLI flags and/or gap detection?
-                if (
+                needs_correction: bool = (
                     not gaps.is_empty()
                     or null_segs
-                ):
-                    from piker.ui._remote_ctl import (
-                        open_annot_ctl,
-                        AnnotCtl,
-                    )
-                    annot_ctl: AnnotCtl
-                    async with open_annot_ctl() as annot_ctl:
-                        for i in range(gaps.height):
+                )
+                # TODO: maybe only optionally enter this depending
+                # on some CLI flags and/or gap detection?
+                if needs_correction:
+                    for i in range(gaps.height):
+                        row: pl.DataFrame = gaps[i]
 
-                            row: pl.DataFrame = gaps[i]
+                        # TODO: can we eventually remove this
+                        # once we figure out why the epoch cols
+                        # don't match?
+                        iend: int = row['index'][0]
+                        # dt: datetime = row['dt'][0]
+                        # dt_prev: datetime = row['dt_prev'][0]
 
-                            # TODO: can we eventually remove this
-                            # once we figure out why the epoch cols
-                            # don't match?
-                            iend: int = row['index'][0]
-                            # dt: datetime = row['dt'][0]
-                            # dt_prev: datetime = row['dt_prev'][0]
+                        # the gap's right-most bar's OPEN value
+                        # at that time (sample) step.
+                        # dt_end_t: float = dt.timestamp()
 
-                            # the gap's right-most bar's OPEN value
-                            # at that time (sample) step.
-                            # dt_end_t: float = dt.timestamp()
+                        # TODO: FIX HOW/WHY these aren't matching
+                        # and are instead off by 4hours (EST
+                        # vs. UTC?!?!)
+                        # end_t: float = row['time']
+                        # assert (
+                        #     dt.timestamp()
+                        #     ==
+                        #     end_t
+                        # )
 
-                            # TODO: FIX HOW/WHY these aren't matching
-                            # and are instead off by 4hours (EST
-                            # vs. UTC?!?!)
-                            # end_t: float = row['time']
-                            # assert (
-                            #     dt.timestamp()
-                            #     ==
-                            #     end_t
-                            # )
+                        # the gap's left-most bar's CLOSE value
+                        # at that time (sample) step.
+                        prev_r: pl.DataFrame = df.filter(
+                            pl.col('index') == iend - 1
+                        )
+                        istart: int = prev_r['index'][0]
+                        # dt_start_t: float = dt_prev.timestamp()
 
-                            # the gap's left-most bar's CLOSE value
-                            # at that time (sample) step.
+                        # start_t: float = prev_r['time']
+                        # assert (
+                        #     dt_start_t
+                        #     ==
+                        #     start_t
+                        # )
 
-                            prev_r: pl.DataFrame = df.filter(
-                                pl.col('index') == gaps[0]['index'] - 1
+                        # TODO: implement px-col width measure
+                        # and ensure at least as many px-cols
+                        # shown per rect as configured by user.
+                        gap_w: float = abs((iend - istart))
+                        if gap_w < 6:
+                            margin: float = 6
+                            iend += margin
+                            istart -= margin
+
+                        ro: tuple[float, float] = (
+                            # dt_end_t,
+                            iend,
+                            row['open'][0],
+                        )
+                        lc: tuple[float, float] = (
+                            # dt_start_t,
+                            istart,
+                            prev_r['close'][0],
+                        )
+
+                        # async with actl.open_rect(
+                        # ) as aid:
+                        aid: int = await actl.add_rect(
+                            fqme=fqme,
+                            timeframe=period_s,
+                            start_pos=lc,
+                            end_pos=ro,
+                        )
+                        assert aid
+
+                    # write to parquet file?
+                    if (
+                        write_parquet
+                    ):
+                        # write to fs
+                        start = time.time()
+                        path: Path = await client.write_ohlcv(
+                            fqme,
+                            ohlcv=deduped,
+                            timeframe=period_s,
+                        )
+                        write_delay: float = round(
+                            time.time() - start,
+                            ndigits=6,
+                        )
+
+                        # read back from fs
+                        start = time.time()
+                        read_df: pl.DataFrame = pl.read_parquet(path)
+                        read_delay: float = round(
+                            time.time() - start,
+                            ndigits=6,
+                        )
+                        log.info(
+                            f'parquet write took {write_delay} secs\n'
+                            f'file path: {path}'
+                            f'parquet read took {read_delay} secs\n'
+                            f'polars df: {read_df}'
+                        )
+
+                        if reload_parquet_to_shm:
+                            new = tsp.pl2np(
+                                deduped,
+                                dtype=shm.array.dtype,
                             )
-                            istart: int = prev_r['index'][0]
-                            # dt_start_t: float = dt_prev.timestamp()
-
-                            # start_t: float = prev_r['time']
-                            # assert (
-                            #     dt_start_t
-                            #     ==
-                            #     start_t
-                            # )
-
-                            # TODO: implement px-col width measure
-                            # and ensure at least as many px-cols
-                            # shown per rect as configured by user.
-                            gap_w: float = abs((iend - istart))
-                            # await tractor.pause()
-                            if gap_w < 6:
-                                margin: float = 6
-                                iend += margin
-                                istart -= margin
-
-                            ro: tuple[float, float] = (
-                                # dt_end_t,
-                                iend,
-                                row['open'][0],
+                            # since normally readonly
+                            shm._array.setflags(
+                                write=int(1),
                             )
-                            lc: tuple[float, float] = (
-                                # dt_start_t,
-                                istart,
-                                prev_r['close'][0],
+                            shm.push(
+                                new,
+                                prepend=True,
+                                start=new['index'][-1],
+                                update_first=False,  # don't update ._first
                             )
 
-                            aid: int = await annot_ctl.add_rect(
-                                fqme=fqme,
-                                timeframe=period_s,
-                                start_pos=lc,
-                                end_pos=ro,
-                            )
-                            assert aid
-                            await tractor.pause()
+                    await tractor.pause()
+                    assert diff
 
-                # write to parquet file?
-                if write_parquet:
-                    timeframe: str = f'{period_s}s'
+                else:
+                    # allow interaction even when no ts problems.
+                    await tractor.pause()
+                    assert not diff
 
-                    datadir: Path = get_conf_dir() / 'nativedb'
-                    if not datadir.is_dir():
-                        datadir.mkdir()
-
-                    path: Path = datadir / f'{fqme}.{timeframe}.parquet'
-
-                    # write to fs
-                    start = time.time()
-                    df.write_parquet(path)
-                    delay: float = round(
-                        time.time() - start,
-                        ndigits=6,
-                    )
-                    log.info(
-                        f'parquet write took {delay} secs\n'
-                        f'file path: {path}'
-                    )
-
-                    # read back from fs
-                    start = time.time()
-                    read_df: pl.DataFrame = pl.read_parquet(path)
-                    delay: float = round(
-                        time.time() - start,
-                        ndigits=6,
-                    )
-                    print(
-                        f'parquet read took {delay} secs\n'
-                        f'polars df: {read_df}'
-                    )
 
             if df is None:
                 log.error(f'No matching shm buffers for {fqme} ?')
diff --git a/piker/storage/nativedb.py b/piker/storage/nativedb.py
index 7d64cb6e..bc7f10e3 100644
--- a/piker/storage/nativedb.py
+++ b/piker/storage/nativedb.py
@@ -95,16 +95,19 @@ def detect_period(shm: ShmArray) -> float:
 
 def mk_ohlcv_shm_keyed_filepath(
     fqme: str,
-    period: float,  # ow known as the "timeframe"
+    period: float | int,  # ow known as the "timeframe"
     datadir: Path,
 
-) -> str:
+) -> Path:
 
     if period < 1.:
         raise ValueError('Sample period should be >= 1.!?')
 
-    period_s: str = f'{period}s'
-    path: Path = datadir / f'{fqme}.ohlcv{period_s}.parquet'
+    path: Path = (
+        datadir
+        /
+        f'{fqme}.ohlcv{int(period)}s.parquet'
+    )
     return path
 
 
@@ -227,6 +230,7 @@ class NativeStorageClient:
         self,
         fqme: str,
         period: float,
+
     ) -> Path:
         return mk_ohlcv_shm_keyed_filepath(
             fqme=fqme,
@@ -239,6 +243,7 @@ class NativeStorageClient:
         fqme: str,
         df: pl.DataFrame,
         timeframe: float,
+
     ) -> None:
         # cache df for later usage since we (currently) need to
         # convert to np.ndarrays to push to our `ShmArray` rt