Add `store ldshm` subcmd

Changed from the old `store clone` subcmd to instead simply load any shm buffer matching a user-provided `FQME: str` pattern; writing to a parquet file is only done if an explicit option flag is passed by the user. Implement a new `iter_dfs_from_shms()` generator which allows iteratively loading both 1m and 1s buffers, delivering the `Path`, `ShmArray` and `polars.DataFrame` instances per matching file B) Also add a todo for a `NativeStorageClient.clear_range()` method.
2023-06-19 14:29:05 -04:00 · 2023-06-19 14:29:05 -04:00 · d704d631ba
parent 58c096bfad
commit d704d631ba
2 changed files with 164 additions and 77 deletions
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@ -20,10 +20,13 @@ Storage middle-ware CLIs.
 """
 from __future__ import annotations
 from pathlib import Path
+import time
+from typing import Generator
 # from typing import TYPE_CHECKING

 import polars as pl
 import numpy as np
+import tractor
 # import pendulum
 from rich.console import Console
 import trio
@ -32,6 +35,16 @@ import typer

 from piker.service import open_piker_runtime
 from piker.cli import cli
+from piker.config import get_conf_dir
+from piker.data import (
+    maybe_open_shm_array,
+    def_iohlcv_fields,
+    ShmArray,
+)
+from piker.data.history import (
+    _default_hist_size,
+    _default_rt_size,
+)
 from . import (
    log,
 )
@ -132,8 +145,6 @@ def anal(

 ) -> np.ndarray:

-    import tractor
-
    async def main():
        async with (
            open_piker_runtime(
@ -171,37 +182,49 @@ def anal(
    trio.run(main)


-@store.command()
-def clone(
-    fqme: str,
-) -> None:
-    import time
-    from piker.config import get_conf_dir
-    from piker.data import (
-        maybe_open_shm_array,
-        def_iohlcv_fields,
-    )
-    import polars as pl
+def iter_dfs_from_shms(fqme: str) -> Generator[
+    tuple[Path, ShmArray, pl.DataFrame],
+    None,
+    None,
+]:
+    # shm buffer size table based on known sample rates
+    sizes: dict[str, int] = {
+        'hist': _default_hist_size,
+        'rt': _default_rt_size,
+    }

-    # TODO: actually look up an existing shm buf (set) from
-    # an fqme and file name parsing..
-    # open existing shm buffer for kucoin backend
-    key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist'
-    shmpath: Path = Path('/dev/shm') / key
-    assert shmpath.is_file()
+    # load all detected shm buffer files which have the
+    # passed FQME pattern in the file name.
+    shmfiles: list[Path] = []
+    shmdir = Path('/dev/shm/')

-    async def main():
-        async with (
-            open_piker_runtime(
-                'polars_boi',
-                enable_modules=['piker.data._sharedmem'],
-            ),
+    for shmfile in shmdir.glob(f'*{fqme}*'):
+        filename: str = shmfile.name
+
+        # skip index files
+        if (
+            '_first' in filename
+            or '_last' in filename
        ):
+            continue
+
+        assert shmfile.is_file()
+        log.debug(f'Found matching shm buffer file: {filename}')
+        shmfiles.append(shmfile)
+
+    for shmfile in shmfiles:
+
+        # lookup array buffer size based on file suffix
+        # being either .rt or .hist
+        size: int = sizes[shmfile.name.rsplit('.')[-1]]
+
        # attach to any shm buffer, load array into polars df,
        # write to local parquet file.
        shm, opened = maybe_open_shm_array(
-                key=key,
+            key=shmfile.name,
+            size=size,
            dtype=def_iohlcv_fields,
+            readonly=True,
        )
        assert not opened
        ohlcv = shm.array
@ -218,24 +241,60 @@ def clone(
            time.time() - start,
            ndigits=6,
        )
-            print(
+        log.info(
            f'numpy -> polars conversion took {delay} secs\n'
            f'polars df: {df}'
        )

+        yield (
+            shmfile,
+            shm,
+            df,
+        )
+
+
+@store.command()
+def ldshm(
+    fqme: str,
+
+    write_parquet: bool = False,
+
+) -> None:
+    '''
+    Linux ONLY: load any fqme file name matching shm buffer from
+    /dev/shm/ into an OHLCV numpy array and polars DataFrame,
+    optionally write to .parquet file.
+
+    '''
+    async def main():
+        async with (
+            open_piker_runtime(
+                'polars_boi',
+                enable_modules=['piker.data._sharedmem'],
+            ),
+        ):
+
+            df: pl.DataFrame | None = None
+            for shmfile, shm, df in iter_dfs_from_shms(fqme):
+
                # compute ohlc properties for naming
-            times: np.ndarray = ohlcv['time']
+                times: np.ndarray = shm.array['time']
                secs: float = times[-1] - times[-2]
                if secs < 1.:
                    breakpoint()
                    raise ValueError(
-                    f'Something is wrong with time period for {shm}:\n{ohlcv}'
+                        f'Something is wrong with time period for {shm}:\n{times}'
                    )

+                # TODO: maybe only optionally enter this depending
+                # on some CLI flags and/or gap detection?
+                await tractor.breakpoint()
+
+                # write to parquet file?
+                if write_parquet:
                    timeframe: str = f'{secs}s'

-            # write to parquet file
-            datadir: Path = get_conf_dir() / 'parqdb'
+                    datadir: Path = get_conf_dir() / 'nativedb'
                    if not datadir.is_dir():
                        datadir.mkdir()

@ -248,7 +307,7 @@ def clone(
                        time.time() - start,
                        ndigits=6,
                    )
-            print(
+                    log.info(
                        f'parquet write took {delay} secs\n'
                        f'file path: {path}'
                    )
@ -265,6 +324,9 @@ def clone(
                        f'polars df: {read_df}'
                    )

+            if df is None:
+                log.error(f'No matching shm buffers for {fqme} ?')
+
    trio.run(main)


--- a/piker/storage/nativedb.py
+++ b/piker/storage/nativedb.py
@ -137,6 +137,14 @@ def mk_ohlcv_shm_keyed_filepath(
    return path


+def unpack_fqme_from_parquet_filepath(path: Path) -> str:
+
+    filename: str = str(path.name)
+    fqme, fmt_descr, suffix = filename.split('.')
+    assert suffix == 'parquet'
+    return fqme
+
+
 ohlc_key_map = None


@ -347,10 +355,27 @@ class NativeStorageClient:
            path.unlink()
            log.warning(f'Deleting parquet entry:\n{path}')
        else:
-            log.warning(f'No path exists:\n{path}')
+            log.error(f'No path exists:\n{path}')

        return path

+    # TODO: allow wiping and refetching a segment of the OHLCV timeseries
+    # data.
+    # def clear_range(
+    #     self,
+    #     key: str,
+    #     start_dt: datetime,
+    #     end_dt: datetime,
+    #     timeframe: int | None = None,
+    # ) -> pl.DataFrame:
+    #     '''
+    #     Clear and re-fetch a range of datums for the OHLCV time series.
+
+    #     Useful for series editing from a chart B)
+
+    #     '''
+    #     ...
+

@acm
 async def get_client(