Add `store ldshm` subcmd

Changed from the old `store clone` to instead simply load any shm buffer matching a user provided `FQME: str` pattern; writing to parquet file is only done if an explicit option flag is passed by user. Implement new `iter_dfs_from_shms()` generator which allows interatively loading both 1m and 1s buffers delivering the `Path`, `ShmArray` and `polars.DataFrame` instances per matching file B) Also add a todo for a `NativeStorageClient.clear_range()` method.
2023-06-19 14:29:05 -04:00 · 2023-06-19 14:29:05 -04:00 · d704d631ba
parent 58c096bfad
commit d704d631ba
2 changed files with 164 additions and 77 deletions
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@ -20,10 +20,13 @@ Storage middle-ware CLIs.
 """
 from __future__ import annotations
 from pathlib import Path
 import time
 from typing import Generator
 # from typing import TYPE_CHECKING
 import polars as pl
 import numpy as np
 import tractor
 # import pendulum
 from rich.console import Console
 import trio
@ -32,6 +35,16 @@ import typer
 from piker.service import open_piker_runtime
 from piker.cli import cli
 from piker.config import get_conf_dir
 from piker.data import (
    maybe_open_shm_array,
    def_iohlcv_fields,
    ShmArray,
 )
 from piker.data.history import (
    _default_hist_size,
    _default_rt_size,
 )
 from . import (
    log,
 )
@ -132,8 +145,6 @@ def anal(
 ) -> np.ndarray:
    import tractor
    async def main():
        async with (
            open_piker_runtime(
@ -171,37 +182,49 @@ def anal(
    trio.run(main)
-@store.command()
+def iter_dfs_from_shms(fqme: str) -> Generator[
-def clone(
+    tuple[Path, ShmArray, pl.DataFrame],
-    fqme: str,
+    None,
-) -> None:
+    None,
-    import time
+]:
-    from piker.config import get_conf_dir
+    # shm buffer size table based on known sample rates
-    from piker.data import (
+    sizes: dict[str, int] = {
-        maybe_open_shm_array,
+        'hist': _default_hist_size,
-        def_iohlcv_fields,
+        'rt': _default_rt_size,
-    )
+    }
    import polars as pl
-    # TODO: actually look up an existing shm buf (set) from
+    # load all detected shm buffer files which have the
-    # an fqme and file name parsing..
+    # passed FQME pattern in the file name.
-    # open existing shm buffer for kucoin backend
+    shmfiles: list[Path] = []
-    key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist'
+    shmdir = Path('/dev/shm/')
    shmpath: Path = Path('/dev/shm') / key
    assert shmpath.is_file()
-    async def main():
+    for shmfile in shmdir.glob(f'*{fqme}*'):
-        async with (
+        filename: str = shmfile.name
-            open_piker_runtime(
+
-                'polars_boi',
+        # skip index files
-                enable_modules=['piker.data._sharedmem'],
+        if (
-            ),
+            '_first' in filename
            or '_last' in filename
        ):
            continue
        assert shmfile.is_file()
        log.debug(f'Found matching shm buffer file: {filename}')
        shmfiles.append(shmfile)
    for shmfile in shmfiles:
        # lookup array buffer size based on file suffix
        # being either .rt or .hist
        size: int = sizes[shmfile.name.rsplit('.')[-1]]
        # attach to any shm buffer, load array into polars df,
        # write to local parquet file.
        shm, opened = maybe_open_shm_array(
-                key=key,
+            key=shmfile.name,
            size=size,
            dtype=def_iohlcv_fields,
            readonly=True,
        )
        assert not opened
        ohlcv = shm.array
@ -218,24 +241,60 @@ def clone(
            time.time() - start,
            ndigits=6,
        )
-            print(
+        log.info(
            f'numpy -> polars conversion took {delay} secs\n'
            f'polars df: {df}'
        )
        yield (
            shmfile,
            shm,
            df,
        )
@store.command()
 def ldshm(
    fqme: str,
    write_parquet: bool = False,
 ) -> None:
    '''
    Linux ONLY: load any fqme file name matching shm buffer from
    /dev/shm/ into an OHLCV numpy array and polars DataFrame,
    optionally write to .parquet file.
    '''
    async def main():
        async with (
            open_piker_runtime(
                'polars_boi',
                enable_modules=['piker.data._sharedmem'],
            ),
        ):
            df: pl.DataFrame | None = None
            for shmfile, shm, df in iter_dfs_from_shms(fqme):
                # compute ohlc properties for naming
-            times: np.ndarray = ohlcv['time']
+                times: np.ndarray = shm.array['time']
                secs: float = times[-1] - times[-2]
                if secs < 1.:
                    breakpoint()
                    raise ValueError(
-                    f'Something is wrong with time period for {shm}:\n{ohlcv}'
+                        f'Something is wrong with time period for {shm}:\n{times}'
                    )
                # TODO: maybe only optionally enter this depending
                # on some CLI flags and/or gap detection?
                await tractor.breakpoint()
                # write to parquet file?
                if write_parquet:
                    timeframe: str = f'{secs}s'
-            # write to parquet file
+                    datadir: Path = get_conf_dir() / 'nativedb'
            datadir: Path = get_conf_dir() / 'parqdb'
                    if not datadir.is_dir():
                        datadir.mkdir()
@ -248,7 +307,7 @@ def clone(
                        time.time() - start,
                        ndigits=6,
                    )
-            print(
+                    log.info(
                        f'parquet write took {delay} secs\n'
                        f'file path: {path}'
                    )
@ -265,6 +324,9 @@ def clone(
                        f'polars df: {read_df}'
                    )
            if df is None:
                log.error(f'No matching shm buffers for {fqme} ?')
    trio.run(main)
--- a/piker/storage/nativedb.py
+++ b/piker/storage/nativedb.py
@ -137,6 +137,14 @@ def mk_ohlcv_shm_keyed_filepath(
    return path
 def unpack_fqme_from_parquet_filepath(path: Path) -> str:
    filename: str = str(path.name)
    fqme, fmt_descr, suffix = filename.split('.')
    assert suffix == 'parquet'
    return fqme
 ohlc_key_map = None
@ -347,10 +355,27 @@ class NativeStorageClient:
            path.unlink()
            log.warning(f'Deleting parquet entry:\n{path}')
        else:
-            log.warning(f'No path exists:\n{path}')
+            log.error(f'No path exists:\n{path}')
        return path
    # TODO: allow wiping and refetching a segment of the OHLCV timeseries
    # data.
    # def clear_range(
    #     self,
    #     key: str,
    #     start_dt: datetime,
    #     end_dt: datetime,
    #     timeframe: int | None = None,
    # ) -> pl.DataFrame:
    #     '''
    #     Clear and re-fetch a range of datums for the OHLCV time series.
    #     Useful for series editing from a chart B)
    #     '''
    #     ...
@acm
 async def get_client(