Add `store ldshm` subcmd

Changed from the old `store clone` to instead simply load any shm buffer
matching a user provided `FQME: str` pattern; writing to parquet file is
only done if an explicit option flag is passed by user.

Implement new `iter_dfs_from_shms()` generator which allows interatively
loading both 1m and 1s buffers delivering the `Path`, `ShmArray` and
`polars.DataFrame` instances per matching file B)

Also add a todo for a `NativeStorageClient.clear_range()` method.
basic_buy_bot
Tyler Goodlet 2023-06-19 14:29:05 -04:00
parent 58c096bfad
commit d704d631ba
2 changed files with 164 additions and 77 deletions

View File

@ -20,10 +20,13 @@ Storage middle-ware CLIs.
""" """
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
import time
from typing import Generator
# from typing import TYPE_CHECKING # from typing import TYPE_CHECKING
import polars as pl import polars as pl
import numpy as np import numpy as np
import tractor
# import pendulum # import pendulum
from rich.console import Console from rich.console import Console
import trio import trio
@ -32,6 +35,16 @@ import typer
from piker.service import open_piker_runtime from piker.service import open_piker_runtime
from piker.cli import cli from piker.cli import cli
from piker.config import get_conf_dir
from piker.data import (
maybe_open_shm_array,
def_iohlcv_fields,
ShmArray,
)
from piker.data.history import (
_default_hist_size,
_default_rt_size,
)
from . import ( from . import (
log, log,
) )
@ -132,8 +145,6 @@ def anal(
) -> np.ndarray: ) -> np.ndarray:
import tractor
async def main(): async def main():
async with ( async with (
open_piker_runtime( open_piker_runtime(
@ -171,37 +182,49 @@ def anal(
trio.run(main) trio.run(main)
@store.command() def iter_dfs_from_shms(fqme: str) -> Generator[
def clone( tuple[Path, ShmArray, pl.DataFrame],
fqme: str, None,
) -> None: None,
import time ]:
from piker.config import get_conf_dir # shm buffer size table based on known sample rates
from piker.data import ( sizes: dict[str, int] = {
maybe_open_shm_array, 'hist': _default_hist_size,
def_iohlcv_fields, 'rt': _default_rt_size,
) }
import polars as pl
# TODO: actually look up an existing shm buf (set) from # load all detected shm buffer files which have the
# an fqme and file name parsing.. # passed FQME pattern in the file name.
# open existing shm buffer for kucoin backend shmfiles: list[Path] = []
key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist' shmdir = Path('/dev/shm/')
shmpath: Path = Path('/dev/shm') / key
assert shmpath.is_file()
async def main(): for shmfile in shmdir.glob(f'*{fqme}*'):
async with ( filename: str = shmfile.name
open_piker_runtime(
'polars_boi', # skip index files
enable_modules=['piker.data._sharedmem'], if (
), '_first' in filename
or '_last' in filename
): ):
continue
assert shmfile.is_file()
log.debug(f'Found matching shm buffer file: {filename}')
shmfiles.append(shmfile)
for shmfile in shmfiles:
# lookup array buffer size based on file suffix
# being either .rt or .hist
size: int = sizes[shmfile.name.rsplit('.')[-1]]
# attach to any shm buffer, load array into polars df, # attach to any shm buffer, load array into polars df,
# write to local parquet file. # write to local parquet file.
shm, opened = maybe_open_shm_array( shm, opened = maybe_open_shm_array(
key=key, key=shmfile.name,
size=size,
dtype=def_iohlcv_fields, dtype=def_iohlcv_fields,
readonly=True,
) )
assert not opened assert not opened
ohlcv = shm.array ohlcv = shm.array
@ -218,24 +241,60 @@ def clone(
time.time() - start, time.time() - start,
ndigits=6, ndigits=6,
) )
print( log.info(
f'numpy -> polars conversion took {delay} secs\n' f'numpy -> polars conversion took {delay} secs\n'
f'polars df: {df}' f'polars df: {df}'
) )
yield (
shmfile,
shm,
df,
)
@store.command()
def ldshm(
fqme: str,
write_parquet: bool = False,
) -> None:
'''
Linux ONLY: load any fqme file name matching shm buffer from
/dev/shm/ into an OHLCV numpy array and polars DataFrame,
optionally write to .parquet file.
'''
async def main():
async with (
open_piker_runtime(
'polars_boi',
enable_modules=['piker.data._sharedmem'],
),
):
df: pl.DataFrame | None = None
for shmfile, shm, df in iter_dfs_from_shms(fqme):
# compute ohlc properties for naming # compute ohlc properties for naming
times: np.ndarray = ohlcv['time'] times: np.ndarray = shm.array['time']
secs: float = times[-1] - times[-2] secs: float = times[-1] - times[-2]
if secs < 1.: if secs < 1.:
breakpoint() breakpoint()
raise ValueError( raise ValueError(
f'Something is wrong with time period for {shm}:\n{ohlcv}' f'Something is wrong with time period for {shm}:\n{times}'
) )
# TODO: maybe only optionally enter this depending
# on some CLI flags and/or gap detection?
await tractor.breakpoint()
# write to parquet file?
if write_parquet:
timeframe: str = f'{secs}s' timeframe: str = f'{secs}s'
# write to parquet file datadir: Path = get_conf_dir() / 'nativedb'
datadir: Path = get_conf_dir() / 'parqdb'
if not datadir.is_dir(): if not datadir.is_dir():
datadir.mkdir() datadir.mkdir()
@ -248,7 +307,7 @@ def clone(
time.time() - start, time.time() - start,
ndigits=6, ndigits=6,
) )
print( log.info(
f'parquet write took {delay} secs\n' f'parquet write took {delay} secs\n'
f'file path: {path}' f'file path: {path}'
) )
@ -265,6 +324,9 @@ def clone(
f'polars df: {read_df}' f'polars df: {read_df}'
) )
if df is None:
log.error(f'No matching shm buffers for {fqme} ?')
trio.run(main) trio.run(main)

View File

@ -137,6 +137,14 @@ def mk_ohlcv_shm_keyed_filepath(
return path return path
def unpack_fqme_from_parquet_filepath(path: Path) -> str:
filename: str = str(path.name)
fqme, fmt_descr, suffix = filename.split('.')
assert suffix == 'parquet'
return fqme
ohlc_key_map = None ohlc_key_map = None
@ -347,10 +355,27 @@ class NativeStorageClient:
path.unlink() path.unlink()
log.warning(f'Deleting parquet entry:\n{path}') log.warning(f'Deleting parquet entry:\n{path}')
else: else:
log.warning(f'No path exists:\n{path}') log.error(f'No path exists:\n{path}')
return path return path
# TODO: allow wiping and refetching a segment of the OHLCV timeseries
# data.
# def clear_range(
# self,
# key: str,
# start_dt: datetime,
# end_dt: datetime,
# timeframe: int | None = None,
# ) -> pl.DataFrame:
# '''
# Clear and re-fetch a range of datums for the OHLCV time series.
# Useful for series editing from a chart B)
# '''
# ...
@acm @acm
async def get_client( async def get_client(