Add `store ldshm` subcmd

Replace the old `store clone` cmd: the new sub-command simply loads any shm buffer
matching a user-provided `FQME: str` pattern; writing to a parquet file is only
done if an explicit option flag is passed by the user.

Implement a new `iter_dfs_from_shms()` generator which allows iteratively
loading both 1m and 1s buffers, delivering the `Path`, `ShmArray` and
`polars.DataFrame` instances per matching file B)

Also add a todo for a `NativeStorageClient.clear_range()` method.
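
For reference, a minimal sketch of how the new generator is meant to be consumed
(illustrative only: the fqme value, the import path and the period math below are
assumptions, not part of this commit, and it presumes an already-opened piker
runtime just like the `ldshm` cmd uses):

    import polars as pl
    # assumed import path for the new generator
    from piker.storage.cli import iter_dfs_from_shms

    for shmfile, shm, df in iter_dfs_from_shms('xmrusdt.kucoin'):
        # per-row sample period (secs) from the epoch-float 'time' column
        periods: pl.Series = df['time'].diff().drop_nulls()
        print(
            f'{shmfile.name}: {df.height} rows, '
            f'min/max period: {periods.min()}/{periods.max()}s'
        )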
Tyler Goodlet 2023-06-19 14:29:05 -04:00
parent 58c096bfad
commit d704d631ba
2 changed files with 164 additions and 77 deletions


@@ -20,10 +20,13 @@ Storage middle-ware CLIs.
 """
 from __future__ import annotations
 from pathlib import Path
+import time
+from typing import Generator
 # from typing import TYPE_CHECKING

 import polars as pl
 import numpy as np
+import tractor
 # import pendulum
 from rich.console import Console
 import trio
@@ -32,6 +35,16 @@ import typer

 from piker.service import open_piker_runtime
 from piker.cli import cli
+from piker.config import get_conf_dir
+from piker.data import (
+    maybe_open_shm_array,
+    def_iohlcv_fields,
+    ShmArray,
+)
+from piker.data.history import (
+    _default_hist_size,
+    _default_rt_size,
+)
 from . import (
     log,
 )
@@ -132,8 +145,6 @@ def anal(

 ) -> np.ndarray:

-    import tractor
-
     async def main():
         async with (
             open_piker_runtime(
@@ -171,25 +182,90 @@ def anal(
     trio.run(main)


+def iter_dfs_from_shms(fqme: str) -> Generator[
+    tuple[Path, ShmArray, pl.DataFrame],
+    None,
+    None,
+]:
+    # shm buffer size table based on known sample rates
+    sizes: dict[str, int] = {
+        'hist': _default_hist_size,
+        'rt': _default_rt_size,
+    }
+
+    # load all detected shm buffer files which have the
+    # passed FQME pattern in the file name.
+    shmfiles: list[Path] = []
+    shmdir = Path('/dev/shm/')
+
+    for shmfile in shmdir.glob(f'*{fqme}*'):
+        filename: str = shmfile.name
+
+        # skip index files
+        if (
+            '_first' in filename
+            or '_last' in filename
+        ):
+            continue
+
+        assert shmfile.is_file()
+        log.debug(f'Found matching shm buffer file: {filename}')
+        shmfiles.append(shmfile)
+
+    for shmfile in shmfiles:
+
+        # lookup array buffer size based on file suffix
+        # being either .rt or .hist
+        size: int = sizes[shmfile.name.rsplit('.')[-1]]
+
+        # attach to any shm buffer, load array into polars df,
+        # write to local parquet file.
+        shm, opened = maybe_open_shm_array(
+            key=shmfile.name,
+            size=size,
+            dtype=def_iohlcv_fields,
+            readonly=True,
+        )
+        assert not opened
+        ohlcv = shm.array
+
+        start = time.time()
+
+        # XXX: thanks to this SO answer for this conversion tip:
+        # https://stackoverflow.com/a/72054819
+        df = pl.DataFrame({
+            field_name: ohlcv[field_name]
+            for field_name in ohlcv.dtype.fields
+        })
+        delay: float = round(
+            time.time() - start,
+            ndigits=6,
+        )
+        log.info(
+            f'numpy -> polars conversion took {delay} secs\n'
+            f'polars df: {df}'
+        )
+
+        yield (
+            shmfile,
+            shm,
+            df,
+        )
 @store.command()
-def clone(
+def ldshm(
     fqme: str,
+
+    write_parquet: bool = False,
+
 ) -> None:
-    import time
-    from piker.config import get_conf_dir
-    from piker.data import (
-        maybe_open_shm_array,
-        def_iohlcv_fields,
-    )
-    import polars as pl
-
-    # TODO: actually look up an existing shm buf (set) from
-    # an fqme and file name parsing..
-    # open existing shm buffer for kucoin backend
-    key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist'
-    shmpath: Path = Path('/dev/shm') / key
-    assert shmpath.is_file()
+    '''
+    Linux ONLY: load any fqme file name matching shm buffer from
+    /dev/shm/ into an OHLCV numpy array and polars DataFrame,
+    optionally write to .parquet file.
+
+    '''
     async def main():
         async with (
             open_piker_runtime(
@@ -197,73 +273,59 @@ def clone(
                 enable_modules=['piker.data._sharedmem'],
             ),
         ):
-            # attach to any shm buffer, load array into polars df,
-            # write to local parquet file.
-            shm, opened = maybe_open_shm_array(
-                key=key,
-                dtype=def_iohlcv_fields,
-            )
-            assert not opened
-            ohlcv = shm.array
-
-            start = time.time()
-
-            # XXX: thanks to this SO answer for this conversion tip:
-            # https://stackoverflow.com/a/72054819
-            df = pl.DataFrame({
-                field_name: ohlcv[field_name]
-                for field_name in ohlcv.dtype.fields
-            })
-            delay: float = round(
-                time.time() - start,
-                ndigits=6,
-            )
-            print(
-                f'numpy -> polars conversion took {delay} secs\n'
-                f'polars df: {df}'
-            )
-
-            # compute ohlc properties for naming
-            times: np.ndarray = ohlcv['time']
-            secs: float = times[-1] - times[-2]
-            if secs < 1.:
-                breakpoint()
-                raise ValueError(
-                    f'Something is wrong with time period for {shm}:\n{ohlcv}'
-                )
-
-            timeframe: str = f'{secs}s'
-
-            # write to parquet file
-            datadir: Path = get_conf_dir() / 'parqdb'
-            if not datadir.is_dir():
-                datadir.mkdir()
+            df: pl.DataFrame | None = None
+            for shmfile, shm, df in iter_dfs_from_shms(fqme):
+
+                # compute ohlc properties for naming
+                times: np.ndarray = shm.array['time']
+                secs: float = times[-1] - times[-2]
+                if secs < 1.:
+                    breakpoint()
+                    raise ValueError(
+                        f'Something is wrong with time period for {shm}:\n{times}'
+                    )
+
+                # TODO: maybe only optionally enter this depending
+                # on some CLI flags and/or gap detection?
+                await tractor.breakpoint()
+
+                # write to parquet file?
+                if write_parquet:
+                    timeframe: str = f'{secs}s'
+
+                    datadir: Path = get_conf_dir() / 'nativedb'
+                    if not datadir.is_dir():
+                        datadir.mkdir()

                     path: Path = datadir / f'{fqme}.{timeframe}.parquet'

                     # write to fs
                     start = time.time()
                     df.write_parquet(path)
                     delay: float = round(
                         time.time() - start,
                         ndigits=6,
                     )
-                    print(
+                    log.info(
                         f'parquet write took {delay} secs\n'
                         f'file path: {path}'
                     )

                     # read back from fs
                     start = time.time()
                     read_df: pl.DataFrame = pl.read_parquet(path)
                     delay: float = round(
                         time.time() - start,
                         ndigits=6,
                     )
                     print(
                         f'parquet read took {delay} secs\n'
                         f'polars df: {read_df}'
                     )
+
+            if df is None:
+                log.error(f'No matching shm buffers for {fqme} ?')

     trio.run(main)
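
As a side note on the `gap detection?` TODO in the new `ldshm` body above, one
possible polars approach (purely a sketch, not part of this commit; the helper
name and expected-period parameter are made up) is to flag rows whose `time`
delta exceeds the expected sample period:

    import polars as pl

    def detect_time_gaps(
        df: pl.DataFrame,
        expected_period: float,  # secs, eg. 60. for 1m bars
    ) -> pl.DataFrame:
        # rows whose delta from the prior sample is larger than expected
        return (
            df.with_columns(
                pl.col('time').diff().alias('step')
            )
            .filter(pl.col('step') > expected_period)
        )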


@@ -137,6 +137,14 @@ def mk_ohlcv_shm_keyed_filepath(
     return path


+def unpack_fqme_from_parquet_filepath(path: Path) -> str:
+
+    filename: str = str(path.name)
+    fqme, fmt_descr, suffix = filename.split('.')
+    assert suffix == 'parquet'
+
+    return fqme
+
+
 ohlc_key_map = None
@@ -347,10 +355,27 @@ class NativeStorageClient:
             path.unlink()
             log.warning(f'Deleting parquet entry:\n{path}')
         else:
-            log.warning(f'No path exists:\n{path}')
+            log.error(f'No path exists:\n{path}')

         return path

+    # TODO: allow wiping and refetching a segment of the OHLCV timeseries
+    # data.
+    # def clear_range(
+    #     self,
+    #     key: str,
+    #     start_dt: datetime,
+    #     end_dt: datetime,
+    #     timeframe: int | None = None,
+    # ) -> pl.DataFrame:
+    #     '''
+    #     Clear and re-fetch a range of datums for the OHLCV time series.
+    #     Useful for series editing from a chart B)
+    #
+    #     '''
+    #     ...
+

 @acm
 async def get_client(
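
Lastly, regarding the commented-out `clear_range()` stub above: since the rest of
the client API isn't shown in this diff, here is only a pure-DataFrame-level
sketch (all names hypothetical, not part of this commit) of what wiping a
`[start_dt, end_dt)` window might look like before a re-fetch:

    from datetime import datetime
    import polars as pl

    def clear_range_df(
        df: pl.DataFrame,
        start_dt: datetime,
        end_dt: datetime,
    ) -> pl.DataFrame:
        # drop all rows whose epoch-float 'time' falls inside the window;
        # the caller would persist the trimmed frame then backfill the hole.
        start: float = start_dt.timestamp()
        end: float = end_dt.timestamp()
        return df.filter(
            (pl.col('time') < start)
            | (pl.col('time') >= end)
        )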