Add `store ldshm` subcmd

Changed from the old `store clone` to instead simply load any shm buffer
matching a user-provided `FQME: str` pattern; writing to a parquet file is
only done if an explicit option flag is passed by the user.

Implement a new `iter_dfs_from_shms()` generator which allows iteratively
loading both 1m and 1s buffers, delivering the `Path`, `ShmArray` and
`polars.DataFrame` instances per matching file B)

Also add a todo for a `NativeStorageClient.clear_range()` method.
commit d704d631ba (parent 58c096bfad)
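For context, a hypothetical invocation sketch (assuming the `store` typer
group is wired into the top-level `piker` entrypoint like the other storage
subcommands; the fqme is just an example):

    # attach to every /dev/shm buffer file matching the fqme
    # pattern and drop into a debug REPL per loaded buffer:
    piker store ldshm xmrusdt.kucoin

    # same, but also dump each loaded frame to a .parquet file;
    # typer renders the `write_parquet: bool = False` param as
    # an optional CLI flag:
    piker store ldshm xmrusdt.kucoin --write-parquet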
@@ -20,10 +20,13 @@ Storage middle-ware CLIs.

 """
 from __future__ import annotations
 from pathlib import Path
+import time
+from typing import Generator
 # from typing import TYPE_CHECKING

 import polars as pl
 import numpy as np
+import tractor
 # import pendulum
 from rich.console import Console
 import trio
@@ -32,6 +35,16 @@ import typer

 from piker.service import open_piker_runtime
 from piker.cli import cli
+from piker.config import get_conf_dir
+from piker.data import (
+    maybe_open_shm_array,
+    def_iohlcv_fields,
+    ShmArray,
+)
+from piker.data.history import (
+    _default_hist_size,
+    _default_rt_size,
+)
 from . import (
     log,
 )
@@ -132,8 +145,6 @@ def anal(

 ) -> np.ndarray:

-    import tractor
-
     async def main():
         async with (
             open_piker_runtime(
@@ -171,25 +182,90 @@ def anal(

     trio.run(main)


+def iter_dfs_from_shms(fqme: str) -> Generator[
+    tuple[Path, ShmArray, pl.DataFrame],
+    None,
+    None,
+]:
+    # shm buffer size table based on known sample rates
+    sizes: dict[str, int] = {
+        'hist': _default_hist_size,
+        'rt': _default_rt_size,
+    }
+
+    # load all detected shm buffer files which have the
+    # passed FQME pattern in the file name.
+    shmfiles: list[Path] = []
+    shmdir = Path('/dev/shm/')
+
+    for shmfile in shmdir.glob(f'*{fqme}*'):
+        filename: str = shmfile.name
+
+        # skip index files
+        if (
+            '_first' in filename
+            or '_last' in filename
+        ):
+            continue
+
+        assert shmfile.is_file()
+        log.debug(f'Found matching shm buffer file: {filename}')
+        shmfiles.append(shmfile)
+
+    for shmfile in shmfiles:
+
+        # lookup array buffer size based on file suffix
+        # being either .rt or .hist
+        size: int = sizes[shmfile.name.rsplit('.')[-1]]
+
+        # attach to any shm buffer, load array into polars df,
+        # write to local parquet file.
+        shm, opened = maybe_open_shm_array(
+            key=shmfile.name,
+            size=size,
+            dtype=def_iohlcv_fields,
+            readonly=True,
+        )
+        assert not opened
+        ohlcv = shm.array
+
+        start = time.time()
+
+        # XXX: thanks to this SO answer for this conversion tip:
+        # https://stackoverflow.com/a/72054819
+        df = pl.DataFrame({
+            field_name: ohlcv[field_name]
+            for field_name in ohlcv.dtype.fields
+        })
+        delay: float = round(
+            time.time() - start,
+            ndigits=6,
+        )
+        log.info(
+            f'numpy -> polars conversion took {delay} secs\n'
+            f'polars df: {df}'
+        )
+
+        yield (
+            shmfile,
+            shm,
+            df,
+        )
+
+
 @store.command()
-def clone(
+def ldshm(
     fqme: str,
+
+    write_parquet: bool = False,
+
 ) -> None:
-    import time
-    from piker.config import get_conf_dir
-    from piker.data import (
-        maybe_open_shm_array,
-        def_iohlcv_fields,
-    )
-    import polars as pl
-
-    # TODO: actually look up an existing shm buf (set) from
-    # an fqme and file name parsing..
-    # open existing shm buffer for kucoin backend
-    key: str = 'piker.brokerd[3595d316-3c15-46].xmrusdt.kucoin.hist'
-    shmpath: Path = Path('/dev/shm') / key
-    assert shmpath.is_file()
+    '''
+    Linux ONLY: load any fqme file name matching shm buffer from
+    /dev/shm/ into an OHLCV numpy array and polars DataFrame,
+    optionally write to .parquet file.
+
+    '''
     async def main():
         async with (
             open_piker_runtime(
@@ -197,73 +273,59 @@ def clone(
                 enable_modules=['piker.data._sharedmem'],
             ),
         ):
-            # attach to any shm buffer, load array into polars df,
-            # write to local parquet file.
-            shm, opened = maybe_open_shm_array(
-                key=key,
-                dtype=def_iohlcv_fields,
-            )
-            assert not opened
-            ohlcv = shm.array
-
-            start = time.time()
-
-            # XXX: thanks to this SO answer for this conversion tip:
-            # https://stackoverflow.com/a/72054819
-            df = pl.DataFrame({
-                field_name: ohlcv[field_name]
-                for field_name in ohlcv.dtype.fields
-            })
-            delay: float = round(
-                time.time() - start,
-                ndigits=6,
-            )
-            print(
-                f'numpy -> polars conversion took {delay} secs\n'
-                f'polars df: {df}'
-            )
-
-            # compute ohlc properties for naming
-            times: np.ndarray = ohlcv['time']
-            secs: float = times[-1] - times[-2]
-            if secs < 1.:
-                breakpoint()
-                raise ValueError(
-                    f'Something is wrong with time period for {shm}:\n{ohlcv}'
-                )
-
-            timeframe: str = f'{secs}s'
-
-            # write to parquet file
-            datadir: Path = get_conf_dir() / 'parqdb'
-            if not datadir.is_dir():
-                datadir.mkdir()
-
-            path: Path = datadir / f'{fqme}.{timeframe}.parquet'
-
-            # write to fs
-            start = time.time()
-            df.write_parquet(path)
-            delay: float = round(
-                time.time() - start,
-                ndigits=6,
-            )
-            print(
-                f'parquet write took {delay} secs\n'
-                f'file path: {path}'
-            )
-
-            # read back from fs
-            start = time.time()
-            read_df: pl.DataFrame = pl.read_parquet(path)
-            delay: float = round(
-                time.time() - start,
-                ndigits=6,
-            )
-            print(
-                f'parquet read took {delay} secs\n'
-                f'polars df: {read_df}'
-            )
+            df: pl.DataFrame | None = None
+            for shmfile, shm, df in iter_dfs_from_shms(fqme):
+
+                # compute ohlc properties for naming
+                times: np.ndarray = shm.array['time']
+                secs: float = times[-1] - times[-2]
+                if secs < 1.:
+                    breakpoint()
+                    raise ValueError(
+                        f'Something is wrong with time period for {shm}:\n{times}'
+                    )
+
+                # TODO: maybe only optionally enter this depending
+                # on some CLI flags and/or gap detection?
+                await tractor.breakpoint()
+
+                # write to parquet file?
+                if write_parquet:
+                    timeframe: str = f'{secs}s'
+
+                    datadir: Path = get_conf_dir() / 'nativedb'
+                    if not datadir.is_dir():
+                        datadir.mkdir()
+
+                    path: Path = datadir / f'{fqme}.{timeframe}.parquet'
+
+                    # write to fs
+                    start = time.time()
+                    df.write_parquet(path)
+                    delay: float = round(
+                        time.time() - start,
+                        ndigits=6,
+                    )
+                    log.info(
+                        f'parquet write took {delay} secs\n'
+                        f'file path: {path}'
+                    )
+
+                    # read back from fs
+                    start = time.time()
+                    read_df: pl.DataFrame = pl.read_parquet(path)
+                    delay: float = round(
+                        time.time() - start,
+                        ndigits=6,
+                    )
+                    print(
+                        f'parquet read took {delay} secs\n'
+                        f'polars df: {read_df}'
+                    )
+
+            if df is None:
+                log.error(f'No matching shm buffers for {fqme} ?')

     trio.run(main)
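As a rough consumption sketch for the new generator (the `piker.storage.cli`
module path is an assumption here), iteration delivers one
`(Path, ShmArray, pl.DataFrame)` triple per matching buffer file, e.g. both
the 1s `.rt` and 1m `.hist` entries:

    from piker.storage.cli import iter_dfs_from_shms

    for path, shm, df in iter_dfs_from_shms('xmrusdt.kucoin'):
        # `shm.array` is the raw numpy OHLCV struct-array,
        # `df` its polars conversion.
        print(path.name, shm.array.size, df.shape)

The remaining hunks below are against the nativedb storage module.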
@@ -137,6 +137,14 @@ def mk_ohlcv_shm_keyed_filepath(
     return path


+def unpack_fqme_from_parquet_filepath(path: Path) -> str:
+
+    filename: str = str(path.name)
+    fqme, fmt_descr, suffix = filename.split('.')
+    assert suffix == 'parquet'
+    return fqme
+
+
 ohlc_key_map = None
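A quick usage sketch for the new helper; note the 3-way unpack assumes
exactly three dot-separated fields, i.e. an fqme which itself contains no
'.' chars (the file name below is hypothetical):

    from pathlib import Path

    # layout is `<fqme>.<fmt_descr>.parquet` per the
    # `store ldshm --write-parquet` output naming:
    path = Path('btcusdt.60s.parquet')
    assert unpack_fqme_from_parquet_filepath(path) == 'btcusdt'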
@@ -347,10 +355,27 @@ class NativeStorageClient:
             path.unlink()
             log.warning(f'Deleting parquet entry:\n{path}')
         else:
-            log.warning(f'No path exists:\n{path}')
+            log.error(f'No path exists:\n{path}')

         return path

+    # TODO: allow wiping and refetching a segment of the OHLCV timeseries
+    # data.
+    # def clear_range(
+    #     self,
+    #     key: str,
+    #     start_dt: datetime,
+    #     end_dt: datetime,
+    #     timeframe: int | None = None,
+    # ) -> pl.DataFrame:
+    #     '''
+    #     Clear and re-fetch a range of datums for the OHLCV time series.
+
+    #     Useful for series editing from a chart B)
+
+    #     '''
+    #     ...
+

 @acm
 async def get_client(
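A minimal sketch of the wipe-half of that TODO using polars, purely
illustrative and not part of this commit (assumes the epoch-float `'time'`
column from the OHLCV schema; the re-fetch step is omitted):

    import polars as pl
    from datetime import datetime

    def clear_range_df(
        df: pl.DataFrame,
        start_dt: datetime,
        end_dt: datetime,
    ) -> pl.DataFrame:
        # drop every row whose 'time' stamp falls inside
        # [start_dt, end_dt); a backend re-fetch would then
        # backfill the resulting gap before re-writing parquet.
        start: float = start_dt.timestamp()
        end: float = end_dt.timestamp()
        return df.filter(
            (pl.col('time') < start) | (pl.col('time') >= end)
        )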