Fix .parquet filenaming..
Apparently `.storage.nativedb.mk_ohlcv_shm_keyed_filepath()` was always kinda broken if you passed in a `period: float` with an actual non-`int` value to the format string? Fixed it to strictly cast to `int()` before str-ifying so that you don't get a weird `60.0s.parquet` in there.. Further this rejigs the `store ldshm` gap correction-annotation loop to, - use `StorageClient.write_ohlcv()` instead of hackily re-implementing it.. now that the problem from above is fixed! - use a `needs_correction: bool` var to determine if gap markup and de-duplicated data should be pushed to the shm buffer, - go back to using `AnnotCtl.add_rect()` for all detected gaps such that they all persist (and thus are shown together) until the client disconnects.
parent
1d7e97a295
commit
a86573b5a2
|
@ -20,8 +20,12 @@ Storage middle-ware CLIs.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
# from datetime import datetime
|
# from datetime import datetime
|
||||||
|
# from contextlib import (
|
||||||
|
# AsyncExitStack,
|
||||||
|
# )
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import time
|
import time
|
||||||
|
from types import ModuleType
|
||||||
|
|
||||||
import polars as pl
|
import polars as pl
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -34,7 +38,6 @@ import typer
|
||||||
|
|
||||||
from piker.service import open_piker_runtime
|
from piker.service import open_piker_runtime
|
||||||
from piker.cli import cli
|
from piker.cli import cli
|
||||||
from piker.config import get_conf_dir
|
|
||||||
from piker.data import (
|
from piker.data import (
|
||||||
ShmArray,
|
ShmArray,
|
||||||
)
|
)
|
||||||
|
@ -45,6 +48,7 @@ from . import (
|
||||||
from . import (
|
from . import (
|
||||||
__tsdbs__,
|
__tsdbs__,
|
||||||
open_storage_client,
|
open_storage_client,
|
||||||
|
StorageClient,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -232,7 +236,8 @@ def anal(
|
||||||
@store.command()
|
@store.command()
|
||||||
def ldshm(
|
def ldshm(
|
||||||
fqme: str,
|
fqme: str,
|
||||||
write_parquet: bool = False,
|
write_parquet: bool = True,
|
||||||
|
reload_parquet_to_shm: bool = True,
|
||||||
|
|
||||||
) -> None:
|
) -> None:
|
||||||
'''
|
'''
|
||||||
|
@ -242,15 +247,32 @@ def ldshm(
|
||||||
|
|
||||||
'''
|
'''
|
||||||
async def main():
|
async def main():
|
||||||
|
from piker.ui._remote_ctl import (
|
||||||
|
open_annot_ctl,
|
||||||
|
AnnotCtl,
|
||||||
|
)
|
||||||
|
actl: AnnotCtl
|
||||||
|
mod: ModuleType
|
||||||
|
client: StorageClient
|
||||||
async with (
|
async with (
|
||||||
open_piker_runtime(
|
open_piker_runtime(
|
||||||
'polars_boi',
|
'polars_boi',
|
||||||
enable_modules=['piker.data._sharedmem'],
|
enable_modules=['piker.data._sharedmem'],
|
||||||
debug_mode=True,
|
debug_mode=True,
|
||||||
),
|
),
|
||||||
|
open_storage_client() as (
|
||||||
|
mod,
|
||||||
|
client,
|
||||||
|
),
|
||||||
|
open_annot_ctl() as actl,
|
||||||
):
|
):
|
||||||
df: pl.DataFrame | None = None
|
shm_df: pl.DataFrame | None = None
|
||||||
for shmfile, shm, shm_df in tsp.iter_dfs_from_shms(fqme):
|
for (
|
||||||
|
shmfile,
|
||||||
|
shm,
|
||||||
|
# parquet_path,
|
||||||
|
shm_df,
|
||||||
|
) in tsp.iter_dfs_from_shms(fqme):
|
||||||
|
|
||||||
# compute ohlc properties for naming
|
# compute ohlc properties for naming
|
||||||
times: np.ndarray = shm.array['time']
|
times: np.ndarray = shm.array['time']
|
||||||
|
@ -275,122 +297,136 @@ def ldshm(
|
||||||
period=period_s,
|
period=period_s,
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: maybe only optionally enter this depending
|
needs_correction: bool = (
|
||||||
# on some CLI flags and/or gap detection?
|
|
||||||
if (
|
|
||||||
not gaps.is_empty()
|
not gaps.is_empty()
|
||||||
or null_segs
|
or null_segs
|
||||||
):
|
)
|
||||||
from piker.ui._remote_ctl import (
|
# TODO: maybe only optionally enter this depending
|
||||||
open_annot_ctl,
|
# on some CLI flags and/or gap detection?
|
||||||
AnnotCtl,
|
if needs_correction:
|
||||||
)
|
for i in range(gaps.height):
|
||||||
annot_ctl: AnnotCtl
|
row: pl.DataFrame = gaps[i]
|
||||||
async with open_annot_ctl() as annot_ctl:
|
|
||||||
for i in range(gaps.height):
|
|
||||||
|
|
||||||
row: pl.DataFrame = gaps[i]
|
# TODO: can we eventually remove this
|
||||||
|
# once we figure out why the epoch cols
|
||||||
|
# don't match?
|
||||||
|
iend: int = row['index'][0]
|
||||||
|
# dt: datetime = row['dt'][0]
|
||||||
|
# dt_prev: datetime = row['dt_prev'][0]
|
||||||
|
|
||||||
# TODO: can we eventually remove this
|
# the gap's right-most bar's OPEN value
|
||||||
# once we figure out why the epoch cols
|
# at that time (sample) step.
|
||||||
# don't match?
|
# dt_end_t: float = dt.timestamp()
|
||||||
iend: int = row['index'][0]
|
|
||||||
# dt: datetime = row['dt'][0]
|
|
||||||
# dt_prev: datetime = row['dt_prev'][0]
|
|
||||||
|
|
||||||
# the gap's right-most bar's OPEN value
|
# TODO: FIX HOW/WHY these aren't matching
|
||||||
# at that time (sample) step.
|
# and are instead off by 4hours (EST
|
||||||
# dt_end_t: float = dt.timestamp()
|
# vs. UTC?!?!)
|
||||||
|
# end_t: float = row['time']
|
||||||
|
# assert (
|
||||||
|
# dt.timestamp()
|
||||||
|
# ==
|
||||||
|
# end_t
|
||||||
|
# )
|
||||||
|
|
||||||
# TODO: FIX HOW/WHY these aren't matching
|
# the gap's left-most bar's CLOSE value
|
||||||
# and are instead off by 4hours (EST
|
# at that time (sample) step.
|
||||||
# vs. UTC?!?!)
|
prev_r: pl.DataFrame = df.filter(
|
||||||
# end_t: float = row['time']
|
pl.col('index') == iend - 1
|
||||||
# assert (
|
)
|
||||||
# dt.timestamp()
|
istart: int = prev_r['index'][0]
|
||||||
# ==
|
# dt_start_t: float = dt_prev.timestamp()
|
||||||
# end_t
|
|
||||||
# )
|
|
||||||
|
|
||||||
# the gap's left-most bar's CLOSE value
|
# start_t: float = prev_r['time']
|
||||||
# at that time (sample) step.
|
# assert (
|
||||||
|
# dt_start_t
|
||||||
|
# ==
|
||||||
|
# start_t
|
||||||
|
# )
|
||||||
|
|
||||||
prev_r: pl.DataFrame = df.filter(
|
# TODO: implement px-col width measure
|
||||||
pl.col('index') == gaps[0]['index'] - 1
|
# and ensure at least as many px-cols
|
||||||
|
# shown per rect as configured by user.
|
||||||
|
gap_w: float = abs((iend - istart))
|
||||||
|
if gap_w < 6:
|
||||||
|
margin: float = 6
|
||||||
|
iend += margin
|
||||||
|
istart -= margin
|
||||||
|
|
||||||
|
ro: tuple[float, float] = (
|
||||||
|
# dt_end_t,
|
||||||
|
iend,
|
||||||
|
row['open'][0],
|
||||||
|
)
|
||||||
|
lc: tuple[float, float] = (
|
||||||
|
# dt_start_t,
|
||||||
|
istart,
|
||||||
|
prev_r['close'][0],
|
||||||
|
)
|
||||||
|
|
||||||
|
# async with actl.open_rect(
|
||||||
|
# ) as aid:
|
||||||
|
aid: int = await actl.add_rect(
|
||||||
|
fqme=fqme,
|
||||||
|
timeframe=period_s,
|
||||||
|
start_pos=lc,
|
||||||
|
end_pos=ro,
|
||||||
|
)
|
||||||
|
assert aid
|
||||||
|
|
||||||
|
# write to parquet file?
|
||||||
|
if (
|
||||||
|
write_parquet
|
||||||
|
):
|
||||||
|
# write to fs
|
||||||
|
start = time.time()
|
||||||
|
path: Path = await client.write_ohlcv(
|
||||||
|
fqme,
|
||||||
|
ohlcv=deduped,
|
||||||
|
timeframe=period_s,
|
||||||
|
)
|
||||||
|
write_delay: float = round(
|
||||||
|
time.time() - start,
|
||||||
|
ndigits=6,
|
||||||
|
)
|
||||||
|
|
||||||
|
# read back from fs
|
||||||
|
start = time.time()
|
||||||
|
read_df: pl.DataFrame = pl.read_parquet(path)
|
||||||
|
read_delay: float = round(
|
||||||
|
time.time() - start,
|
||||||
|
ndigits=6,
|
||||||
|
)
|
||||||
|
log.info(
|
||||||
|
f'parquet write took {write_delay} secs\n'
|
||||||
|
f'file path: {path}'
|
||||||
|
f'parquet read took {read_delay} secs\n'
|
||||||
|
f'polars df: {read_df}'
|
||||||
|
)
|
||||||
|
|
||||||
|
if reload_parquet_to_shm:
|
||||||
|
new = tsp.pl2np(
|
||||||
|
deduped,
|
||||||
|
dtype=shm.array.dtype,
|
||||||
)
|
)
|
||||||
istart: int = prev_r['index'][0]
|
# since normally readonly
|
||||||
# dt_start_t: float = dt_prev.timestamp()
|
shm._array.setflags(
|
||||||
|
write=int(1),
|
||||||
# start_t: float = prev_r['time']
|
|
||||||
# assert (
|
|
||||||
# dt_start_t
|
|
||||||
# ==
|
|
||||||
# start_t
|
|
||||||
# )
|
|
||||||
|
|
||||||
# TODO: implement px-col width measure
|
|
||||||
# and ensure at least as many px-cols
|
|
||||||
# shown per rect as configured by user.
|
|
||||||
gap_w: float = abs((iend - istart))
|
|
||||||
# await tractor.pause()
|
|
||||||
if gap_w < 6:
|
|
||||||
margin: float = 6
|
|
||||||
iend += margin
|
|
||||||
istart -= margin
|
|
||||||
|
|
||||||
ro: tuple[float, float] = (
|
|
||||||
# dt_end_t,
|
|
||||||
iend,
|
|
||||||
row['open'][0],
|
|
||||||
)
|
)
|
||||||
lc: tuple[float, float] = (
|
shm.push(
|
||||||
# dt_start_t,
|
new,
|
||||||
istart,
|
prepend=True,
|
||||||
prev_r['close'][0],
|
start=new['index'][-1],
|
||||||
|
update_first=False, # don't update ._first
|
||||||
)
|
)
|
||||||
|
|
||||||
aid: int = await annot_ctl.add_rect(
|
await tractor.pause()
|
||||||
fqme=fqme,
|
assert diff
|
||||||
timeframe=period_s,
|
|
||||||
start_pos=lc,
|
|
||||||
end_pos=ro,
|
|
||||||
)
|
|
||||||
assert aid
|
|
||||||
await tractor.pause()
|
|
||||||
|
|
||||||
# write to parquet file?
|
else:
|
||||||
if write_parquet:
|
# allow interaction even when no ts problems.
|
||||||
timeframe: str = f'{period_s}s'
|
await tractor.pause()
|
||||||
|
assert not diff
|
||||||
|
|
||||||
datadir: Path = get_conf_dir() / 'nativedb'
|
|
||||||
if not datadir.is_dir():
|
|
||||||
datadir.mkdir()
|
|
||||||
|
|
||||||
path: Path = datadir / f'{fqme}.{timeframe}.parquet'
|
|
||||||
|
|
||||||
# write to fs
|
|
||||||
start = time.time()
|
|
||||||
df.write_parquet(path)
|
|
||||||
delay: float = round(
|
|
||||||
time.time() - start,
|
|
||||||
ndigits=6,
|
|
||||||
)
|
|
||||||
log.info(
|
|
||||||
f'parquet write took {delay} secs\n'
|
|
||||||
f'file path: {path}'
|
|
||||||
)
|
|
||||||
|
|
||||||
# read back from fs
|
|
||||||
start = time.time()
|
|
||||||
read_df: pl.DataFrame = pl.read_parquet(path)
|
|
||||||
delay: float = round(
|
|
||||||
time.time() - start,
|
|
||||||
ndigits=6,
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
f'parquet read took {delay} secs\n'
|
|
||||||
f'polars df: {read_df}'
|
|
||||||
)
|
|
||||||
|
|
||||||
if df is None:
|
if df is None:
|
||||||
log.error(f'No matching shm buffers for {fqme} ?')
|
log.error(f'No matching shm buffers for {fqme} ?')
|
||||||
|
|
|
@ -95,16 +95,19 @@ def detect_period(shm: ShmArray) -> float:
|
||||||
|
|
||||||
def mk_ohlcv_shm_keyed_filepath(
|
def mk_ohlcv_shm_keyed_filepath(
|
||||||
fqme: str,
|
fqme: str,
|
||||||
period: float, # ow known as the "timeframe"
|
period: float | int, # ow known as the "timeframe"
|
||||||
datadir: Path,
|
datadir: Path,
|
||||||
|
|
||||||
) -> str:
|
) -> Path:
|
||||||
|
|
||||||
if period < 1.:
|
if period < 1.:
|
||||||
raise ValueError('Sample period should be >= 1.!?')
|
raise ValueError('Sample period should be >= 1.!?')
|
||||||
|
|
||||||
period_s: str = f'{period}s'
|
path: Path = (
|
||||||
path: Path = datadir / f'{fqme}.ohlcv{period_s}.parquet'
|
datadir
|
||||||
|
/
|
||||||
|
f'{fqme}.ohlcv{int(period)}s.parquet'
|
||||||
|
)
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
@ -227,6 +230,7 @@ class NativeStorageClient:
|
||||||
self,
|
self,
|
||||||
fqme: str,
|
fqme: str,
|
||||||
period: float,
|
period: float,
|
||||||
|
|
||||||
) -> Path:
|
) -> Path:
|
||||||
return mk_ohlcv_shm_keyed_filepath(
|
return mk_ohlcv_shm_keyed_filepath(
|
||||||
fqme=fqme,
|
fqme=fqme,
|
||||||
|
@ -239,6 +243,7 @@ class NativeStorageClient:
|
||||||
fqme: str,
|
fqme: str,
|
||||||
df: pl.DataFrame,
|
df: pl.DataFrame,
|
||||||
timeframe: float,
|
timeframe: float,
|
||||||
|
|
||||||
) -> None:
|
) -> None:
|
||||||
# cache df for later usage since we (currently) need to
|
# cache df for later usage since we (currently) need to
|
||||||
# convert to np.ndarrays to push to our `ShmArray` rt
|
# convert to np.ndarrays to push to our `ShmArray` rt
|
||||||
|
|
Loading…
Reference in New Issue