Add first-draft `trimeter` based concurrent ohlc history fetching
parent
8711465d96
commit
946d554716
|
@ -35,6 +35,7 @@ from typing import (
|
||||||
import trio
|
import trio
|
||||||
from trio.abc import ReceiveChannel
|
from trio.abc import ReceiveChannel
|
||||||
from trio_typing import TaskStatus
|
from trio_typing import TaskStatus
|
||||||
|
import trimeter
|
||||||
import tractor
|
import tractor
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import pendulum
|
import pendulum
|
||||||
|
@ -263,32 +264,66 @@ async def start_backfill(
|
||||||
# let caller unblock and deliver latest history frame
|
# let caller unblock and deliver latest history frame
|
||||||
task_status.started((shm, start_dt, end_dt, bf_done))
|
task_status.started((shm, start_dt, end_dt, bf_done))
|
||||||
|
|
||||||
|
times = array['time']
|
||||||
|
step_size_s = (
|
||||||
|
pendulum.from_timestamp(times[-1]) -
|
||||||
|
pendulum.from_timestamp(times[-2])
|
||||||
|
).seconds
|
||||||
|
frame_step_s = (end_dt - start_dt).seconds
|
||||||
|
|
||||||
if last_tsdb_dt is None:
|
if last_tsdb_dt is None:
|
||||||
# maybe a better default (they don't seem to define epoch?!)
|
# maybe a better default (they don't seem to define epoch?!)
|
||||||
last_tsdb_dt = pendulum.now().subtract(days=1)
|
|
||||||
|
# based on the sample step size load a certain amount
|
||||||
|
# history
|
||||||
|
if step_size_s == 1:
|
||||||
|
last_tsdb_dt = pendulum.now().subtract(weeks=2)
|
||||||
|
|
||||||
|
elif step_size_s == 60:
|
||||||
|
last_tsdb_dt = pendulum.now().subtract(years=2)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
'`piker` only needs to support 1m and 1s sampling '
|
||||||
|
'but ur api is trying to deliver a longer '
|
||||||
|
f'timeframe of {step_size_s} ' 'seconds.. so ye, dun '
|
||||||
|
'do dat bruh.'
|
||||||
|
)
|
||||||
|
|
||||||
|
hist_period = pendulum.period(
|
||||||
|
start_dt.subtract(seconds=step_size_s),
|
||||||
|
last_tsdb_dt,
|
||||||
|
)
|
||||||
|
end_dts = list(hist_period.range('seconds', frame_step_s))
|
||||||
|
|
||||||
# pull new history frames until we hit latest
|
# pull new history frames until we hit latest
|
||||||
# already in the tsdb or a max count.
|
# already in the tsdb or a max count.
|
||||||
# mx_fills = 16
|
|
||||||
count = 0
|
count = 0
|
||||||
# while True:
|
frames = {}
|
||||||
while (
|
|
||||||
end_dt > last_tsdb_dt
|
async def get_ohlc_frame(
|
||||||
# and count < mx_fills
|
input_end_dt: datetime,
|
||||||
):
|
|
||||||
|
) -> np.ndarray:
|
||||||
|
|
||||||
|
nonlocal count
|
||||||
count += 1
|
count += 1
|
||||||
try:
|
try:
|
||||||
array, start_dt, end_dt = await hist(end_dt=start_dt)
|
array, start_dt, end_dt = await hist(end_dt=input_end_dt)
|
||||||
|
# if input_end_dt.timestamp() == end_dts[0].timestamp():
|
||||||
|
# await tractor.breakpoint()
|
||||||
|
|
||||||
except NoData:
|
except NoData:
|
||||||
# decrement by the diff in time last delivered.
|
# decrement by the diff in time last delivered.
|
||||||
end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds)
|
end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds)
|
||||||
continue
|
log.warning('no data for range {(end_dt - start_dt)} ?!?')
|
||||||
|
# continue
|
||||||
|
|
||||||
except DataUnavailable:
|
except DataUnavailable:
|
||||||
# broker is being a bish and we can't pull
|
# broker is being a bish and we can't pull
|
||||||
# any more..
|
# any more..
|
||||||
break
|
log.warning('backend halted on data deliver !?!?')
|
||||||
|
# break
|
||||||
|
|
||||||
to_push = diff_history(
|
to_push = diff_history(
|
||||||
array,
|
array,
|
||||||
|
@ -302,7 +337,63 @@ async def start_backfill(
|
||||||
print(f"PULLING {count}")
|
print(f"PULLING {count}")
|
||||||
log.info(f'Pushing {to_push.size} to shm!')
|
log.info(f'Pushing {to_push.size} to shm!')
|
||||||
|
|
||||||
if to_push.size < 1:
|
frames[input_end_dt.timestamp()] = (to_push, start_dt, end_dt)
|
||||||
|
|
||||||
|
return to_push, start_dt, end_dt
|
||||||
|
|
||||||
|
# if to_push.size < 1:
|
||||||
|
# print('UHHH SIZE <1 BREAKING!?')
|
||||||
|
# break
|
||||||
|
|
||||||
|
rate = erlangs = 5
|
||||||
|
async with trimeter.amap(
|
||||||
|
|
||||||
|
get_ohlc_frame,
|
||||||
|
end_dts,
|
||||||
|
|
||||||
|
capture_outcome=True,
|
||||||
|
include_value=True,
|
||||||
|
max_at_once=erlangs,
|
||||||
|
max_per_second=rate,
|
||||||
|
|
||||||
|
) as outcomes:
|
||||||
|
|
||||||
|
# Then iterate over the return values, as they become available
|
||||||
|
# (i.e., not necessarily in the original order)
|
||||||
|
async for input_end_dt, outcome in outcomes:
|
||||||
|
try:
|
||||||
|
out = outcome.unwrap()
|
||||||
|
except Exception:
|
||||||
|
log.exception('uhh trimeter bail')
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
to_push, start_dt, end_dt = out
|
||||||
|
|
||||||
|
# pipeline-style pull frames until we need to wait for
|
||||||
|
# the next in order to arrive.
|
||||||
|
i = end_dts.index(input_end_dt)
|
||||||
|
print(f'latest end_dt {end_dt} found at index {i}')
|
||||||
|
|
||||||
|
for epoch in reversed(sorted(frames)):
|
||||||
|
start = shm.array['time'][0]
|
||||||
|
|
||||||
|
# we don't yet have the next frame to push
|
||||||
|
# so break back to the async request loop.
|
||||||
|
diff = epoch - start
|
||||||
|
if abs(diff) > step_size_s:
|
||||||
|
if len(frames) > 20:
|
||||||
|
log.warning(
|
||||||
|
f'there appears to be a history gap of {diff}?'
|
||||||
|
)
|
||||||
|
# from pprint import pprint
|
||||||
|
# await tractor.breakpoint()
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
to_push, start_dt, end_dt = frames.pop(epoch)
|
||||||
|
print(f'pushing frame ending at {end_dt}')
|
||||||
|
|
||||||
|
if not len(to_push):
|
||||||
break
|
break
|
||||||
|
|
||||||
# bail on shm allocation overrun
|
# bail on shm allocation overrun
|
||||||
|
|
Loading…
Reference in New Issue