Add first-draft `trimeter`-based concurrent OHLC history fetching

incr_update_backup
Tyler Goodlet 2022-05-01 12:40:51 -04:00
parent 8711465d96
commit 946d554716
1 changed file with 111 additions and 20 deletions

View File

@ -35,6 +35,7 @@ from typing import (
import trio import trio
from trio.abc import ReceiveChannel from trio.abc import ReceiveChannel
from trio_typing import TaskStatus from trio_typing import TaskStatus
import trimeter
import tractor import tractor
from pydantic import BaseModel from pydantic import BaseModel
import pendulum import pendulum
@ -263,32 +264,66 @@ async def start_backfill(
# let caller unblock and deliver latest history frame # let caller unblock and deliver latest history frame
task_status.started((shm, start_dt, end_dt, bf_done)) task_status.started((shm, start_dt, end_dt, bf_done))
times = array['time']
step_size_s = (
pendulum.from_timestamp(times[-1]) -
pendulum.from_timestamp(times[-2])
).seconds
frame_step_s = (end_dt - start_dt).seconds
if last_tsdb_dt is None: if last_tsdb_dt is None:
# maybe a better default (they don't seem to define epoch?!) # maybe a better default (they don't seem to define epoch?!)
last_tsdb_dt = pendulum.now().subtract(days=1)
# based on the sample step size load a certain amount
# history
if step_size_s == 1:
last_tsdb_dt = pendulum.now().subtract(weeks=2)
elif step_size_s == 60:
last_tsdb_dt = pendulum.now().subtract(years=2)
else:
raise ValueError(
'`piker` only needs to support 1m and 1s sampling '
'but ur api is trying to deliver a longer '
f'timeframe of {step_size_s} ' 'seconds.. so ye, dun '
'do dat bruh.'
)
hist_period = pendulum.period(
start_dt.subtract(seconds=step_size_s),
last_tsdb_dt,
)
end_dts = list(hist_period.range('seconds', frame_step_s))
# pull new history frames until we hit latest # pull new history frames until we hit latest
# already in the tsdb or a max count. # already in the tsdb or a max count.
# mx_fills = 16
count = 0 count = 0
# while True: frames = {}
while (
end_dt > last_tsdb_dt async def get_ohlc_frame(
# and count < mx_fills input_end_dt: datetime,
):
) -> np.ndarray:
nonlocal count
count += 1 count += 1
try: try:
array, start_dt, end_dt = await hist(end_dt=start_dt) array, start_dt, end_dt = await hist(end_dt=input_end_dt)
# if input_end_dt.timestamp() == end_dts[0].timestamp():
# await tractor.breakpoint()
except NoData: except NoData:
# decrement by the diff in time last delivered. # decrement by the diff in time last delivered.
end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds) end_dt = start_dt.subtract(seconds=(end_dt - start_dt).seconds)
continue log.warning('no data for range {(end_dt - start_dt)} ?!?')
# continue
except DataUnavailable: except DataUnavailable:
# broker is being a bish and we can't pull # broker is being a bish and we can't pull
# any more.. # any more..
break log.warning('backend halted on data deliver !?!?')
# break
to_push = diff_history( to_push = diff_history(
array, array,
@ -302,7 +337,63 @@ async def start_backfill(
print(f"PULLING {count}") print(f"PULLING {count}")
log.info(f'Pushing {to_push.size} to shm!') log.info(f'Pushing {to_push.size} to shm!')
if to_push.size < 1: frames[input_end_dt.timestamp()] = (to_push, start_dt, end_dt)
return to_push, start_dt, end_dt
# if to_push.size < 1:
# print('UHHH SIZE <1 BREAKING!?')
# break
rate = erlangs = 5
async with trimeter.amap(
get_ohlc_frame,
end_dts,
capture_outcome=True,
include_value=True,
max_at_once=erlangs,
max_per_second=rate,
) as outcomes:
# Then iterate over the return values, as they become available
# (i.e., not necessarily in the original order)
async for input_end_dt, outcome in outcomes:
try:
out = outcome.unwrap()
except Exception:
log.exception('uhh trimeter bail')
raise
else:
to_push, start_dt, end_dt = out
# pipeline-style pull frames until we need to wait for
# the next in order to arrive.
i = end_dts.index(input_end_dt)
print(f'latest end_dt {end_dt} found at index {i}')
for epoch in reversed(sorted(frames)):
start = shm.array['time'][0]
# we don't yet have the next frame to push
# so break back to the async request loop.
diff = epoch - start
if abs(diff) > step_size_s:
if len(frames) > 20:
log.warning(
f'there appears to be a history gap of {diff}?'
)
# from pprint import pprint
# await tractor.breakpoint()
else:
break
to_push, start_dt, end_dt = frames.pop(epoch)
print(f'pushing frame ending at {end_dt}')
if not len(to_push):
break break
# bail on shm allocation overrun # bail on shm allocation overrun