Compare commits

..

6 Commits

Author SHA1 Message Date
Tyler Goodlet ef9bc7d1ed `.brokers.cli`: module type and todo for `--pdb` flag to NOT src from sub-cmd 2025-02-19 17:40:31 -05:00
Tyler Goodlet 0aa252507f Type loaded backend modules 2025-02-19 17:40:31 -05:00
Tyler Goodlet 9ac1745271 Bump various `.brokers.core` doc string content/style 2025-02-19 17:40:31 -05:00
Tyler Goodlet ded2705397 `.questrade`: link in ws-API issue! 2025-02-19 17:40:01 -05:00
Tyler Goodlet 65fbd435a8 `.kraken.broker`: need to `await verify_balances()` .. 2025-02-19 17:40:01 -05:00
Tyler Goodlet 7a21e87688 `.brokers.ib.feed`: better `tractor.to_asyncio` typing and var naming throughout! 2025-02-19 17:40:01 -05:00
18 changed files with 442 additions and 513 deletions

View File

@ -30,8 +30,7 @@ from types import ModuleType
from typing import ( from typing import (
Any, Any,
Iterator, Iterator,
Generator, Generator
TYPE_CHECKING,
) )
import pendulum import pendulum
@ -60,10 +59,8 @@ from ..clearing._messages import (
BrokerdPosition, BrokerdPosition,
) )
from piker.types import Struct from piker.types import Struct
from piker.log import get_logger
if TYPE_CHECKING:
from piker.data._symcache import SymbologyCache from piker.data._symcache import SymbologyCache
from ..log import get_logger
log = get_logger(__name__) log = get_logger(__name__)
@ -496,17 +493,6 @@ class Account(Struct):
_mktmap_table: dict[str, MktPair] | None = None, _mktmap_table: dict[str, MktPair] | None = None,
only_require: list[str]|True = True,
# ^list of fqmes that are "required" to be processed from
# this ledger pass; we often don't care about others and
# definitely shouldn't always error in such cases.
# (eg. broker backend loaded that doesn't yet supsport the
# symcache but also, inside the paper engine we don't ad-hoc
# request `get_mkt_info()` for every symbol in the ledger,
# only the one for which we're simulating against).
# TODO, not sure if there's a better soln for this, ideally
# all backends get symcache support afap i guess..
) -> dict[str, Position]: ) -> dict[str, Position]:
''' '''
Update the internal `.pps[str, Position]` table from input Update the internal `.pps[str, Position]` table from input
@ -549,32 +535,11 @@ class Account(Struct):
if _mktmap_table is None: if _mktmap_table is None:
raise raise
required: bool = (
only_require is True
or (
only_require is not True
and
fqme in only_require
)
)
# XXX: caller is allowed to provide a fallback # XXX: caller is allowed to provide a fallback
# mktmap table for the case where a new position is # mktmap table for the case where a new position is
# being added and the preloaded symcache didn't # being added and the preloaded symcache didn't
# have this entry prior (eg. with frickin IB..) # have this entry prior (eg. with frickin IB..)
if ( mkt = _mktmap_table[fqme]
not (mkt := _mktmap_table.get(fqme))
and
required
):
raise
elif not required:
continue
else:
# should be an entry retreived somewhere
assert mkt
if not (pos := pps.get(bs_mktid)): if not (pos := pps.get(bs_mktid)):
@ -691,7 +656,7 @@ class Account(Struct):
def write_config(self) -> None: def write_config(self) -> None:
''' '''
Write the current account state to the user's account TOML file, normally Write the current account state to the user's account TOML file, normally
something like `pps.toml`. something like ``pps.toml``.
''' '''
# TODO: show diff output? # TODO: show diff output?

View File

@ -168,6 +168,7 @@ class OrderClient(Struct):
async def relay_orders_from_sync_code( async def relay_orders_from_sync_code(
client: OrderClient, client: OrderClient,
symbol_key: str, symbol_key: str,
to_ems_stream: tractor.MsgStream, to_ems_stream: tractor.MsgStream,
@ -241,11 +242,6 @@ async def open_ems(
async with maybe_open_emsd( async with maybe_open_emsd(
broker, broker,
# XXX NOTE, LOL so this determines the daemon `emsd` loglevel
# then FYI.. that's kinda wrong no?
# -[ ] shouldn't it be set by `pikerd -l` or no?
# -[ ] would make a lot more sense to have a subsys ctl for
# levels.. like `-l emsd.info` or something?
loglevel=loglevel, loglevel=loglevel,
) as portal: ) as portal:

View File

@ -653,11 +653,7 @@ class Router(Struct):
flume = feed.flumes[fqme] flume = feed.flumes[fqme]
first_quote: dict = flume.first_quote first_quote: dict = flume.first_quote
book: DarkBook = self.get_dark_book(broker) book: DarkBook = self.get_dark_book(broker)
book.lasts[fqme]: float = float(first_quote['last'])
if not (last := first_quote.get('last')):
last: float = flume.rt_shm.array[-1]['close']
book.lasts[fqme]: float = float(last)
async with self.maybe_open_brokerd_dialog( async with self.maybe_open_brokerd_dialog(
brokermod=brokermod, brokermod=brokermod,
@ -720,7 +716,7 @@ class Router(Struct):
subs = self.subscribers[sub_key] subs = self.subscribers[sub_key]
sent_some: bool = False sent_some: bool = False
for client_stream in subs.copy(): for client_stream in subs:
try: try:
await client_stream.send(msg) await client_stream.send(msg)
sent_some = True sent_some = True
@ -1014,10 +1010,6 @@ async def translate_and_relay_brokerd_events(
status_msg.brokerd_msg = msg status_msg.brokerd_msg = msg
status_msg.src = msg.broker_details['name'] status_msg.src = msg.broker_details['name']
if not status_msg.req:
# likely some order change state?
await tractor.pause()
else:
await router.client_broadcast( await router.client_broadcast(
status_msg.req.symbol, status_msg.req.symbol,
status_msg, status_msg,

View File

@ -297,8 +297,6 @@ class PaperBoi(Struct):
# transmit pp msg to ems # transmit pp msg to ems
pp: Position = self.acnt.pps[bs_mktid] pp: Position = self.acnt.pps[bs_mktid]
# TODO, this will break if `require_only=True` was passed to
# `.update_from_ledger()`
pp_msg = BrokerdPosition( pp_msg = BrokerdPosition(
broker=self.broker, broker=self.broker,
@ -655,7 +653,6 @@ async def open_trade_dialog(
# in) use manually constructed table from calling # in) use manually constructed table from calling
# the `.get_mkt_info()` provider EP above. # the `.get_mkt_info()` provider EP above.
_mktmap_table=mkt_by_fqme, _mktmap_table=mkt_by_fqme,
only_require=list(mkt_by_fqme),
) )
pp_msgs: list[BrokerdPosition] = [] pp_msgs: list[BrokerdPosition] = []

View File

@ -30,7 +30,6 @@ subsys: str = 'piker.clearing'
log = get_logger(subsys) log = get_logger(subsys)
# TODO, oof doesn't this ignore the `loglevel` then???
get_console_log = partial( get_console_log = partial(
get_console_log, get_console_log,
name=subsys, name=subsys,

View File

@ -95,12 +95,6 @@ class Sampler:
# history loading. # history loading.
incr_task_cs: trio.CancelScope | None = None incr_task_cs: trio.CancelScope | None = None
bcast_errors: tuple[Exception] = (
trio.BrokenResourceError,
trio.ClosedResourceError,
trio.EndOfChannel,
)
# holds all the ``tractor.Context`` remote subscriptions for # holds all the ``tractor.Context`` remote subscriptions for
# a particular sample period increment event: all subscribers are # a particular sample period increment event: all subscribers are
# notified on a step. # notified on a step.
@ -264,15 +258,14 @@ class Sampler:
subs: set subs: set
last_ts, subs = pair last_ts, subs = pair
# NOTE, for debugging pub-sub issues task = trio.lowlevel.current_task()
# task = trio.lowlevel.current_task() log.debug(
# log.debug( f'SUBS {self.subscribers}\n'
# f'AlL-SUBS@{period_s!r}: {self.subscribers}\n' f'PAIR {pair}\n'
# f'PAIR: {pair}\n' f'TASK: {task}: {id(task)}\n'
# f'TASK: {task}: {id(task)}\n' f'broadcasting {period_s} -> {last_ts}\n'
# f'broadcasting {period_s} -> {last_ts}\n'
# f'consumers: {subs}' # f'consumers: {subs}'
# ) )
borked: set[MsgStream] = set() borked: set[MsgStream] = set()
sent: set[MsgStream] = set() sent: set[MsgStream] = set()
while True: while True:
@ -289,11 +282,12 @@ class Sampler:
await stream.send(msg) await stream.send(msg)
sent.add(stream) sent.add(stream)
except self.bcast_errors as err: except (
trio.BrokenResourceError,
trio.ClosedResourceError
):
log.error( log.error(
f'Connection dropped for IPC ctx\n' f'{stream._ctx.chan.uid} dropped connection'
f'{stream._ctx}\n\n'
f'Due to {type(err)}'
) )
borked.add(stream) borked.add(stream)
else: else:
@ -400,8 +394,7 @@ async def register_with_sampler(
finally: finally:
if ( if (
sub_for_broadcasts sub_for_broadcasts
and and subs
subs
): ):
try: try:
subs.remove(stream) subs.remove(stream)
@ -568,7 +561,8 @@ async def open_sample_stream(
async def sample_and_broadcast( async def sample_and_broadcast(
bus: _FeedsBus,
bus: _FeedsBus, # noqa
rt_shm: ShmArray, rt_shm: ShmArray,
hist_shm: ShmArray, hist_shm: ShmArray,
quote_stream: trio.abc.ReceiveChannel, quote_stream: trio.abc.ReceiveChannel,
@ -588,33 +582,11 @@ async def sample_and_broadcast(
overruns = Counter() overruns = Counter()
# NOTE, only used for debugging live-data-feed issues, though
# this should be resolved more correctly in the future using the
# new typed-msgspec feats of `tractor`!
#
# XXX, a multiline nested `dict` formatter (since rn quote-msgs
# are just that).
# pfmt: Callable[[str], str] = mk_repr()
# iterate stream delivered by broker # iterate stream delivered by broker
async for quotes in quote_stream: async for quotes in quote_stream:
# print(quotes) # print(quotes)
# XXX WARNING XXX only enable for debugging bc ow can cost # TODO: ``numba`` this!
# ALOT of perf with HF-feedz!!!
#
# log.info(
# 'Rx live quotes:\n'
# f'{pfmt(quotes)}'
# )
# TODO,
# -[ ] `numba` or `cython`-nize this loop possibly?
# |_alternatively could we do it in rust somehow by upacking
# arrow msgs instead of using `msgspec`?
# -[ ] use `msgspec.Struct` support in new typed-msging from
# `tractor` to ensure only allowed msgs are transmitted?
#
for broker_symbol, quote in quotes.items(): for broker_symbol, quote in quotes.items():
# TODO: in theory you can send the IPC msg *before* writing # TODO: in theory you can send the IPC msg *before* writing
# to the sharedmem array to decrease latency, however, that # to the sharedmem array to decrease latency, however, that
@ -687,21 +659,6 @@ async def sample_and_broadcast(
sub_key: str = broker_symbol.lower() sub_key: str = broker_symbol.lower()
subs: set[Sub] = bus.get_subs(sub_key) subs: set[Sub] = bus.get_subs(sub_key)
# TODO, figure out how to make this useful whilst
# incoporating feed "pausing" ..
#
# if not subs:
# all_bs_fqmes: list[str] = list(
# bus._subscribers.keys()
# )
# log.warning(
# f'No subscribers for {brokername!r} live-quote ??\n'
# f'broker_symbol: {broker_symbol}\n\n'
# f'Maybe the backend-sys symbol does not match one of,\n'
# f'{pfmt(all_bs_fqmes)}\n'
# )
# NOTE: by default the broker backend doesn't append # NOTE: by default the broker backend doesn't append
# it's own "name" into the fqme schema (but maybe it # it's own "name" into the fqme schema (but maybe it
# should?) so we have to manually generate the correct # should?) so we have to manually generate the correct
@ -771,14 +728,18 @@ async def sample_and_broadcast(
if lags > 10: if lags > 10:
await tractor.pause() await tractor.pause()
except Sampler.bcast_errors as ipc_err: except (
trio.BrokenResourceError,
trio.ClosedResourceError,
trio.EndOfChannel,
):
ctx: Context = ipc._ctx ctx: Context = ipc._ctx
chan: Channel = ctx.chan chan: Channel = ctx.chan
if ctx: if ctx:
log.warning( log.warning(
f'Dropped `brokerd`-feed for {broker_symbol!r} due to,\n' 'Dropped `brokerd`-quotes-feed connection:\n'
f'x>) {ctx.cid}@{chan.uid}' f'{broker_symbol}:'
f'|_{ipc_err!r}\n\n' f'{ctx.cid}@{chan.uid}'
) )
if sub.throttle_rate: if sub.throttle_rate:
assert ipc._closed assert ipc._closed
@ -795,11 +756,12 @@ async def sample_and_broadcast(
async def uniform_rate_send( async def uniform_rate_send(
rate: float, rate: float,
quote_stream: trio.abc.ReceiveChannel, quote_stream: trio.abc.ReceiveChannel,
stream: MsgStream, stream: MsgStream,
task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED, task_status: TaskStatus = trio.TASK_STATUS_IGNORED,
) -> None: ) -> None:
''' '''
@ -817,16 +779,13 @@ async def uniform_rate_send(
https://gist.github.com/njsmith/7ea44ec07e901cb78ebe1dd8dd846cb9 https://gist.github.com/njsmith/7ea44ec07e901cb78ebe1dd8dd846cb9
''' '''
# ?TODO? dynamically compute the **actual** approx overhead latency per cycle # TODO: compute the approx overhead latency per cycle
# instead of this magic # bidinezz? left_to_sleep = throttle_period = 1/rate - 0.000616
throttle_period: float = 1/rate - 0.000616
left_to_sleep: float = throttle_period
# send cycle state # send cycle state
first_quote: dict|None
first_quote = last_quote = None first_quote = last_quote = None
last_send: float = time.time() last_send = time.time()
diff: float = 0 diff = 0
task_status.started() task_status.started()
ticks_by_type: dict[ ticks_by_type: dict[
@ -837,28 +796,22 @@ async def uniform_rate_send(
clear_types = _tick_groups['clears'] clear_types = _tick_groups['clears']
while True: while True:
# compute the remaining time to sleep for this throttled cycle # compute the remaining time to sleep for this throttled cycle
left_to_sleep: float = throttle_period - diff left_to_sleep = throttle_period - diff
if left_to_sleep > 0: if left_to_sleep > 0:
cs: trio.CancelScope
with trio.move_on_after(left_to_sleep) as cs: with trio.move_on_after(left_to_sleep) as cs:
sym: str
last_quote: dict
try: try:
sym, last_quote = await quote_stream.receive() sym, last_quote = await quote_stream.receive()
except trio.EndOfChannel: except trio.EndOfChannel:
log.exception( log.exception(f"feed for {stream} ended?")
f'Live stream for feed for ended?\n'
f'<=c\n'
f' |_[{stream!r}\n'
)
break break
diff: float = time.time() - last_send diff = time.time() - last_send
if not first_quote: if not first_quote:
first_quote: float = last_quote first_quote = last_quote
# first_quote['tbt'] = ticks_by_type # first_quote['tbt'] = ticks_by_type
if (throttle_period - diff) > 0: if (throttle_period - diff) > 0:
@ -919,9 +872,7 @@ async def uniform_rate_send(
# TODO: now if only we could sync this to the display # TODO: now if only we could sync this to the display
# rate timing exactly lul # rate timing exactly lul
try: try:
await stream.send({ await stream.send({sym: first_quote})
sym: first_quote
})
except tractor.RemoteActorError as rme: except tractor.RemoteActorError as rme:
if rme.type is not tractor._exceptions.StreamOverrun: if rme.type is not tractor._exceptions.StreamOverrun:
raise raise
@ -932,28 +883,19 @@ async def uniform_rate_send(
f'{sym}:{ctx.cid}@{chan.uid}' f'{sym}:{ctx.cid}@{chan.uid}'
) )
# NOTE: any of these can be raised by `tractor`'s IPC except (
# NOTE: any of these can be raised by ``tractor``'s IPC
# transport-layer and we want to be highly resilient # transport-layer and we want to be highly resilient
# to consumers which crash or lose network connection. # to consumers which crash or lose network connection.
# I.e. we **DO NOT** want to crash and propagate up to # I.e. we **DO NOT** want to crash and propagate up to
# ``pikerd`` these kinds of errors! # ``pikerd`` these kinds of errors!
except ( trio.ClosedResourceError,
trio.BrokenResourceError,
ConnectionResetError, ConnectionResetError,
) + Sampler.bcast_errors as ipc_err: ):
match ipc_err:
case trio.EndOfChannel():
log.info(
f'{stream} terminated by peer,\n'
f'{ipc_err!r}'
)
case _:
# if the feed consumer goes down then drop # if the feed consumer goes down then drop
# out of this rate limiter # out of this rate limiter
log.warning( log.warning(f'{stream} closed')
f'{stream} closed due to,\n'
f'{ipc_err!r}'
)
await stream.aclose() await stream.aclose()
return return

View File

@ -31,7 +31,6 @@ from pathlib import Path
from pprint import pformat from pprint import pformat
from typing import ( from typing import (
Any, Any,
Callable,
Sequence, Sequence,
Hashable, Hashable,
TYPE_CHECKING, TYPE_CHECKING,
@ -57,7 +56,7 @@ from piker.brokers import (
) )
if TYPE_CHECKING: if TYPE_CHECKING:
from piker.accounting import ( from ..accounting import (
Asset, Asset,
MktPair, MktPair,
) )
@ -150,36 +149,19 @@ class SymbologyCache(Struct):
'Implement `Client.get_assets()`!' 'Implement `Client.get_assets()`!'
) )
get_mkt_pairs: Callable|None = getattr( if get_mkt_pairs := getattr(client, 'get_mkt_pairs', None):
client,
'get_mkt_pairs',
None,
)
if not get_mkt_pairs:
log.warning(
'No symbology cache `Pair` support for `{provider}`..\n'
'Implement `Client.get_mkt_pairs()`!'
)
return self
pairs: dict[str, Struct] = await get_mkt_pairs() pairs: dict[str, Struct] = await get_mkt_pairs()
if not pairs:
log.warning(
'No pairs from intial {provider!r} sym-cache request?\n\n'
'`Client.get_mkt_pairs()` -> {pairs!r} ?'
)
return self
for bs_fqme, pair in pairs.items(): for bs_fqme, pair in pairs.items():
# NOTE: every backend defined pair should
# declare it's ns path for roundtrip
# serialization lookup.
if not getattr(pair, 'ns_path', None): if not getattr(pair, 'ns_path', None):
# XXX: every backend defined pair must declare
# a `.ns_path: tractor.NamespacePath` to enable
# roundtrip serialization lookup from a local
# cache file.
raise TypeError( raise TypeError(
f'Pair-struct for {self.mod.name} MUST define a ' f'Pair-struct for {self.mod.name} MUST define a '
'`.ns_path: str`!\n\n' '`.ns_path: str`!\n'
f'{pair!r}' f'{pair}'
) )
entry = await self.mod.get_mkt_info(pair.bs_fqme) entry = await self.mod.get_mkt_info(pair.bs_fqme)
@ -213,6 +195,12 @@ class SymbologyCache(Struct):
pair, pair,
) )
else:
log.warning(
'No symbology cache `Pair` support for `{provider}`..\n'
'Implement `Client.get_mkt_pairs()`!'
)
return self return self
@classmethod @classmethod

View File

@ -273,7 +273,7 @@ async def _reconnect_forever(
nobsws._connected.set() nobsws._connected.set()
await trio.sleep_forever() await trio.sleep_forever()
except HandshakeError: except HandshakeError:
log.exception('Retrying connection') log.exception(f'Retrying connection')
# ws & nursery block ends # ws & nursery block ends
@ -359,8 +359,8 @@ async def open_autorecon_ws(
''' '''
JSONRPC response-request style machinery for transparent multiplexing JSONRPC response-request style machinery for transparent multiplexing of msgs
of msgs over a `NoBsWs`. over a NoBsWs.
''' '''
@ -377,82 +377,43 @@ async def open_jsonrpc_session(
url: str, url: str,
start_id: int = 0, start_id: int = 0,
response_type: type = JSONRPCResult, response_type: type = JSONRPCResult,
msg_recv_timeout: float = float('inf'), request_type: Optional[type] = None,
# ^NOTE, since only `deribit` is using this jsonrpc stuff atm request_hook: Optional[Callable] = None,
# and options mkts are generally "slow moving".. error_hook: Optional[Callable] = None,
#
# FURTHER if we break the underlying ws connection then since we
# don't pass a `fixture` to the task that manages `NoBsWs`, i.e.
# `_reconnect_forever()`, the jsonrpc "transport pipe" get's
# broken and never restored with wtv init sequence is required to
# re-establish a working req-resp session.
) -> Callable[[str, dict], dict]: ) -> Callable[[str, dict], dict]:
'''
Init a json-RPC-over-websocket connection to the provided `url`.
A `json_rpc: Callable[[str, dict], dict` is delivered to the
caller for sending requests and a bg-`trio.Task` handles
processing of response msgs including error reporting/raising in
the parent/caller task.
'''
# NOTE, store all request msgs so we can raise errors on the
# caller side!
req_msgs: dict[int, dict] = {}
async with ( async with (
trio.open_nursery() as tn, trio.open_nursery() as n,
open_autorecon_ws( open_autorecon_ws(url) as ws
url=url,
msg_recv_timeout=msg_recv_timeout,
) as ws
): ):
rpc_id: Iterable[int] = count(start_id) rpc_id: Iterable = count(start_id)
rpc_results: dict[int, dict] = {} rpc_results: dict[int, dict] = {}
async def json_rpc( async def json_rpc(method: str, params: dict) -> dict:
method: str,
params: dict,
) -> dict:
''' '''
perform a json rpc call and wait for the result, raise exception in perform a json rpc call and wait for the result, raise exception in
case of error field present on response case of error field present on response
''' '''
nonlocal req_msgs
req_id: int = next(rpc_id)
msg = { msg = {
'jsonrpc': '2.0', 'jsonrpc': '2.0',
'id': req_id, 'id': next(rpc_id),
'method': method, 'method': method,
'params': params 'params': params
} }
_id = msg['id'] _id = msg['id']
result = rpc_results[_id] = { rpc_results[_id] = {
'result': None, 'result': None,
'error': None, 'event': trio.Event()
'event': trio.Event(), # signal caller resp arrived
} }
req_msgs[_id] = msg
await ws.send_msg(msg) await ws.send_msg(msg)
# wait for reponse before unblocking requester code
await rpc_results[_id]['event'].wait() await rpc_results[_id]['event'].wait()
if (maybe_result := result['result']): ret = rpc_results[_id]['result']
ret = maybe_result
del rpc_results[_id]
else: del rpc_results[_id]
err = result['error']
raise Exception(
f'JSONRPC request failed\n'
f'req: {msg}\n'
f'resp: {err}\n'
)
if ret.error is not None: if ret.error is not None:
raise Exception(json.dumps(ret.error, indent=4)) raise Exception(json.dumps(ret.error, indent=4))
@ -467,7 +428,6 @@ async def open_jsonrpc_session(
the server side. the server side.
''' '''
nonlocal req_msgs
async for msg in ws: async for msg in ws:
match msg: match msg:
case { case {
@ -491,28 +451,19 @@ async def open_jsonrpc_session(
'params': _, 'params': _,
}: }:
log.debug(f'Recieved\n{msg}') log.debug(f'Recieved\n{msg}')
if request_hook:
await request_hook(request_type(**msg))
case { case {
'error': error 'error': error
}: }:
# retreive orig request msg, set error log.warning(f'Recieved\n{error}')
# response in original "result" msg, if error_hook:
# THEN FINALLY set the event to signal caller await error_hook(response_type(**msg))
# to raise the error in the parent task.
req_id: int = error['id']
req_msg: dict = req_msgs[req_id]
result: dict = rpc_results[req_id]
result['error'] = error
result['event'].set()
log.error(
f'JSONRPC request failed\n'
f'req: {req_msg}\n'
f'resp: {error}\n'
)
case _: case _:
log.warning(f'Unhandled JSON-RPC msg!?\n{msg}') log.warning(f'Unhandled JSON-RPC msg!?\n{msg}')
tn.start_soon(recv_task) n.start_soon(recv_task)
yield json_rpc yield json_rpc
tn.cancel_scope.cancel() n.cancel_scope.cancel()

View File

@ -786,6 +786,7 @@ async def install_brokerd_search(
@acm @acm
async def maybe_open_feed( async def maybe_open_feed(
fqmes: list[str], fqmes: list[str],
loglevel: str | None = None, loglevel: str | None = None,
@ -839,6 +840,7 @@ async def maybe_open_feed(
@acm @acm
async def open_feed( async def open_feed(
fqmes: list[str], fqmes: list[str],
loglevel: str | None = None, loglevel: str | None = None,

View File

@ -36,10 +36,10 @@ from ._sharedmem import (
ShmArray, ShmArray,
_Token, _Token,
) )
from piker.accounting import MktPair
if TYPE_CHECKING: if TYPE_CHECKING:
from piker.data.feed import Feed from ..accounting import MktPair
from .feed import Feed
class Flume(Struct): class Flume(Struct):

View File

@ -113,9 +113,9 @@ def validate_backend(
) )
if ep is None: if ep is None:
log.warning( log.warning(
f'Provider backend {mod.name!r} is missing ' f'Provider backend {mod.name} is missing '
f'{daemon_name!r} support?\n' f'{daemon_name} support :(\n'
f'|_module endpoint-func missing: {name!r}\n' f'The following endpoint is missing: {name}'
) )
inits: list[ inits: list[

View File

@ -19,10 +19,6 @@ Log like a forester!
""" """
import logging import logging
import json import json
import reprlib
from typing import (
Callable,
)
import tractor import tractor
from pygments import ( from pygments import (
@ -88,29 +84,3 @@ def colorize_json(
# likeable styles: algol_nu, tango, monokai # likeable styles: algol_nu, tango, monokai
formatters.TerminalTrueColorFormatter(style=style) formatters.TerminalTrueColorFormatter(style=style)
) )
# TODO, eventually defer to the version in `modden` once
# it becomes a dep!
def mk_repr(
**repr_kws,
) -> Callable[[str], str]:
'''
Allocate and deliver a `repr.Repr` instance with provided input
settings using the std-lib's `reprlib` mod,
* https://docs.python.org/3/library/reprlib.html
------ Ex. ------
An up to 6-layer-nested `dict` as multi-line:
- https://stackoverflow.com/a/79102479
- https://docs.python.org/3/library/reprlib.html#reprlib.Repr.maxlevel
'''
def_kws: dict[str, int] = dict(
indent=2,
maxlevel=6, # recursion levels
maxstring=66, # match editor line-len limit
)
def_kws |= repr_kws
reprr = reprlib.Repr(**def_kws)
return reprr.repr

View File

@ -119,10 +119,6 @@ async def open_piker_runtime(
# spawn other specialized daemons I think? # spawn other specialized daemons I think?
enable_modules=enable_modules, enable_modules=enable_modules,
# TODO: how to configure this?
# keep it on by default if debug mode is set?
maybe_enable_greenback=False,
**tractor_kwargs, **tractor_kwargs,
) as actor, ) as actor,

View File

@ -386,8 +386,6 @@ def ldshm(
open_annot_ctl() as actl, open_annot_ctl() as actl,
): ):
shm_df: pl.DataFrame | None = None shm_df: pl.DataFrame | None = None
tf2aids: dict[float, dict] = {}
for ( for (
shmfile, shmfile,
shm, shm,
@ -528,17 +526,16 @@ def ldshm(
new_df, new_df,
step_gaps, step_gaps,
) )
# last chance manual overwrites in REPL # last chance manual overwrites in REPL
# await tractor.pause() await tractor.pause()
assert aids assert aids
tf2aids[period_s] = aids
else: else:
# allow interaction even when no ts problems. # allow interaction even when no ts problems.
assert not diff
await tractor.pause() await tractor.pause()
log.info('Exiting TSP shm anal-izer!') # assert not diff
if shm_df is None: if shm_df is None:
log.error( log.error(

View File

@ -161,13 +161,7 @@ class NativeStorageClient:
def index_files(self): def index_files(self):
for path in self._datadir.iterdir(): for path in self._datadir.iterdir():
if ( if path.name in {'borked', 'expired',}:
path.is_dir()
or
'.parquet' not in str(path)
# or
# path.name in {'borked', 'expired',}
):
continue continue
key: str = path.name.rstrip('.parquet') key: str = path.name.rstrip('.parquet')

View File

@ -44,10 +44,8 @@ import trio
from trio_typing import TaskStatus from trio_typing import TaskStatus
import tractor import tractor
from pendulum import ( from pendulum import (
Interval,
DateTime, DateTime,
Duration, Duration,
duration as mk_duration,
from_timestamp, from_timestamp,
) )
import numpy as np import numpy as np
@ -216,8 +214,7 @@ async def maybe_fill_null_segments(
# pair, immediately stop backfilling? # pair, immediately stop backfilling?
if ( if (
start_dt start_dt
and and end_dt < start_dt
end_dt < start_dt
): ):
await tractor.pause() await tractor.pause()
break break
@ -265,7 +262,6 @@ async def maybe_fill_null_segments(
except tractor.ContextCancelled: except tractor.ContextCancelled:
# log.exception # log.exception
await tractor.pause() await tractor.pause()
raise
null_segs_detected.set() null_segs_detected.set()
# RECHECK for more null-gaps # RECHECK for more null-gaps
@ -353,7 +349,7 @@ async def maybe_fill_null_segments(
async def start_backfill( async def start_backfill(
get_hist, get_hist,
def_frame_duration: Duration, frame_types: dict[str, Duration] | None,
mod: ModuleType, mod: ModuleType,
mkt: MktPair, mkt: MktPair,
shm: ShmArray, shm: ShmArray,
@ -383,23 +379,22 @@ async def start_backfill(
update_start_on_prepend: bool = False update_start_on_prepend: bool = False
if backfill_until_dt is None: if backfill_until_dt is None:
# TODO: per-provider default history-durations? # TODO: drop this right and just expose the backfill
# -[ ] inside the `open_history_client()` config allow # limits inside a [storage] section in conf.toml?
# declaring the history duration limits instead of # when no tsdb "last datum" is provided, we just load
# guessing and/or applying the same limits to all? # some near-term history.
# # periods = {
# -[ ] allow declaring (default) per-provider backfill # 1: {'days': 1},
# limits inside a [storage] sub-section in conf.toml? # 60: {'days': 14},
# # }
# NOTE, when no tsdb "last datum" is provided, we just
# load some near-term history by presuming a "decently # do a decently sized backfill and load it into storage.
# large" 60s duration limit and a much shorter 1s range.
periods = { periods = {
1: {'days': 2}, 1: {'days': 2},
60: {'years': 6}, 60: {'years': 6},
} }
period_duration: int = periods[timeframe] period_duration: int = periods[timeframe]
update_start_on_prepend: bool = True update_start_on_prepend = True
# NOTE: manually set the "latest" datetime which we intend to # NOTE: manually set the "latest" datetime which we intend to
# backfill history "until" so as to adhere to the history # backfill history "until" so as to adhere to the history
@ -421,6 +416,7 @@ async def start_backfill(
f'backfill_until_dt: {backfill_until_dt}\n' f'backfill_until_dt: {backfill_until_dt}\n'
f'last_start_dt: {last_start_dt}\n' f'last_start_dt: {last_start_dt}\n'
) )
try: try:
( (
array, array,
@ -430,114 +426,71 @@ async def start_backfill(
timeframe, timeframe,
end_dt=last_start_dt, end_dt=last_start_dt,
) )
except NoData as _daterr: except NoData as _daterr:
orig_last_start_dt: datetime = last_start_dt # 3 cases:
gap_report: str = ( # - frame in the middle of a legit venue gap
f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n' # - history actually began at the `last_start_dt`
f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n' # - some other unknown error (ib blocking the
f'last_start_dt: {orig_last_start_dt}\n\n' # history bc they don't want you seeing how they
f'bf_until: {backfill_until_dt}\n' # cucked all the tinas..)
if dur := frame_types.get(timeframe):
# decrement by a frame's worth of duration and
# retry a few times.
last_start_dt.subtract(
seconds=dur.total_seconds()
) )
# EMPTY FRAME signal with 3 (likely) causes: log.warning(
# f'{mod.name} -> EMPTY FRAME for end_dt?\n'
# 1. range contains legit gap in venue history f'tf@fqme: {timeframe}@{mkt.fqme}\n'
# 2. history actually (edge case) **began** at the 'bf_until <- last_start_dt:\n'
# value `last_start_dt` f'{backfill_until_dt} <- {last_start_dt}\n'
# 3. some other unknown error (ib blocking the f'Decrementing `end_dt` by {dur} and retry..\n'
# history-query bc they don't want you seeing how
# they cucked all the tinas.. like with options
# hist)
#
if def_frame_duration:
# decrement by a duration's (frame) worth of time
# as maybe indicated by the backend to see if we
# can get older data before this possible
# "history gap".
last_start_dt: datetime = last_start_dt.subtract(
seconds=def_frame_duration.total_seconds()
) )
gap_report += (
f'Decrementing `end_dt` and retrying with,\n'
f'def_frame_duration: {def_frame_duration}\n'
f'(new) last_start_dt: {last_start_dt}\n'
)
log.warning(gap_report)
# skip writing to shm/tsdb and try the next
# duration's worth of prior history.
continue continue
else:
# await tractor.pause()
raise DataUnavailable(gap_report)
# broker says there never was or is no more history to pull # broker says there never was or is no more history to pull
except DataUnavailable as due: except DataUnavailable:
message: str = due.args[0]
log.warning( log.warning(
f'Provider {mod.name!r} halted backfill due to,\n\n' f'NO-MORE-DATA in range?\n'
f'`{mod.name}` halted history:\n'
f'{message}\n' f'tf@fqme: {timeframe}@{mkt.fqme}\n'
'bf_until <- last_start_dt:\n'
f'fqme: {mkt.fqme}\n' f'{backfill_until_dt} <- {last_start_dt}\n'
f'timeframe: {timeframe}\n'
f'last_start_dt: {last_start_dt}\n'
f'bf_until: {backfill_until_dt}\n'
) )
# UGH: what's a better way?
# TODO: backends are responsible for being correct on # ugh, what's a better way?
# this right!? # TODO: fwiw, we probably want a way to signal a throttle
# -[ ] in the `ib` case we could maybe offer some way # condition (eg. with ib) so that we can halt the
# to halt the request loop until the condition is # request loop until the condition is resolved?
# resolved or should the backend be entirely in if timeframe > 1:
# charge of solving such faults? yes, right? await tractor.pause()
return return
time: np.ndarray = array['time']
assert ( assert (
time[0] array['time'][0]
== ==
next_start_dt.timestamp() next_start_dt.timestamp()
) )
assert time[-1] == next_end_dt.timestamp() diff = last_start_dt - next_start_dt
frame_time_diff_s = diff.seconds
expected_dur: Interval = last_start_dt - next_start_dt
# frame's worth of sample-period-steps, in seconds # frame's worth of sample-period-steps, in seconds
frame_size_s: float = len(array) * timeframe frame_size_s: float = len(array) * timeframe
recv_frame_dur: Duration = ( expected_frame_size_s: float = frame_size_s + timeframe
from_timestamp(array[-1]['time']) if frame_time_diff_s > expected_frame_size_s:
-
from_timestamp(array[0]['time'])
)
if (
(lt_frame := (recv_frame_dur < expected_dur))
or
(null_frame := (frame_size_s == 0))
# ^XXX, should NEVER hit now!
):
# XXX: query result includes a start point prior to our # XXX: query result includes a start point prior to our
# expected "frame size" and thus is likely some kind of # expected "frame size" and thus is likely some kind of
# history gap (eg. market closed period, outage, etc.) # history gap (eg. market closed period, outage, etc.)
# so just report it to console for now. # so just report it to console for now.
if lt_frame:
reason = 'Possible GAP (or first-datum)'
else:
assert null_frame
reason = 'NULL-FRAME'
missing_dur: Interval = expected_dur.end - recv_frame_dur.end
log.warning( log.warning(
f'{timeframe}s-series {reason} detected!\n' 'GAP DETECTED:\n'
f'fqme: {mkt.fqme}\n' f'last_start_dt: {last_start_dt}\n'
f'last_start_dt: {last_start_dt}\n\n' f'diff: {diff}\n'
f'recv interval: {recv_frame_dur}\n' f'frame_time_diff_s: {frame_time_diff_s}\n'
f'expected interval: {expected_dur}\n\n'
f'Missing duration of history of {missing_dur.in_words()!r}\n'
f'{missing_dur}\n'
) )
# await tractor.pause()
to_push = diff_history( to_push = diff_history(
array, array,
@ -612,27 +565,22 @@ async def start_backfill(
# long-term storage. # long-term storage.
if ( if (
storage is not None storage is not None
and and write_tsdb
write_tsdb
): ):
log.info( log.info(
f'Writing {ln} frame to storage:\n' f'Writing {ln} frame to storage:\n'
f'{next_start_dt} -> {last_start_dt}' f'{next_start_dt} -> {last_start_dt}'
) )
# NOTE, always drop the src asset token for # always drop the src asset token for
# non-currency-pair like market types (for now) # non-currency-pair like market types (for now)
#
# THAT IS, for now our table key schema is NOT
# including the dst[/src] source asset token. SO,
# 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for
# historical reasons ONLY.
if mkt.dst.atype not in { if mkt.dst.atype not in {
'crypto', 'crypto',
'crypto_currency', 'crypto_currency',
'fiat', # a "forex pair" 'fiat', # a "forex pair"
'perpetual_future', # stupid "perps" from cex land
}: }:
# for now, our table key schema is not including
# the dst[/src] source asset token.
col_sym_key: str = mkt.get_fqme( col_sym_key: str = mkt.get_fqme(
delim_char='', delim_char='',
without_src=True, without_src=True,
@ -737,7 +685,7 @@ async def back_load_from_tsdb(
last_tsdb_dt last_tsdb_dt
and latest_start_dt and latest_start_dt
): ):
backfilled_size_s: Duration = ( backfilled_size_s = (
latest_start_dt - last_tsdb_dt latest_start_dt - last_tsdb_dt
).seconds ).seconds
# if the shm buffer len is not large enough to contain # if the shm buffer len is not large enough to contain
@ -960,8 +908,6 @@ async def tsdb_backfill(
f'{pformat(config)}\n' f'{pformat(config)}\n'
) )
# concurrently load the provider's most-recent-frame AND any
# pre-existing tsdb history already saved in `piker` storage.
dt_eps: list[DateTime, DateTime] = [] dt_eps: list[DateTime, DateTime] = []
async with trio.open_nursery() as tn: async with trio.open_nursery() as tn:
tn.start_soon( tn.start_soon(
@ -972,6 +918,7 @@ async def tsdb_backfill(
timeframe, timeframe,
config, config,
) )
tsdb_entry: tuple = await load_tsdb_hist( tsdb_entry: tuple = await load_tsdb_hist(
storage, storage,
mkt, mkt,
@ -1000,25 +947,6 @@ async def tsdb_backfill(
mr_end_dt, mr_end_dt,
) = dt_eps ) = dt_eps
first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds
calced_frame_size: Duration = mk_duration(
seconds=first_frame_dur_s,
)
# NOTE, attempt to use the backend declared default frame
# sizing (as allowed by their time-series query APIs) and
# if not provided try to construct a default from the
# first frame received above.
def_frame_durs: dict[
int,
Duration,
]|None = config.get('frame_types', None)
if def_frame_durs:
def_frame_size: Duration = def_frame_durs[timeframe]
assert def_frame_size == calced_frame_size
else:
# use what we calced from first frame above.
def_frame_size = calced_frame_size
# NOTE: when there's no offline data, there's 2 cases: # NOTE: when there's no offline data, there's 2 cases:
# - data backend doesn't support timeframe/sample # - data backend doesn't support timeframe/sample
# period (in which case `dt_eps` should be `None` and # period (in which case `dt_eps` should be `None` and
@ -1049,7 +977,7 @@ async def tsdb_backfill(
partial( partial(
start_backfill, start_backfill,
get_hist=get_hist, get_hist=get_hist,
def_frame_duration=def_frame_size, frame_types=config.get('frame_types', None),
mod=mod, mod=mod,
mkt=mkt, mkt=mkt,
shm=shm, shm=shm,

View File

@ -616,18 +616,6 @@ def detect_price_gaps(
# ]) # ])
... ...
# TODO: probably just use the null_segs impl above?
def detect_vlm_gaps(
df: pl.DataFrame,
col: str = 'volume',
) -> pl.DataFrame:
vnull: pl.DataFrame = w_dts.filter(
pl.col(col) == 0
)
return vnull
def dedupe( def dedupe(
src_df: pl.DataFrame, src_df: pl.DataFrame,
@ -638,6 +626,7 @@ def dedupe(
) -> tuple[ ) -> tuple[
pl.DataFrame, # with dts pl.DataFrame, # with dts
pl.DataFrame, # gaps
pl.DataFrame, # with deduplicated dts (aka gap/repeat removal) pl.DataFrame, # with deduplicated dts (aka gap/repeat removal)
int, # len diff between input and deduped int, # len diff between input and deduped
]: ]:
@ -650,22 +639,19 @@ def dedupe(
''' '''
wdts: pl.DataFrame = with_dts(src_df) wdts: pl.DataFrame = with_dts(src_df)
deduped = wdts
# remove duplicated datetime samples/sections
deduped: pl.DataFrame = wdts.unique(
# subset=['dt'],
subset=['time'],
maintain_order=True,
)
# maybe sort on any time field # maybe sort on any time field
if sort: if sort:
deduped = deduped.sort(by='time') wdts = wdts.sort(by='time')
# TODO: detect out-of-order segments which were corrected! # TODO: detect out-of-order segments which were corrected!
# -[ ] report in log msg # -[ ] report in log msg
# -[ ] possibly return segment sections which were moved? # -[ ] possibly return segment sections which were moved?
# remove duplicated datetime samples/sections
deduped: pl.DataFrame = wdts.unique(
subset=['dt'],
maintain_order=True,
)
diff: int = ( diff: int = (
wdts.height wdts.height
- -

View File

@ -21,4 +21,230 @@ Extensions to built-in or (heavily used but 3rd party) friend-lib
types. types.
''' '''
from tractor.msg import Struct as Struct from __future__ import annotations
from collections import UserList
from pprint import (
saferepr,
)
from typing import Any
from msgspec import (
msgpack,
Struct as _Struct,
structs,
)
class DiffDump(UserList):
'''
Very simple list delegator that repr() dumps (presumed) tuple
elements of the form `tuple[str, Any, Any]` in a nice
multi-line readable form for analyzing `Struct` diffs.
'''
def __repr__(self) -> str:
if not len(self):
return super().__repr__()
# format by displaying item pair's ``repr()`` on multiple,
# indented lines such that they are more easily visually
# comparable when printed to console when printed to
# console.
repstr: str = '[\n'
for k, left, right in self:
repstr += (
f'({k},\n'
f'\t{repr(left)},\n'
f'\t{repr(right)},\n'
')\n'
)
repstr += ']\n'
return repstr
class Struct(
_Struct,
# https://jcristharif.com/msgspec/structs.html#tagged-unions
# tag='pikerstruct',
# tag=True,
):
'''
A "human friendlier" (aka repl buddy) struct subtype.
'''
def _sin_props(self) -> Iterator[
tuple[
structs.FieldIinfo,
str,
Any,
]
]:
'''
Iterate over all non-@property fields of this struct.
'''
fi: structs.FieldInfo
for fi in structs.fields(self):
key: str = fi.name
val: Any = getattr(self, key)
yield fi, key, val
def to_dict(
self,
include_non_members: bool = True,
) -> dict:
'''
Like it sounds.. direct delegation to:
https://jcristharif.com/msgspec/api.html#msgspec.structs.asdict
BUT, by default we pop all non-member (aka not defined as
struct fields) fields by default.
'''
asdict: dict = structs.asdict(self)
if include_non_members:
return asdict
# only return a dict of the struct members
# which were provided as input, NOT anything
# added as type-defined `@property` methods!
sin_props: dict = {}
fi: structs.FieldInfo
for fi, k, v in self._sin_props():
sin_props[k] = asdict[k]
return sin_props
def pformat(
self,
field_indent: int = 2,
indent: int = 0,
) -> str:
'''
Recursion-safe `pprint.pformat()` style formatting of
a `msgspec.Struct` for sane reading by a human using a REPL.
'''
# global whitespace indent
ws: str = ' '*indent
# field whitespace indent
field_ws: str = ' '*(field_indent + indent)
# qtn: str = ws + self.__class__.__qualname__
qtn: str = self.__class__.__qualname__
obj_str: str = '' # accumulator
fi: structs.FieldInfo
k: str
v: Any
for fi, k, v in self._sin_props():
# TODO: how can we prefer `Literal['option1', 'option2,
# ..]` over .__name__ == `Literal` but still get only the
# latter for simple types like `str | int | None` etc..?
ft: type = fi.type
typ_name: str = getattr(ft, '__name__', str(ft))
# recurse to get sub-struct's `.pformat()` output Bo
if isinstance(v, Struct):
val_str: str = v.pformat(
indent=field_indent + indent,
field_indent=indent + field_indent,
)
else: # the `pprint` recursion-safe format:
# https://docs.python.org/3.11/library/pprint.html#pprint.saferepr
val_str: str = saferepr(v)
obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n')
return (
f'{qtn}(\n'
f'{obj_str}'
f'{ws})'
)
# TODO: use a pprint.PrettyPrinter instance around ONLY rendering
# inside a known tty?
# def __repr__(self) -> str:
# ...
# __str__ = __repr__ = pformat
__repr__ = pformat
def copy(
self,
update: dict | None = None,
) -> Struct:
'''
Validate-typecast all self defined fields, return a copy of
us with all such fields.
NOTE: This is kinda like the default behaviour in
`pydantic.BaseModel` except a copy of the object is
returned making it compat with `frozen=True`.
'''
if update:
for k, v in update.items():
setattr(self, k, v)
# NOTE: roundtrip serialize to validate
# - enode to msgpack binary format,
# - decode that back to a struct.
return msgpack.Decoder(type=type(self)).decode(
msgpack.Encoder().encode(self)
)
def typecast(
self,
# TODO: allow only casting a named subset?
# fields: set[str] | None = None,
) -> None:
'''
Cast all fields using their declared type annotations
(kinda like what `pydantic` does by default).
NOTE: this of course won't work on frozen types, use
``.copy()`` above in such cases.
'''
# https://jcristharif.com/msgspec/api.html#msgspec.structs.fields
fi: structs.FieldInfo
for fi in structs.fields(self):
setattr(
self,
fi.name,
fi.type(getattr(self, fi.name)),
)
def __sub__(
self,
other: Struct,
) -> DiffDump[tuple[str, Any, Any]]:
'''
Compare fields/items key-wise and return a ``DiffDump``
for easy visual REPL comparison B)
'''
diffs: DiffDump[tuple[str, Any, Any]] = DiffDump()
for fi in structs.fields(self):
attr_name: str = fi.name
ours: Any = getattr(self, attr_name)
theirs: Any = getattr(other, attr_name)
if ours != theirs:
diffs.append((
attr_name,
ours,
theirs,
))
return diffs