`.brokers.cli`: module type and todo for `--pdb` flag to NOT src from sub-cmd

Type loaded backend modules
Bump the content/style of various `.brokers.core` docstrings
2025-02-19 17:40:31 -05:00 · 2025-02-19 17:40:31 -05:00 · 2025-02-19 17:40:31 -05:00 · 2025-02-19 17:40:01 -05:00 · 2025-02-19 17:40:01 -05:00 · 2025-02-19 17:40:01 -05:00
18 changed files with 442 additions and 513 deletions
--- a/piker/accounting/_pos.py
+++ b/piker/accounting/_pos.py
@ -30,8 +30,7 @@ from types import ModuleType
 from typing import (
    Any,
    Iterator,
-    Generator,
+    Generator
    TYPE_CHECKING,
 )
 import pendulum
@ -60,10 +59,8 @@ from ..clearing._messages import (
    BrokerdPosition,
 )
 from piker.types import Struct
 from piker.log import get_logger
 if TYPE_CHECKING:
 from piker.data._symcache import SymbologyCache
 from ..log import get_logger
 log = get_logger(__name__)
@ -496,17 +493,6 @@ class Account(Struct):
        _mktmap_table: dict[str, MktPair] | None = None,
        only_require: list[str]|True = True,
        # ^list of fqmes that are "required" to be processed from
        # this ledger pass; we often don't care about others and
        # definitely shouldn't always error in such cases.
        # (eg. broker backend loaded that doesn't yet supsport the
        # symcache but also, inside the paper engine we don't ad-hoc
        # request `get_mkt_info()` for every symbol in the ledger,
        # only the one for which we're simulating against).
        # TODO, not sure if there's a better soln for this, ideally
        # all backends get symcache support afap i guess..
    ) -> dict[str, Position]:
        '''
        Update the internal `.pps[str, Position]` table from input
@ -549,32 +535,11 @@ class Account(Struct):
                if _mktmap_table is None:
                    raise
                required: bool = (
                    only_require is True
                    or (
                        only_require is not True
                        and
                        fqme in only_require
                    )
                )
                # XXX: caller is allowed to provide a fallback
                # mktmap table for the case where a new position is
                # being added and the preloaded symcache didn't
                # have this entry prior (eg. with frickin IB..)
-                if (
+                mkt = _mktmap_table[fqme]
                    not (mkt := _mktmap_table.get(fqme))
                    and
                    required
                ):
                    raise
                elif not required:
                    continue
                else:
                    # should be an entry retreived somewhere
                    assert mkt
            if not (pos := pps.get(bs_mktid)):
@ -691,7 +656,7 @@ class Account(Struct):
    def write_config(self) -> None:
        '''
        Write the current account state to the user's account TOML file, normally
-        something like `pps.toml`.
+        something like ``pps.toml``.
        '''
        # TODO: show diff output?
--- a/piker/clearing/_client.py
+++ b/piker/clearing/_client.py
@ -168,6 +168,7 @@ class OrderClient(Struct):
 async def relay_orders_from_sync_code(
    client: OrderClient,
    symbol_key: str,
    to_ems_stream: tractor.MsgStream,
@ -241,11 +242,6 @@ async def open_ems(
    async with maybe_open_emsd(
        broker,
        # XXX NOTE, LOL so this determines the daemon `emsd` loglevel
        # then FYI.. that's kinda wrong no?
        # -[ ] shouldn't it be set by `pikerd -l` or no?
        # -[ ] would make a lot more sense to have a subsys ctl for
        #     levels.. like `-l emsd.info` or something?
        loglevel=loglevel,
    ) as portal:
--- a/piker/clearing/_ems.py
+++ b/piker/clearing/_ems.py
@ -653,11 +653,7 @@ class Router(Struct):
            flume = feed.flumes[fqme]
            first_quote: dict = flume.first_quote
            book: DarkBook = self.get_dark_book(broker)
-
+            book.lasts[fqme]: float = float(first_quote['last'])
            if not (last := first_quote.get('last')):
                last: float = flume.rt_shm.array[-1]['close']
            book.lasts[fqme]: float = float(last)
            async with self.maybe_open_brokerd_dialog(
                brokermod=brokermod,
@ -720,7 +716,7 @@ class Router(Struct):
            subs = self.subscribers[sub_key]
        sent_some: bool = False
-        for client_stream in subs.copy():
+        for client_stream in subs:
            try:
                await client_stream.send(msg)
                sent_some = True
@ -1014,10 +1010,6 @@ async def translate_and_relay_brokerd_events(
                status_msg.brokerd_msg = msg
                status_msg.src = msg.broker_details['name']
                if not status_msg.req:
                    # likely some order change state?
                    await tractor.pause()
                else:
                await router.client_broadcast(
                    status_msg.req.symbol,
                    status_msg,
--- a/piker/clearing/_paper_engine.py
+++ b/piker/clearing/_paper_engine.py
@ -297,8 +297,6 @@ class PaperBoi(Struct):
        # transmit pp msg to ems
        pp: Position = self.acnt.pps[bs_mktid]
        # TODO, this will break if `require_only=True` was passed to
        # `.update_from_ledger()`
        pp_msg = BrokerdPosition(
            broker=self.broker,
@ -655,7 +653,6 @@ async def open_trade_dialog(
                # in) use manually constructed table from calling
                # the `.get_mkt_info()` provider EP above.
                _mktmap_table=mkt_by_fqme,
                only_require=list(mkt_by_fqme),
            )
            pp_msgs: list[BrokerdPosition] = []
--- a/piker/clearing/_util.py
+++ b/piker/clearing/_util.py
@ -30,7 +30,6 @@ subsys: str = 'piker.clearing'
 log = get_logger(subsys)
 # TODO, oof doesn't this ignore the `loglevel` then???
 get_console_log = partial(
    get_console_log,
    name=subsys,
--- a/piker/data/_sampling.py
+++ b/piker/data/_sampling.py
@ -95,12 +95,6 @@ class Sampler:
    # history loading.
    incr_task_cs: trio.CancelScope | None = None
    bcast_errors: tuple[Exception] = (
        trio.BrokenResourceError,
        trio.ClosedResourceError,
        trio.EndOfChannel,
    )
    # holds all the ``tractor.Context`` remote subscriptions for
    # a particular sample period increment event: all subscribers are
    # notified on a step.
@ -264,15 +258,14 @@ class Sampler:
        subs: set
        last_ts, subs = pair
-        # NOTE, for debugging pub-sub issues
+        task = trio.lowlevel.current_task()
-        # task = trio.lowlevel.current_task()
+        log.debug(
-        # log.debug(
+            f'SUBS {self.subscribers}\n'
-        #     f'AlL-SUBS@{period_s!r}: {self.subscribers}\n'
+            f'PAIR {pair}\n'
-        #     f'PAIR: {pair}\n'
+            f'TASK: {task}: {id(task)}\n'
-        #     f'TASK: {task}: {id(task)}\n'
+            f'broadcasting {period_s} -> {last_ts}\n'
        #     f'broadcasting {period_s} -> {last_ts}\n'
            # f'consumers: {subs}'
-        # )
+        )
        borked: set[MsgStream] = set()
        sent: set[MsgStream] = set()
        while True:
@ -289,11 +282,12 @@ class Sampler:
                        await stream.send(msg)
                        sent.add(stream)
-                    except self.bcast_errors as err:
+                    except (
                        trio.BrokenResourceError,
                        trio.ClosedResourceError
                    ):
                        log.error(
-                            f'Connection dropped for IPC ctx\n'
+                            f'{stream._ctx.chan.uid} dropped connection'
                            f'{stream._ctx}\n\n'
                            f'Due to {type(err)}'
                        )
                        borked.add(stream)
                else:
@ -400,8 +394,7 @@ async def register_with_sampler(
                finally:
                    if (
                        sub_for_broadcasts
-                        and
+                        and subs
                        subs
                    ):
                        try:
                            subs.remove(stream)
@ -568,7 +561,8 @@ async def open_sample_stream(
 async def sample_and_broadcast(
-    bus: _FeedsBus,
+
    bus: _FeedsBus,  # noqa
    rt_shm: ShmArray,
    hist_shm: ShmArray,
    quote_stream: trio.abc.ReceiveChannel,
@ -588,33 +582,11 @@ async def sample_and_broadcast(
    overruns = Counter()
    # NOTE, only used for debugging live-data-feed issues, though
    # this should be resolved more correctly in the future using the
    # new typed-msgspec feats of `tractor`!
    #
    # XXX, a multiline nested `dict` formatter (since rn quote-msgs
    # are just that).
    # pfmt: Callable[[str], str] = mk_repr()
    # iterate stream delivered by broker
    async for quotes in quote_stream:
        # print(quotes)
-        # XXX WARNING XXX only enable for debugging bc ow can cost
+        # TODO: ``numba`` this!
        # ALOT of perf with HF-feedz!!!
        #
        # log.info(
        #     'Rx live quotes:\n'
        #     f'{pfmt(quotes)}'
        # )
        # TODO,
        # -[ ] `numba` or `cython`-nize this loop possibly?
        #  |_alternatively could we do it in rust somehow by upacking
        #    arrow msgs instead of using `msgspec`?
        # -[ ] use `msgspec.Struct` support in new typed-msging from
        #     `tractor` to ensure only allowed msgs are transmitted?
        #
        for broker_symbol, quote in quotes.items():
            # TODO: in theory you can send the IPC msg *before* writing
            # to the sharedmem array to decrease latency, however, that
@ -687,21 +659,6 @@ async def sample_and_broadcast(
            sub_key: str = broker_symbol.lower()
            subs: set[Sub] = bus.get_subs(sub_key)
            # TODO, figure out how to make this useful whilst
            # incoporating feed "pausing" ..
            #
            # if not subs:
            #     all_bs_fqmes: list[str] = list(
            #         bus._subscribers.keys()
            #     )
            #     log.warning(
            #         f'No subscribers for {brokername!r} live-quote ??\n'
            #         f'broker_symbol: {broker_symbol}\n\n'
            #         f'Maybe the backend-sys symbol does not match one of,\n'
            #         f'{pfmt(all_bs_fqmes)}\n'
            #     )
            # NOTE: by default the broker backend doesn't append
            # it's own "name" into the fqme schema (but maybe it
            # should?) so we have to manually generate the correct
@ -771,14 +728,18 @@ async def sample_and_broadcast(
                        if lags > 10:
                            await tractor.pause()
-                except Sampler.bcast_errors as ipc_err:
+                except (
                    trio.BrokenResourceError,
                    trio.ClosedResourceError,
                    trio.EndOfChannel,
                ):
                    ctx: Context = ipc._ctx
                    chan: Channel = ctx.chan
                    if ctx:
                        log.warning(
-                            f'Dropped `brokerd`-feed for {broker_symbol!r} due to,\n'
+                            'Dropped `brokerd`-quotes-feed connection:\n'
-                            f'x>) {ctx.cid}@{chan.uid}'
+                            f'{broker_symbol}:'
-                            f'|_{ipc_err!r}\n\n'
+                            f'{ctx.cid}@{chan.uid}'
                        )
                    if sub.throttle_rate:
                        assert ipc._closed
@ -795,11 +756,12 @@ async def sample_and_broadcast(
 async def uniform_rate_send(
    rate: float,
    quote_stream: trio.abc.ReceiveChannel,
    stream: MsgStream,
-    task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED,
+    task_status: TaskStatus = trio.TASK_STATUS_IGNORED,
 ) -> None:
    '''
@ -817,16 +779,13 @@ async def uniform_rate_send(
    https://gist.github.com/njsmith/7ea44ec07e901cb78ebe1dd8dd846cb9
    '''
-    # ?TODO? dynamically compute the **actual** approx overhead latency per cycle
+    # TODO: compute the approx overhead latency per cycle
-    # instead of this magic # bidinezz?
+    left_to_sleep = throttle_period = 1/rate - 0.000616
    throttle_period: float = 1/rate - 0.000616
    left_to_sleep: float = throttle_period
    # send cycle state
    first_quote: dict|None
    first_quote = last_quote = None
-    last_send: float = time.time()
+    last_send = time.time()
-    diff: float = 0
+    diff = 0
    task_status.started()
    ticks_by_type: dict[
@ -837,28 +796,22 @@ async def uniform_rate_send(
    clear_types = _tick_groups['clears']
    while True:
        # compute the remaining time to sleep for this throttled cycle
-        left_to_sleep: float = throttle_period - diff
+        left_to_sleep = throttle_period - diff
        if left_to_sleep > 0:
            cs: trio.CancelScope
            with trio.move_on_after(left_to_sleep) as cs:
                sym: str
                last_quote: dict
                try:
                    sym, last_quote = await quote_stream.receive()
                except trio.EndOfChannel:
-                    log.exception(
+                    log.exception(f"feed for {stream} ended?")
                        f'Live stream for feed for ended?\n'
                        f'<=c\n'
                        f'  |_[{stream!r}\n'
                    )
                    break
-                diff: float = time.time() - last_send
+                diff = time.time() - last_send
                if not first_quote:
-                    first_quote: float = last_quote
+                    first_quote = last_quote
                    # first_quote['tbt'] = ticks_by_type
                if (throttle_period - diff) > 0:
@ -919,9 +872,7 @@ async def uniform_rate_send(
        # TODO: now if only we could sync this to the display
        # rate timing exactly lul
        try:
-            await stream.send({
+            await stream.send({sym: first_quote})
                sym: first_quote
            })
        except tractor.RemoteActorError as rme:
            if rme.type is not tractor._exceptions.StreamOverrun:
                raise
@ -932,28 +883,19 @@ async def uniform_rate_send(
                f'{sym}:{ctx.cid}@{chan.uid}'
            )
-        # NOTE: any of these can be raised by `tractor`'s IPC
+        except (
            # NOTE: any of these can be raised by ``tractor``'s IPC
            # transport-layer and we want to be highly resilient
            # to consumers which crash or lose network connection.
            # I.e. we **DO NOT** want to crash and propagate up to
            # ``pikerd`` these kinds of errors!
-        except (
+            trio.ClosedResourceError,
            trio.BrokenResourceError,
            ConnectionResetError,
-        ) + Sampler.bcast_errors as ipc_err:
+        ):
            match ipc_err:
                case trio.EndOfChannel():
                    log.info(
                        f'{stream} terminated by peer,\n'
                        f'{ipc_err!r}'
                    )
                case _:
            # if the feed consumer goes down then drop
            # out of this rate limiter
-                    log.warning(
+            log.warning(f'{stream} closed')
                        f'{stream} closed due to,\n'
                        f'{ipc_err!r}'
                    )
            await stream.aclose()
            return
--- a/piker/data/_symcache.py
+++ b/piker/data/_symcache.py
@ -31,7 +31,6 @@ from pathlib import Path
 from pprint import pformat
 from typing import (
    Any,
    Callable,
    Sequence,
    Hashable,
    TYPE_CHECKING,
@ -57,7 +56,7 @@ from piker.brokers import (
 )
 if TYPE_CHECKING:
-    from piker.accounting import (
+    from ..accounting import (
        Asset,
        MktPair,
    )
@ -150,36 +149,19 @@ class SymbologyCache(Struct):
                    'Implement `Client.get_assets()`!'
                )
-            get_mkt_pairs: Callable|None = getattr(
+            if get_mkt_pairs := getattr(client, 'get_mkt_pairs', None):
                client,
                'get_mkt_pairs',
                None,
            )
            if not get_mkt_pairs:
                log.warning(
                    'No symbology cache `Pair` support for `{provider}`..\n'
                    'Implement `Client.get_mkt_pairs()`!'
                )
                return self
                pairs: dict[str, Struct] = await get_mkt_pairs()
            if not pairs:
                log.warning(
                    'No pairs from intial {provider!r} sym-cache request?\n\n'
                    '`Client.get_mkt_pairs()` -> {pairs!r} ?'
                )
                return self
                for bs_fqme, pair in pairs.items():
                    # NOTE: every backend defined pair should
                    # declare it's ns path for roundtrip
                    # serialization lookup.
                    if not getattr(pair, 'ns_path', None):
                    # XXX: every backend defined pair must declare
                    # a `.ns_path: tractor.NamespacePath` to enable
                    # roundtrip serialization lookup from a local
                    # cache file.
                        raise TypeError(
                            f'Pair-struct for {self.mod.name} MUST define a '
-                        '`.ns_path: str`!\n\n'
+                            '`.ns_path: str`!\n'
-                        f'{pair!r}'
+                            f'{pair}'
                        )
                    entry = await self.mod.get_mkt_info(pair.bs_fqme)
@ -213,6 +195,12 @@ class SymbologyCache(Struct):
                    pair,
                )
            else:
                log.warning(
                    'No symbology cache `Pair` support for `{provider}`..\n'
                    'Implement `Client.get_mkt_pairs()`!'
                )
        return self
    @classmethod
--- a/piker/data/_web_bs.py
+++ b/piker/data/_web_bs.py
@ -273,7 +273,7 @@ async def _reconnect_forever(
                nobsws._connected.set()
                await trio.sleep_forever()
        except HandshakeError:
-            log.exception('Retrying connection')
+            log.exception(f'Retrying connection')
        # ws & nursery block ends
@ -359,8 +359,8 @@ async def open_autorecon_ws(
 '''
-JSONRPC response-request style machinery for transparent multiplexing
+JSONRPC response-request style machinery for transparent multiplexing of msgs
-of msgs over a `NoBsWs`.
+over a NoBsWs.
 '''
@ -377,82 +377,43 @@ async def open_jsonrpc_session(
    url: str,
    start_id: int = 0,
    response_type: type = JSONRPCResult,
-    msg_recv_timeout: float = float('inf'),
+    request_type: Optional[type] = None,
-    # ^NOTE, since only `deribit` is using this jsonrpc stuff atm
+    request_hook: Optional[Callable] = None,
-    # and options mkts are generally "slow moving"..
+    error_hook: Optional[Callable] = None,
    #
    # FURTHER if we break the underlying ws connection then since we
    # don't pass a `fixture` to the task that manages `NoBsWs`, i.e.
    # `_reconnect_forever()`, the jsonrpc "transport pipe" get's
    # broken and never restored with wtv init sequence is required to
    # re-establish a working req-resp session.
 ) -> Callable[[str, dict], dict]:
    '''
    Init a json-RPC-over-websocket connection to the provided `url`.
    A `json_rpc: Callable[[str, dict], dict` is delivered to the
    caller for sending requests and a bg-`trio.Task` handles
    processing of response msgs including error reporting/raising in
    the parent/caller task.
    '''
    # NOTE, store all request msgs so we can raise errors on the
    # caller side!
    req_msgs: dict[int, dict] = {}
    async with (
-        trio.open_nursery() as tn,
+        trio.open_nursery() as n,
-        open_autorecon_ws(
+        open_autorecon_ws(url) as ws
            url=url,
            msg_recv_timeout=msg_recv_timeout,
        ) as ws
    ):
-        rpc_id: Iterable[int] = count(start_id)
+        rpc_id: Iterable = count(start_id)
        rpc_results: dict[int, dict] = {}
-        async def json_rpc(
+        async def json_rpc(method: str, params: dict) -> dict:
            method: str,
            params: dict,
        ) -> dict:
            '''
            perform a json rpc call and wait for the result, raise exception in
            case of error field present on response
            '''
            nonlocal req_msgs
            req_id: int = next(rpc_id)
            msg = {
                'jsonrpc': '2.0',
-                'id': req_id,
+                'id': next(rpc_id),
                'method': method,
                'params': params
            }
            _id = msg['id']
-            result = rpc_results[_id] = {
+            rpc_results[_id] = {
                'result': None,
-                'error': None,
+                'event': trio.Event()
                'event': trio.Event(),  # signal caller resp arrived
            }
            req_msgs[_id] = msg
            await ws.send_msg(msg)
            # wait for reponse before unblocking requester code
            await rpc_results[_id]['event'].wait()
-            if (maybe_result := result['result']):
+            ret = rpc_results[_id]['result']
                ret = maybe_result
                del rpc_results[_id]
-            else:
+            del rpc_results[_id]
                err = result['error']
                raise Exception(
                    f'JSONRPC request failed\n'
                    f'req: {msg}\n'
                    f'resp: {err}\n'
                )
            if ret.error is not None:
                raise Exception(json.dumps(ret.error, indent=4))
@ -467,7 +428,6 @@ async def open_jsonrpc_session(
            the server side.
            '''
            nonlocal req_msgs
            async for msg in ws:
                match msg:
                    case {
@ -491,28 +451,19 @@ async def open_jsonrpc_session(
                        'params': _,
                    }:
                        log.debug(f'Recieved\n{msg}')
                        if request_hook:
                            await request_hook(request_type(**msg))
                    case {
                        'error': error
                    }:
-                        # retreive orig request msg, set error
+                        log.warning(f'Recieved\n{error}')
-                        # response in original "result" msg,
+                        if error_hook:
-                        # THEN FINALLY set the event to signal caller
+                            await error_hook(response_type(**msg))
                        # to raise the error in the parent task.
                        req_id: int = error['id']
                        req_msg: dict = req_msgs[req_id]
                        result: dict = rpc_results[req_id]
                        result['error'] = error
                        result['event'].set()
                        log.error(
                            f'JSONRPC request failed\n'
                            f'req: {req_msg}\n'
                            f'resp: {error}\n'
                        )
                    case _:
                        log.warning(f'Unhandled JSON-RPC msg!?\n{msg}')
-        tn.start_soon(recv_task)
+        n.start_soon(recv_task)
        yield json_rpc
-        tn.cancel_scope.cancel()
+        n.cancel_scope.cancel()
--- a/piker/data/feed.py
+++ b/piker/data/feed.py
@ -786,6 +786,7 @@ async def install_brokerd_search(
@acm
 async def maybe_open_feed(
    fqmes: list[str],
    loglevel: str | None = None,
@ -839,6 +840,7 @@ async def maybe_open_feed(
@acm
 async def open_feed(
    fqmes: list[str],
    loglevel: str | None = None,
--- a/piker/data/flows.py
+++ b/piker/data/flows.py
@ -36,10 +36,10 @@ from ._sharedmem import (
    ShmArray,
    _Token,
 )
 from piker.accounting import MktPair
 if TYPE_CHECKING:
-    from piker.data.feed import Feed
+    from ..accounting import MktPair
    from .feed import Feed
 class Flume(Struct):
--- a/piker/data/validate.py
+++ b/piker/data/validate.py
@ -113,9 +113,9 @@ def validate_backend(
            )
            if ep is None:
                log.warning(
-                    f'Provider backend {mod.name!r} is missing '
+                    f'Provider backend {mod.name} is missing '
-                    f'{daemon_name!r} support?\n'
+                    f'{daemon_name} support :(\n'
-                    f'|_module endpoint-func missing: {name!r}\n'
+                    f'The following endpoint is missing: {name}'
                )
    inits: list[
--- a/piker/log.py
+++ b/piker/log.py
@ -19,10 +19,6 @@ Log like a forester!
 """
 import logging
 import json
 import reprlib
 from typing import (
    Callable,
 )
 import tractor
 from pygments import (
@ -88,29 +84,3 @@ def colorize_json(
        # likeable styles: algol_nu, tango, monokai
        formatters.TerminalTrueColorFormatter(style=style)
    )
 # TODO, eventually defer to the version in `modden` once
 # it becomes a dep!
 def mk_repr(
    **repr_kws,
 ) -> Callable[[str], str]:
    '''
    Allocate and deliver a `repr.Repr` instance with provided input
    settings using the std-lib's `reprlib` mod,
     * https://docs.python.org/3/library/reprlib.html
    ------ Ex. ------
    An up to 6-layer-nested `dict` as multi-line:
    - https://stackoverflow.com/a/79102479
    - https://docs.python.org/3/library/reprlib.html#reprlib.Repr.maxlevel
    '''
    def_kws: dict[str, int] = dict(
        indent=2,
        maxlevel=6,  # recursion levels
        maxstring=66,  # match editor line-len limit
    )
    def_kws |= repr_kws
    reprr = reprlib.Repr(**def_kws)
    return reprr.repr
--- a/piker/service/_actor_runtime.py
+++ b/piker/service/_actor_runtime.py
@ -119,10 +119,6 @@ async def open_piker_runtime(
                # spawn other specialized daemons I think?
                enable_modules=enable_modules,
                # TODO: how to configure this?
                # keep it on by default if debug mode is set?
                maybe_enable_greenback=False,
                **tractor_kwargs,
            ) as actor,
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@ -386,8 +386,6 @@ def ldshm(
            open_annot_ctl() as actl,
        ):
            shm_df: pl.DataFrame | None = None
            tf2aids: dict[float, dict] = {}
            for (
                shmfile,
                shm,
@ -528,17 +526,16 @@ def ldshm(
                            new_df,
                            step_gaps,
                        )
                        # last chance manual overwrites in REPL
-                        # await tractor.pause()
+                        await tractor.pause()
                        assert aids
                        tf2aids[period_s] = aids
                else:
                    # allow interaction even when no ts problems.
                    assert not diff
                    await tractor.pause()
-            log.info('Exiting TSP shm anal-izer!')
+                    # assert not diff
            if shm_df is None:
                log.error(
--- a/piker/storage/nativedb.py
+++ b/piker/storage/nativedb.py
@ -161,13 +161,7 @@ class NativeStorageClient:
    def index_files(self):
        for path in self._datadir.iterdir():
-            if (
+            if path.name in {'borked', 'expired',}:
                path.is_dir()
                or
                '.parquet' not in str(path)
                # or
                # path.name in {'borked', 'expired',}
            ):
                continue
            key: str = path.name.rstrip('.parquet')
--- a/piker/tsp/init.py
+++ b/piker/tsp/init.py
@ -44,10 +44,8 @@ import trio
 from trio_typing import TaskStatus
 import tractor
 from pendulum import (
    Interval,
    DateTime,
    Duration,
    duration as mk_duration,
    from_timestamp,
 )
 import numpy as np
@ -216,8 +214,7 @@ async def maybe_fill_null_segments(
        # pair, immediately stop backfilling?
        if (
            start_dt
-            and
+            and end_dt < start_dt
            end_dt < start_dt
        ):
            await tractor.pause()
            break
@ -265,7 +262,6 @@ async def maybe_fill_null_segments(
        except tractor.ContextCancelled:
            # log.exception
            await tractor.pause()
            raise
    null_segs_detected.set()
    # RECHECK for more null-gaps
@ -353,7 +349,7 @@ async def maybe_fill_null_segments(
 async def start_backfill(
    get_hist,
-    def_frame_duration: Duration,
+    frame_types: dict[str, Duration] | None,
    mod: ModuleType,
    mkt: MktPair,
    shm: ShmArray,
@ -383,23 +379,22 @@ async def start_backfill(
        update_start_on_prepend: bool = False
        if backfill_until_dt is None:
-            # TODO: per-provider default history-durations?
+            # TODO: drop this right and just expose the backfill
-            # -[ ] inside the `open_history_client()` config allow
+            # limits inside a [storage] section in conf.toml?
-            #    declaring the history duration limits instead of
+            # when no tsdb "last datum" is provided, we just load
-            #    guessing and/or applying the same limits to all?
+            # some near-term history.
-            #
+            # periods = {
-            # -[ ] allow declaring (default) per-provider backfill
+            #     1: {'days': 1},
-            #     limits inside a [storage] sub-section in conf.toml?
+            #     60: {'days': 14},
-            #
+            # }
-            # NOTE, when no tsdb "last datum" is provided, we just
+
-            # load some near-term history by presuming a "decently
+            # do a decently sized backfill and load it into storage.
            # large" 60s duration limit and a much shorter 1s range.
            periods = {
                1: {'days': 2},
                60: {'years': 6},
            }
            period_duration: int = periods[timeframe]
-            update_start_on_prepend: bool = True
+            update_start_on_prepend = True
            # NOTE: manually set the "latest" datetime which we intend to
            # backfill history "until" so as to adhere to the history
@ -421,6 +416,7 @@ async def start_backfill(
                f'backfill_until_dt: {backfill_until_dt}\n'
                f'last_start_dt: {last_start_dt}\n'
            )
            try:
                (
                    array,
@ -430,114 +426,71 @@ async def start_backfill(
                    timeframe,
                    end_dt=last_start_dt,
                )
            except NoData as _daterr:
-                orig_last_start_dt: datetime = last_start_dt
+                # 3 cases:
-                gap_report: str = (
+                # - frame in the middle of a legit venue gap
-                    f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n'
+                # - history actually began at the `last_start_dt`
-                    f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n'
+                # - some other unknown error (ib blocking the
-                    f'last_start_dt: {orig_last_start_dt}\n\n'
+                #   history bc they don't want you seeing how they
-                    f'bf_until: {backfill_until_dt}\n'
+                #   cucked all the tinas..)
                if dur := frame_types.get(timeframe):
                    # decrement by a frame's worth of duration and
                    # retry a few times.
                    last_start_dt.subtract(
                        seconds=dur.total_seconds()
                    )
-                # EMPTY FRAME signal with 3 (likely) causes:
+                    log.warning(
-                #
+                        f'{mod.name} -> EMPTY FRAME for end_dt?\n'
-                # 1. range contains legit gap in venue history
+                        f'tf@fqme: {timeframe}@{mkt.fqme}\n'
-                # 2. history actually (edge case) **began** at the
+                        'bf_until <- last_start_dt:\n'
-                #    value `last_start_dt`
+                        f'{backfill_until_dt} <- {last_start_dt}\n'
-                # 3. some other unknown error (ib blocking the
+                        f'Decrementing `end_dt` by {dur} and retry..\n'
                #    history-query bc they don't want you seeing how
                #    they cucked all the tinas.. like with options
                #    hist)
                #
                if def_frame_duration:
                    # decrement by a duration's (frame) worth of time
                    # as maybe indicated by the backend to see if we
                    # can get older data before this possible
                    # "history gap".
                    last_start_dt: datetime = last_start_dt.subtract(
                        seconds=def_frame_duration.total_seconds()
                    )
                    gap_report += (
                        f'Decrementing `end_dt` and retrying with,\n'
                        f'def_frame_duration: {def_frame_duration}\n'
                        f'(new) last_start_dt: {last_start_dt}\n'
                    )
                    log.warning(gap_report)
                    # skip writing to shm/tsdb and try the next
                    # duration's worth of prior history.
                    continue
                else:
                    # await tractor.pause()
                    raise DataUnavailable(gap_report)
            # broker says there never was or is no more history to pull
-            except DataUnavailable as due:
+            except DataUnavailable:
                message: str = due.args[0]
                log.warning(
-                    f'Provider {mod.name!r} halted backfill due to,\n\n'
+                    f'NO-MORE-DATA in range?\n'
-
+                    f'`{mod.name}` halted history:\n'
-                    f'{message}\n'
+                    f'tf@fqme: {timeframe}@{mkt.fqme}\n'
-
+                    'bf_until <- last_start_dt:\n'
-                    f'fqme: {mkt.fqme}\n'
+                    f'{backfill_until_dt} <- {last_start_dt}\n'
                    f'timeframe: {timeframe}\n'
                    f'last_start_dt: {last_start_dt}\n'
                    f'bf_until: {backfill_until_dt}\n'
                )
-                # UGH: what's a better way?
+
-                # TODO: backends are responsible for being correct on
+                # ugh, what's a better way?
-                # this right!?
+                # TODO: fwiw, we probably want a way to signal a throttle
-                # -[ ] in the `ib` case we could maybe offer some way
+                # condition (eg. with ib) so that we can halt the
-                #     to halt the request loop until the condition is
+                # request loop until the condition is resolved?
-                #     resolved or should the backend be entirely in
+                if timeframe > 1:
-                #     charge of solving such faults? yes, right?
+                    await tractor.pause()
                return
            time: np.ndarray = array['time']
            assert (
-                time[0]
+                array['time'][0]
                ==
                next_start_dt.timestamp()
            )
-            assert time[-1] == next_end_dt.timestamp()
+            diff = last_start_dt - next_start_dt
-
+            frame_time_diff_s = diff.seconds
            expected_dur: Interval = last_start_dt - next_start_dt
            # frame's worth of sample-period-steps, in seconds
            frame_size_s: float = len(array) * timeframe
-            recv_frame_dur: Duration = (
+            expected_frame_size_s: float = frame_size_s + timeframe
-                from_timestamp(array[-1]['time'])
+            if frame_time_diff_s > expected_frame_size_s:
-                -
+
                from_timestamp(array[0]['time'])
            )
            if (
                (lt_frame := (recv_frame_dur < expected_dur))
                or
                (null_frame := (frame_size_s == 0))
                # ^XXX, should NEVER hit now!
            ):
                # XXX: query result includes a start point prior to our
                # expected "frame size" and thus is likely some kind of
                # history gap (eg. market closed period, outage, etc.)
                # so just report it to console for now.
                if lt_frame:
                    reason = 'Possible GAP (or first-datum)'
                else:
                    assert null_frame
                    reason = 'NULL-FRAME'
                missing_dur: Interval = expected_dur.end - recv_frame_dur.end
                log.warning(
-                    f'{timeframe}s-series {reason} detected!\n'
+                    'GAP DETECTED:\n'
-                    f'fqme: {mkt.fqme}\n'
+                    f'last_start_dt: {last_start_dt}\n'
-                    f'last_start_dt: {last_start_dt}\n\n'
+                    f'diff: {diff}\n'
-                    f'recv interval: {recv_frame_dur}\n'
+                    f'frame_time_diff_s: {frame_time_diff_s}\n'
                    f'expected interval: {expected_dur}\n\n'
                    f'Missing duration of history of {missing_dur.in_words()!r}\n'
                    f'{missing_dur}\n'
                )
                # await tractor.pause()
            to_push = diff_history(
                array,
@ -612,27 +565,22 @@ async def start_backfill(
            # long-term storage.
            if (
                storage is not None
-                and
+                and write_tsdb
                write_tsdb
            ):
                log.info(
                    f'Writing {ln} frame to storage:\n'
                    f'{next_start_dt} -> {last_start_dt}'
                )
-                # NOTE, always drop the src asset token for
+                # always drop the src asset token for
                # non-currency-pair like market types (for now)
                #
                # THAT IS, for now our table key schema is NOT
                # including the dst[/src] source asset token. SO,
                # 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for
                # historical reasons ONLY.
                if mkt.dst.atype not in {
                    'crypto',
                    'crypto_currency',
                    'fiat',  # a "forex pair"
                    'perpetual_future',  # stupid "perps" from cex land
                }:
                    # for now, our table key schema is not including
                    # the dst[/src] source asset token.
                    col_sym_key: str = mkt.get_fqme(
                        delim_char='',
                        without_src=True,
@ -737,7 +685,7 @@ async def back_load_from_tsdb(
        last_tsdb_dt
        and latest_start_dt
    ):
-        backfilled_size_s: Duration = (
+        backfilled_size_s = (
            latest_start_dt - last_tsdb_dt
        ).seconds
        # if the shm buffer len is not large enough to contain
@ -960,8 +908,6 @@ async def tsdb_backfill(
            f'{pformat(config)}\n'
        )
        # concurrently load the provider's most-recent-frame AND any
        # pre-existing tsdb history already saved in `piker` storage.
        dt_eps: list[DateTime, DateTime] = []
        async with trio.open_nursery() as tn:
            tn.start_soon(
@ -972,6 +918,7 @@ async def tsdb_backfill(
                timeframe,
                config,
            )
            tsdb_entry: tuple = await load_tsdb_hist(
                storage,
                mkt,
@ -1000,25 +947,6 @@ async def tsdb_backfill(
                mr_end_dt,
            ) = dt_eps
            first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds
            calced_frame_size: Duration = mk_duration(
                seconds=first_frame_dur_s,
            )
            # NOTE, attempt to use the backend declared default frame
            # sizing (as allowed by their time-series query APIs) and
            # if not provided try to construct a default from the
            # first frame received above.
            def_frame_durs: dict[
                int,
                Duration,
            ]|None = config.get('frame_types', None)
            if def_frame_durs:
                def_frame_size: Duration = def_frame_durs[timeframe]
                assert def_frame_size == calced_frame_size
            else:
                # use what we calced from first frame above.
                def_frame_size = calced_frame_size
            # NOTE: when there's no offline data, there's 2 cases:
            # - data backend doesn't support timeframe/sample
            #   period (in which case `dt_eps` should be `None` and
@ -1049,7 +977,7 @@ async def tsdb_backfill(
                    partial(
                        start_backfill,
                        get_hist=get_hist,
-                        def_frame_duration=def_frame_size,
+                        frame_types=config.get('frame_types', None),
                        mod=mod,
                        mkt=mkt,
                        shm=shm,
--- a/piker/tsp/_anal.py
+++ b/piker/tsp/_anal.py
@ -616,18 +616,6 @@ def detect_price_gaps(
    # ])
    ...
 # TODO: probably just use the null_segs impl above?
 def detect_vlm_gaps(
    df: pl.DataFrame,
    col: str = 'volume',
 ) -> pl.DataFrame:
    '''
    Return the rows of `df` whose `col` value equals zero.

    `df` is the frame to scan and `col` names the column compared
    against `0` (defaults to `'volume'`). The result is whatever
    `DataFrame.filter()` produces for that predicate.

    '''
    # NOTE: the previous body filtered an undefined `w_dts` name
    # (raising `NameError` on every call) and never touched the `df`
    # input; filter the frame we were actually handed.
    vnull: pl.DataFrame = df.filter(
        pl.col(col) == 0
    )
    return vnull
 def dedupe(
    src_df: pl.DataFrame,
@ -638,6 +626,7 @@ def dedupe(
 ) -> tuple[
    pl.DataFrame,  # with dts
    pl.DataFrame,  # gaps
    pl.DataFrame,  # with deduplicated dts (aka gap/repeat removal)
    int,  # len diff between input and deduped
 ]:
@ -650,22 +639,19 @@ def dedupe(
    '''
    wdts: pl.DataFrame = with_dts(src_df)
    deduped = wdts
    # remove duplicated datetime samples/sections
    deduped: pl.DataFrame = wdts.unique(
        # subset=['dt'],
        subset=['time'],
        maintain_order=True,
    )
    # maybe sort on any time field
    if sort:
-        deduped = deduped.sort(by='time')
+        wdts = wdts.sort(by='time')
        # TODO: detect out-of-order segments which were corrected!
        # -[ ] report in log msg
        # -[ ] possibly return segment sections which were moved?
    # remove duplicated datetime samples/sections
    deduped: pl.DataFrame = wdts.unique(
        subset=['dt'],
        maintain_order=True,
    )
    diff: int = (
        wdts.height
        -
--- a/piker/types.py
+++ b/piker/types.py
@ -21,4 +21,230 @@ Extensions to built-in or (heavily used but 3rd party) friend-lib
 types.
 '''
-from tractor.msg import Struct as Struct
+from __future__ import annotations
 from collections import UserList
 from pprint import (
    saferepr,
 )
 from typing import (
    Any,
    Iterator,
 )
 from msgspec import (
    msgpack,
    Struct as _Struct,
    structs,
 )
 class DiffDump(UserList):
    '''
    Thin `UserList` delegator whose `repr()` renders (presumed)
    `tuple[str, Any, Any]` entries across several lines so that
    `Struct` diffs are easy to compare by eye.
    '''
    def __repr__(self) -> str:
        # an empty dump renders exactly like the underlying list.
        if len(self) == 0:
            return super().__repr__()

        # one chunk per `(key, left, right)` entry: the key on its
        # own line followed by both values' `repr()`, each on
        # a tab-indented line, so they line up visually when
        # printed to console.
        chunks: list[str] = ['[\n']
        for name, ours, theirs in self:
            chunks.append(
                f'({name},\n'
                f'\t{ours!r},\n'
                f'\t{theirs!r},\n'
                ')\n'
            )
        chunks.append(']\n')
        return ''.join(chunks)
 class Struct(
    _Struct,
    # https://jcristharif.com/msgspec/structs.html#tagged-unions
    # tag='pikerstruct',
    # tag=True,
 ):
    '''
    A "human friendlier" (aka repl buddy) struct subtype.

    Layers dict conversion, recursive pretty-printing (also used as
    `repr()`), validated copying, in-place type casting and
    field-wise diffing on top of `msgspec.Struct`.

    '''
    def _sin_props(self) -> Iterator[
        tuple[
            structs.FieldInfo,
            str,
            Any,
        ]
    ]:
        '''
        Iterate over all non-@property fields of this struct.

        Yields a `(field_info, name, value)` triple for every entry
        reported by `msgspec.structs.fields()`.

        '''
        fi: structs.FieldInfo
        for fi in structs.fields(self):
            key: str = fi.name
            val: Any = getattr(self, key)
            yield fi, key, val

    def to_dict(
        self,
        include_non_members: bool = True,
    ) -> dict:
        '''
        Like it sounds.. direct delegation to:
        https://jcristharif.com/msgspec/api.html#msgspec.structs.asdict

        With the default `include_non_members=True` the `asdict()`
        output is returned as is; pass `include_non_members=False`
        to keep only the keys reported by
        `msgspec.structs.fields()`.

        '''
        asdict: dict = structs.asdict(self)
        if include_non_members:
            return asdict

        # only return a dict of the declared struct members, NOT
        # anything added as type-defined `@property` methods!
        return {
            k: asdict[k]
            for _, k, _ in self._sin_props()
        }

    def pformat(
        self,
        field_indent: int = 2,
        indent: int = 0,
    ) -> str:
        '''
        Recursion-safe `pprint.pformat()` style formatting of
        a `msgspec.Struct` for sane reading by a human using a REPL.

        `indent` is the number of spaces put before the closing
        paren, `field_indent` the extra spaces put before each
        `name: type = value,` line. Nested `Struct` values are
        rendered through their own `.pformat()`.

        '''
        # global whitespace indent
        ws: str = ' '*indent

        # field whitespace indent
        field_ws: str = ' '*(field_indent + indent)

        # qtn: str = ws + self.__class__.__qualname__
        qtn: str = self.__class__.__qualname__

        obj_str: str = ''  # accumulator
        fi: structs.FieldInfo
        k: str
        v: Any
        for fi, k, v in self._sin_props():
            # TODO: how can we prefer `Literal['option1',  'option2,
            # ..]` over .__name__ == `Literal` but still get only the
            # latter for simple types like `str | int | None` etc..?
            ft: type = fi.type
            typ_name: str = getattr(ft, '__name__', str(ft))

            # recurse to get sub-struct's `.pformat()` output Bo
            if isinstance(v, Struct):
                val_str: str = v.pformat(
                    indent=field_indent + indent,
                    field_indent=indent + field_indent,
                )
            else:  # the `pprint` recursion-safe format:
                # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr
                val_str: str = saferepr(v)

            obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n')

        return (
            f'{qtn}(\n'
            f'{obj_str}'
            f'{ws})'
        )

    # TODO: use a pprint.PrettyPrinter instance around ONLY rendering
    # inside a known tty?
    # def __repr__(self) -> str:
    #     ...
    # __str__ = __repr__ = pformat
    __repr__ = pformat

    def copy(
        self,
        update: dict | None = None,
    ) -> Struct:
        '''
        Validate-typecast all self defined fields, return a copy of
        us with all such fields.

        Any `update` entries override the matching fields on the
        returned copy only; `self` is never modified.

        NOTE: This is kinda like the default behaviour in
        `pydantic.BaseModel` except a copy of the object is
        returned making it compat with `frozen=True`.

        '''
        # apply overrides onto a NEW instance instead of `setattr()`
        # on `self`: the latter mutated the original and can not
        # work on frozen structs.
        src: Struct = (
            structs.replace(self, **update)
            if update
            else self
        )

        # NOTE: roundtrip serialize to validate
        # - encode to msgpack binary format,
        # - decode that back to a struct.
        return msgpack.Decoder(type=type(self)).decode(
            msgpack.Encoder().encode(src)
        )

    def typecast(
        self,
        # TODO: allow only casting a named subset?
        # fields: set[str] | None = None,
    ) -> None:
        '''
        Cast all fields using their declared type annotations
        (kinda like what `pydantic` does by default).

        Every field value is passed through its `FieldInfo.type`
        and written back in place.

        NOTE: this of course won't work on frozen types, use
        ``.copy()`` above in such cases.

        '''
        # https://jcristharif.com/msgspec/api.html#msgspec.structs.fields
        fi: structs.FieldInfo
        for fi in structs.fields(self):
            setattr(
                self,
                fi.name,
                fi.type(getattr(self, fi.name)),
            )

    def __sub__(
        self,
        other: Struct,
    ) -> DiffDump[tuple[str, Any, Any]]:
        '''
        Compare fields/items key-wise and return a ``DiffDump``
        for easy visual REPL comparison B)

        Each entry is a `(field_name, our_value, their_value)` tuple
        for a field of `self` whose values compare unequal.

        '''
        diffs: DiffDump[tuple[str, Any, Any]] = DiffDump()
        for fi in structs.fields(self):
            attr_name: str = fi.name
            ours: Any = getattr(self, attr_name)
            theirs: Any = getattr(other, attr_name)
            if ours != theirs:
                diffs.append((
                    attr_name,
                    ours,
                    theirs,
                ))

        return diffs
Author	SHA1	Message	Date
Tyler Goodlet	ef9bc7d1ed	`.brokers.cli`: module type and todo for `--pdb` flag to NOT src from sub-cmd	2025-02-19 17:40:31 -05:00
Tyler Goodlet	0aa252507f	Type loaded backend modules	2025-02-19 17:40:31 -05:00
Tyler Goodlet	9ac1745271	Bump various `.brokers.core` doc string content/style	2025-02-19 17:40:31 -05:00
Tyler Goodlet	ded2705397	`.questrade`: link in ws-API issue!	2025-02-19 17:40:01 -05:00
Tyler Goodlet	65fbd435a8	`.kraken.broker`: need to `await verify_balances()` ..	2025-02-19 17:40:01 -05:00
Tyler Goodlet	7a21e87688	`.brokers.ib.feed`: better `tractor.to_asyncio` typing and var naming throughout!	2025-02-19 17:40:01 -05:00