`.brokers.cli`: module type and todo for `--pdb` flag to NOT src from sub-cmd

Type loaded backend modules
Bump various `.brokers.core` doc string content/style
2025-02-21 16:49:13 -05:00 · 2025-02-21 16:49:13 -05:00 · 2025-02-21 16:49:13 -05:00 · 2025-02-21 16:37:42 -05:00 · 2025-02-21 16:37:42 -05:00 · 2025-02-21 16:37:42 -05:00
14 changed files with 409 additions and 187 deletions
--- a/piker/accounting/_pos.py
+++ b/piker/accounting/_pos.py
@ -30,7 +30,8 @@ from types import ModuleType
 from typing import (
    Any,
    Iterator,
-    Generator
+    Generator,
    TYPE_CHECKING,
 )
 import pendulum
@ -59,8 +60,10 @@ from ..clearing._messages import (
    BrokerdPosition,
 )
 from piker.types import Struct
-from piker.data._symcache import SymbologyCache
+from piker.log import get_logger
-from ..log import get_logger
+
 if TYPE_CHECKING:
    from piker.data._symcache import SymbologyCache
 log = get_logger(__name__)
@ -493,6 +496,17 @@ class Account(Struct):
        _mktmap_table: dict[str, MktPair] | None = None,
        only_require: list[str]|True = True,
        # ^list of fqmes that are "required" to be processed from
        # this ledger pass; we often don't care about others and
        # definitely shouldn't always error in such cases.
        # (eg. broker backend loaded that doesn't yet support the
        # symcache but also, inside the paper engine we don't ad-hoc
        # request `get_mkt_info()` for every symbol in the ledger,
        # only the one for which we're simulating against).
        # TODO, not sure if there's a better soln for this, ideally
        # all backends get symcache support afap i guess..
    ) -> dict[str, Position]:
        '''
        Update the internal `.pps[str, Position]` table from input
@ -535,11 +549,32 @@ class Account(Struct):
                if _mktmap_table is None:
                    raise
                required: bool = (
                    only_require is True
                    or (
                        only_require is not True
                        and
                        fqme in only_require
                    )
                )
                # XXX: caller is allowed to provide a fallback
                # mktmap table for the case where a new position is
                # being added and the preloaded symcache didn't
                # have this entry prior (eg. with frickin IB..)
-                mkt = _mktmap_table[fqme]
+                if (
                    not (mkt := _mktmap_table.get(fqme))
                    and
                    required
                ):
                    raise
                elif not required:
                    continue
                else:
                    # should be an entry retrieved somewhere
                    assert mkt
            if not (pos := pps.get(bs_mktid)):
@ -656,7 +691,7 @@ class Account(Struct):
    def write_config(self) -> None:
        '''
        Write the current account state to the user's account TOML file, normally
-        something like ``pps.toml``.
+        something like `pps.toml`.
        '''
        # TODO: show diff output?
--- a/piker/brokers/init.py
+++ b/piker/brokers/init.py
@ -98,13 +98,14 @@ async def open_cached_client(
    If one has not been setup do it and cache it.
    '''
-    brokermod = get_brokermod(brokername)
+    brokermod: ModuleType = get_brokermod(brokername)
    # TODO: make abstract or `typing.Protocol`
    # client: Client
    async with maybe_open_context(
        acm_func=brokermod.get_client,
        kwargs=kwargs,
    ) as (cache_hit, client):
        if cache_hit:
            log.runtime(f'Reusing existing {client}')
--- a/piker/brokers/cli.py
+++ b/piker/brokers/cli.py
@ -471,11 +471,15 @@ def search(
    '''
    # global opts
-    brokermods = list(config['brokermods'].values())
+    brokermods: list[ModuleType] = list(config['brokermods'].values())
    # TODO: this is coming from the `search --pdb` NOT from
    # the `piker --pdb` XD ..
    # -[ ] pull from the parent click ctx's values..dumdum
    # assert pdb
    # define tractor entrypoint
    async def main(func):
        async with maybe_open_pikerd(
            loglevel=config['loglevel'],
            debug_mode=pdb,
--- a/piker/brokers/core.py
+++ b/piker/brokers/core.py
@ -22,7 +22,9 @@ routines should be primitive data types where possible.
 """
 import inspect
 from types import ModuleType
-from typing import List, Dict, Any, Optional
+from typing import (
    Any,
 )
 import trio
@ -34,8 +36,10 @@ from ..accounting import MktPair
 async def api(brokername: str, methname: str, **kwargs) -> dict:
-    """Make (proxy through) a broker API call by name and return its result.
+    '''
-    """
+    Make (proxy through) a broker API call by name and return its result.
    '''
    brokermod = get_brokermod(brokername)
    async with brokermod.get_client() as client:
        meth = getattr(client, methname, None)
@ -62,10 +66,14 @@ async def api(brokername: str, methname: str, **kwargs) -> dict:
 async def stocks_quote(
    brokermod: ModuleType,
-    tickers: List[str]
+    tickers: list[str]
-) -> Dict[str, Dict[str, Any]]:
+
-    """Return quotes dict for ``tickers``.
+) -> dict[str, dict[str, Any]]:
-    """
+    '''
    Return a `dict` of snapshot quotes for the provided input
    `tickers`: a `list` of fqmes.
    '''
    async with brokermod.get_client() as client:
        return await client.quote(tickers)
@ -74,13 +82,15 @@ async def stocks_quote(
 async def option_chain(
    brokermod: ModuleType,
    symbol: str,
-    date: Optional[str] = None,
+    date: str|None = None,
-) -> Dict[str, Dict[str, Dict[str, Any]]]:
+) -> dict[str, dict[str, dict[str, Any]]]:
-    """Return option chain for ``symbol`` for ``date``.
+    '''
    Return option chain for ``symbol`` for ``date``.
    By default all expiries are returned. If ``date`` is provided
    then contract quotes for that single expiry are returned.
-    """
+
    '''
    async with brokermod.get_client() as client:
        if date:
            id = int((await client.tickers2ids([symbol]))[symbol])
@ -98,7 +108,7 @@ async def option_chain(
 # async def contracts(
 #     brokermod: ModuleType,
 #     symbol: str,
-# ) -> Dict[str, Dict[str, Dict[str, Any]]]:
+# ) -> dict[str, dict[str, dict[str, Any]]]:
 #     """Return option contracts (all expiries) for ``symbol``.
 #     """
 #     async with brokermod.get_client() as client:
@ -110,15 +120,24 @@ async def bars(
    brokermod: ModuleType,
    symbol: str,
    **kwargs,
-) -> Dict[str, Dict[str, Dict[str, Any]]]:
+) -> dict[str, dict[str, dict[str, Any]]]:
-    """Return option contracts (all expiries) for ``symbol``.
+    '''
-    """
+    Return option contracts (all expiries) for ``symbol``.
    '''
    async with brokermod.get_client() as client:
        return await client.bars(symbol, **kwargs)
-async def search_w_brokerd(name: str, pattern: str) -> dict:
+async def search_w_brokerd(
    name: str,
    pattern: str,
 ) -> dict:
    # TODO: WHY NOT WORK!?!
    # when we `step` through the next block?
    # import tractor
    # await tractor.pause()
    async with open_cached_client(name) as client:
        # TODO: support multiple asset type concurrent searches.
@ -130,12 +149,12 @@ async def symbol_search(
    pattern: str,
    **kwargs,
-) -> Dict[str, Dict[str, Dict[str, Any]]]:
+) -> dict[str, dict[str, dict[str, Any]]]:
    '''
    Return symbol info from broker.
    '''
-    results = []
+    results: list[str] = []
    async def search_backend(
        brokermod: ModuleType
@ -143,6 +162,13 @@ async def symbol_search(
        brokername: str = mod.name
        # TODO: figure this the FUCK OUT
        # -> ok so obvi in the root actor any async task that's
        # spawned outside the main tractor-root-actor task needs to
        # call this..
        # await tractor.devx._debug.maybe_init_greenback()
        # tractor.pause_from_sync()
        async with maybe_spawn_brokerd(
            mod.name,
            infect_asyncio=getattr(
@ -162,7 +188,6 @@ async def symbol_search(
            ))
    async with trio.open_nursery() as n:
        for mod in brokermods:
            n.start_soon(search_backend, mod.name)
@ -172,11 +197,13 @@ async def symbol_search(
 async def mkt_info(
    brokermod: ModuleType,
    fqme: str,
    **kwargs,
 ) -> MktPair:
    '''
-    Return MktPair info from broker including src and dst assets.
+    Return the `piker.accounting.MktPair` info struct from a given
    backend broker tradable src/dst asset pair.
    '''
    async with open_cached_client(brokermod.name) as client:
--- a/piker/clearing/_paper_engine.py
+++ b/piker/clearing/_paper_engine.py
@ -653,6 +653,7 @@ async def open_trade_dialog(
                # in) use manually constructed table from calling
                # the `.get_mkt_info()` provider EP above.
                _mktmap_table=mkt_by_fqme,
                only_require=list(mkt_by_fqme),
            )
            pp_msgs: list[BrokerdPosition] = []
--- a/piker/data/_symcache.py
+++ b/piker/data/_symcache.py
@ -31,6 +31,7 @@ from pathlib import Path
 from pprint import pformat
 from typing import (
    Any,
    Callable,
    Sequence,
    Hashable,
    TYPE_CHECKING,
@ -56,7 +57,7 @@ from piker.brokers import (
 )
 if TYPE_CHECKING:
-    from ..accounting import (
+    from piker.accounting import (
        Asset,
        MktPair,
    )
@ -149,19 +150,36 @@ class SymbologyCache(Struct):
                    'Implement `Client.get_assets()`!'
                )
-            if get_mkt_pairs := getattr(client, 'get_mkt_pairs', None):
+            get_mkt_pairs: Callable|None = getattr(
                client,
                'get_mkt_pairs',
                None,
            )
            if not get_mkt_pairs:
                log.warning(
                    'No symbology cache `Pair` support for `{provider}`..\n'
                    'Implement `Client.get_mkt_pairs()`!'
                )
                return self
            pairs: dict[str, Struct] = await get_mkt_pairs()
-                for bs_fqme, pair in pairs.items():
+            if not pairs:
                log.warning(
                    'No pairs from intial {provider!r} sym-cache request?\n\n'
                    '`Client.get_mkt_pairs()` -> {pairs!r} ?'
                )
                return self
-                    # NOTE: every backend defined pair should
+            for bs_fqme, pair in pairs.items():
                    # declare its ns path for roundtrip
                    # serialization lookup.
                if not getattr(pair, 'ns_path', None):
                    # XXX: every backend defined pair must declare
                    # a `.ns_path: tractor.NamespacePath` to enable
                    # roundtrip serialization lookup from a local
                    # cache file.
                    raise TypeError(
                        f'Pair-struct for {self.mod.name} MUST define a '
-                            '`.ns_path: str`!\n'
+                        '`.ns_path: str`!\n\n'
-                            f'{pair}'
+                        f'{pair!r}'
                    )
                entry = await self.mod.get_mkt_info(pair.bs_fqme)
@ -195,12 +213,6 @@ class SymbologyCache(Struct):
                pair,
            )
            else:
                log.warning(
                    'No symbology cache `Pair` support for `{provider}`..\n'
                    'Implement `Client.get_mkt_pairs()`!'
                )
        return self
    @classmethod
--- a/piker/data/_web_bs.py
+++ b/piker/data/_web_bs.py
@ -273,7 +273,7 @@ async def _reconnect_forever(
                nobsws._connected.set()
                await trio.sleep_forever()
        except HandshakeError:
-            log.exception(f'Retrying connection')
+            log.exception('Retrying connection')
        # ws & nursery block ends
@ -359,8 +359,8 @@ async def open_autorecon_ws(
 '''
-JSONRPC response-request style machinery for transparent multiplexing of msgs
+JSONRPC response-request style machinery for transparent multiplexing
-over a NoBsWs.
+of msgs over a `NoBsWs`.
 '''
@ -377,44 +377,83 @@ async def open_jsonrpc_session(
    url: str,
    start_id: int = 0,
    response_type: type = JSONRPCResult,
-    request_type: Optional[type] = None,
+    msg_recv_timeout: float = float('inf'),
-    request_hook: Optional[Callable] = None,
+    # ^NOTE, since only `deribit` is using this jsonrpc stuff atm
-    error_hook: Optional[Callable] = None,
+    # and options mkts are generally "slow moving"..
    #
    # FURTHER if we break the underlying ws connection then since we
    # don't pass a `fixture` to the task that manages `NoBsWs`, i.e.
    # `_reconnect_forever()`, the jsonrpc "transport pipe" get's
    # broken and never restored with wtv init sequence is required to
    # re-establish a working req-resp session.
 ) -> Callable[[str, dict], dict]:
    '''
    Init a json-RPC-over-websocket connection to the provided `url`.
    A `json_rpc: Callable[[str, dict], dict]` is delivered to the
    caller for sending requests and a bg-`trio.Task` handles
    processing of response msgs including error reporting/raising in
    the parent/caller task.
    '''
    # NOTE, store all request msgs so we can raise errors on the
    # caller side!
    req_msgs: dict[int, dict] = {}
    async with (
-        trio.open_nursery() as n,
+        trio.open_nursery() as tn,
-        open_autorecon_ws(url) as ws
+        open_autorecon_ws(
            url=url,
            msg_recv_timeout=msg_recv_timeout,
        ) as ws
    ):
-        rpc_id: Iterable = count(start_id)
+        rpc_id: Iterable[int] = count(start_id)
        rpc_results: dict[int, dict] = {}
-        async def json_rpc(method: str, params: dict) -> dict:
+        async def json_rpc(
            method: str,
            params: dict,
        ) -> dict:
            '''
            perform a json rpc call and wait for the result, raise exception in
            case of error field present on response
            '''
            nonlocal req_msgs
            req_id: int = next(rpc_id)
            msg = {
                'jsonrpc': '2.0',
-                'id': next(rpc_id),
+                'id': req_id,
                'method': method,
                'params': params
            }
            _id = msg['id']
-            rpc_results[_id] = {
+            result = rpc_results[_id] = {
                'result': None,
-                'event': trio.Event()
+                'error': None,
                'event': trio.Event(),  # signal caller resp arrived
            }
            req_msgs[_id] = msg
            await ws.send_msg(msg)
            # wait for response before unblocking requester code
            await rpc_results[_id]['event'].wait()
-            ret = rpc_results[_id]['result']
+            if (maybe_result := result['result']):
-
+                ret = maybe_result
                del rpc_results[_id]
            else:
                err = result['error']
                raise Exception(
                    f'JSONRPC request failed\n'
                    f'req: {msg}\n'
                    f'resp: {err}\n'
                )
            if ret.error is not None:
                raise Exception(json.dumps(ret.error, indent=4))
@ -428,6 +467,7 @@ async def open_jsonrpc_session(
            the server side.
            '''
            nonlocal req_msgs
            async for msg in ws:
                match msg:
                    case {
@ -451,19 +491,28 @@ async def open_jsonrpc_session(
                        'params': _,
                    }:
                        log.debug(f'Recieved\n{msg}')
                        if request_hook:
                            await request_hook(request_type(**msg))
                    case {
                        'error': error
                    }:
-                        log.warning(f'Recieved\n{error}')
+                        # retrieve orig request msg, set error
-                        if error_hook:
+                        # response in original "result" msg,
-                            await error_hook(response_type(**msg))
+                        # THEN FINALLY set the event to signal caller
                        # to raise the error in the parent task.
                        req_id: int = error['id']
                        req_msg: dict = req_msgs[req_id]
                        result: dict = rpc_results[req_id]
                        result['error'] = error
                        result['event'].set()
                        log.error(
                            f'JSONRPC request failed\n'
                            f'req: {req_msg}\n'
                            f'resp: {error}\n'
                        )
                    case _:
                        log.warning(f'Unhandled JSON-RPC msg!?\n{msg}')
-        n.start_soon(recv_task)
+        tn.start_soon(recv_task)
        yield json_rpc
-        n.cancel_scope.cancel()
+        tn.cancel_scope.cancel()
--- a/piker/data/feed.py
+++ b/piker/data/feed.py
@ -786,7 +786,6 @@ async def install_brokerd_search(
@acm
 async def maybe_open_feed(
    fqmes: list[str],
    loglevel: str | None = None,
@ -840,13 +839,12 @@ async def maybe_open_feed(
@acm
 async def open_feed(
    fqmes: list[str],
-    loglevel: str | None = None,
+    loglevel: str|None = None,
    allow_overruns: bool = True,
    start_stream: bool = True,
-    tick_throttle: float | None = None,  # Hz
+    tick_throttle: float|None = None,  # Hz
    allow_remote_ctl_ui: bool = False,
--- a/piker/data/flows.py
+++ b/piker/data/flows.py
@ -36,10 +36,10 @@ from ._sharedmem import (
    ShmArray,
    _Token,
 )
 from piker.accounting import MktPair
 if TYPE_CHECKING:
-    from ..accounting import MktPair
+    from piker.data.feed import Feed
    from .feed import Feed
 class Flume(Struct):
@ -82,7 +82,7 @@ class Flume(Struct):
    # TODO: do we need this really if we can pull the `Portal` from
    # ``tractor``'s internals?
-    feed: Feed | None = None
+    feed: Feed|None = None
    @property
    def rt_shm(self) -> ShmArray:
--- a/piker/data/validate.py
+++ b/piker/data/validate.py
@ -113,9 +113,9 @@ def validate_backend(
            )
            if ep is None:
                log.warning(
-                    f'Provider backend {mod.name} is missing '
+                    f'Provider backend {mod.name!r} is missing '
-                    f'{daemon_name} support :(\n'
+                    f'{daemon_name!r} support?\n'
-                    f'The following endpoint is missing: {name}'
+                    f'|_module endpoint-func missing: {name!r}\n'
                )
    inits: list[
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@ -386,6 +386,8 @@ def ldshm(
            open_annot_ctl() as actl,
        ):
            shm_df: pl.DataFrame | None = None
            tf2aids: dict[float, dict] = {}
            for (
                shmfile,
                shm,
@ -526,16 +528,17 @@ def ldshm(
                            new_df,
                            step_gaps,
                        )
                        # last chance manual overwrites in REPL
-                        await tractor.pause()
+                        # await tractor.pause()
                        assert aids
                        tf2aids[period_s] = aids
                else:
                    # allow interaction even when no ts problems.
-                    await tractor.pause()
+                    assert not diff
                    # assert not diff
            await tractor.pause()
            log.info('Exiting TSP shm anal-izer!')
            if shm_df is None:
                log.error(
--- a/piker/storage/nativedb.py
+++ b/piker/storage/nativedb.py
@ -161,7 +161,13 @@ class NativeStorageClient:
    def index_files(self):
        for path in self._datadir.iterdir():
-            if path.name in {'borked', 'expired',}:
+            if (
                path.is_dir()
                or
                '.parquet' not in str(path)
                # or
                # path.name in {'borked', 'expired',}
            ):
                continue
            key: str = path.name.rstrip('.parquet')
--- a/piker/tsp/init.py
+++ b/piker/tsp/init.py
@ -44,8 +44,10 @@ import trio
 from trio_typing import TaskStatus
 import tractor
 from pendulum import (
    Interval,
    DateTime,
    Duration,
    duration as mk_duration,
    from_timestamp,
 )
 import numpy as np
@ -214,7 +216,8 @@ async def maybe_fill_null_segments(
        # pair, immediately stop backfilling?
        if (
            start_dt
-            and end_dt < start_dt
+            and
            end_dt < start_dt
        ):
            await tractor.pause()
            break
@ -262,6 +265,7 @@ async def maybe_fill_null_segments(
        except tractor.ContextCancelled:
            # log.exception
            await tractor.pause()
            raise
    null_segs_detected.set()
    # RECHECK for more null-gaps
@ -349,7 +353,7 @@ async def maybe_fill_null_segments(
 async def start_backfill(
    get_hist,
-    frame_types: dict[str, Duration] | None,
+    def_frame_duration: Duration,
    mod: ModuleType,
    mkt: MktPair,
    shm: ShmArray,
@ -379,22 +383,23 @@ async def start_backfill(
        update_start_on_prepend: bool = False
        if backfill_until_dt is None:
-            # TODO: drop this right and just expose the backfill
+            # TODO: per-provider default history-durations?
-            # limits inside a [storage] section in conf.toml?
+            # -[ ] inside the `open_history_client()` config allow
-            # when no tsdb "last datum" is provided, we just load
+            #    declaring the history duration limits instead of
-            # some near-term history.
+            #    guessing and/or applying the same limits to all?
-            # periods = {
+            #
-            #     1: {'days': 1},
+            # -[ ] allow declaring (default) per-provider backfill
-            #     60: {'days': 14},
+            #     limits inside a [storage] sub-section in conf.toml?
-            # }
+            #
-
+            # NOTE, when no tsdb "last datum" is provided, we just
-            # do a decently sized backfill and load it into storage.
+            # load some near-term history by presuming a "decently
            # large" 60s duration limit and a much shorter 1s range.
            periods = {
                1: {'days': 2},
                60: {'years': 6},
            }
            period_duration: int = periods[timeframe]
-            update_start_on_prepend = True
+            update_start_on_prepend: bool = True
            # NOTE: manually set the "latest" datetime which we intend to
            # backfill history "until" so as to adhere to the history
@ -416,7 +421,6 @@ async def start_backfill(
                f'backfill_until_dt: {backfill_until_dt}\n'
                f'last_start_dt: {last_start_dt}\n'
            )
            try:
                (
                    array,
@ -426,71 +430,114 @@ async def start_backfill(
                    timeframe,
                    end_dt=last_start_dt,
                )
            except NoData as _daterr:
-                # 3 cases:
+                orig_last_start_dt: datetime = last_start_dt
-                # - frame in the middle of a legit venue gap
+                gap_report: str = (
-                # - history actually began at the `last_start_dt`
+                    f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n'
-                # - some other unknown error (ib blocking the
+                    f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n'
-                #   history bc they don't want you seeing how they
+                    f'last_start_dt: {orig_last_start_dt}\n\n'
-                #   cucked all the tinas..)
+                    f'bf_until: {backfill_until_dt}\n'
                if dur := frame_types.get(timeframe):
                    # decrement by a frame's worth of duration and
                    # retry a few times.
                    last_start_dt.subtract(
                        seconds=dur.total_seconds()
                )
-                    log.warning(
+                # EMPTY FRAME signal with 3 (likely) causes:
-                        f'{mod.name} -> EMPTY FRAME for end_dt?\n'
+                #
-                        f'tf@fqme: {timeframe}@{mkt.fqme}\n'
+                # 1. range contains legit gap in venue history
-                        'bf_until <- last_start_dt:\n'
+                # 2. history actually (edge case) **began** at the
-                        f'{backfill_until_dt} <- {last_start_dt}\n'
+                #    value `last_start_dt`
-                        f'Decrementing `end_dt` by {dur} and retry..\n'
+                # 3. some other unknown error (ib blocking the
                #    history-query bc they don't want you seeing how
                #    they cucked all the tinas.. like with options
                #    hist)
                #
                if def_frame_duration:
                    # decrement by a duration's (frame) worth of time
                    # as maybe indicated by the backend to see if we
                    # can get older data before this possible
                    # "history gap".
                    last_start_dt: datetime = last_start_dt.subtract(
                        seconds=def_frame_duration.total_seconds()
                    )
                    gap_report += (
                        f'Decrementing `end_dt` and retrying with,\n'
                        f'def_frame_duration: {def_frame_duration}\n'
                        f'(new) last_start_dt: {last_start_dt}\n'
                    )
                    log.warning(gap_report)
                    # skip writing to shm/tsdb and try the next
                    # duration's worth of prior history.
                    continue
-            # broker says there never was or is no more history to pull
+                else:
-            except DataUnavailable:
+                    # await tractor.pause()
-                log.warning(
+                    raise DataUnavailable(gap_report)
                    f'NO-MORE-DATA in range?\n'
                    f'`{mod.name}` halted history:\n'
                    f'tf@fqme: {timeframe}@{mkt.fqme}\n'
                    'bf_until <- last_start_dt:\n'
                    f'{backfill_until_dt} <- {last_start_dt}\n'
                )
-                # ugh, what's a better way?
+            # broker says there never was or is no more history to pull
-                # TODO: fwiw, we probably want a way to signal a throttle
+            except DataUnavailable as due:
-                # condition (eg. with ib) so that we can halt the
+                message: str = due.args[0]
-                # request loop until the condition is resolved?
+                log.warning(
-                if timeframe > 1:
+                    f'Provider {mod.name!r} halted backfill due to,\n\n'
-                    await tractor.pause()
+
                    f'{message}\n'
                    f'fqme: {mkt.fqme}\n'
                    f'timeframe: {timeframe}\n'
                    f'last_start_dt: {last_start_dt}\n'
                    f'bf_until: {backfill_until_dt}\n'
                )
                # UGH: what's a better way?
                # TODO: backends are responsible for being correct on
                # this right!?
                # -[ ] in the `ib` case we could maybe offer some way
                #     to halt the request loop until the condition is
                #     resolved or should the backend be entirely in
                #     charge of solving such faults? yes, right?
                return
            time: np.ndarray = array['time']
            assert (
-                array['time'][0]
+                time[0]
                ==
                next_start_dt.timestamp()
            )
-            diff = last_start_dt - next_start_dt
+            assert time[-1] == next_end_dt.timestamp()
-            frame_time_diff_s = diff.seconds
+
            expected_dur: Interval = last_start_dt - next_start_dt
            # frame's worth of sample-period-steps, in seconds
            frame_size_s: float = len(array) * timeframe
-            expected_frame_size_s: float = frame_size_s + timeframe
+            recv_frame_dur: Duration = (
-            if frame_time_diff_s > expected_frame_size_s:
+                from_timestamp(array[-1]['time'])
-
+                -
                from_timestamp(array[0]['time'])
            )
            if (
                (lt_frame := (recv_frame_dur < expected_dur))
                or
                (null_frame := (frame_size_s == 0))
                # ^XXX, should NEVER hit now!
            ):
                # XXX: query result includes a start point prior to our
                # expected "frame size" and thus is likely some kind of
                # history gap (eg. market closed period, outage, etc.)
                # so just report it to console for now.
                if lt_frame:
                    reason = 'Possible GAP (or first-datum)'
                else:
                    assert null_frame
                    reason = 'NULL-FRAME'
                missing_dur: Interval = expected_dur.end - recv_frame_dur.end
                log.warning(
-                    'GAP DETECTED:\n'
+                    f'{timeframe}s-series {reason} detected!\n'
-                    f'last_start_dt: {last_start_dt}\n'
+                    f'fqme: {mkt.fqme}\n'
-                    f'diff: {diff}\n'
+                    f'last_start_dt: {last_start_dt}\n\n'
-                    f'frame_time_diff_s: {frame_time_diff_s}\n'
+                    f'recv interval: {recv_frame_dur}\n'
                    f'expected interval: {expected_dur}\n\n'
                    f'Missing duration of history of {missing_dur.in_words()!r}\n'
                    f'{missing_dur}\n'
                )
                # await tractor.pause()
            to_push = diff_history(
                array,
@ -565,22 +612,27 @@ async def start_backfill(
            # long-term storage.
            if (
                storage is not None
-                and write_tsdb
+                and
                write_tsdb
            ):
                log.info(
                    f'Writing {ln} frame to storage:\n'
                    f'{next_start_dt} -> {last_start_dt}'
                )
-                # always drop the src asset token for
+                # NOTE, always drop the src asset token for
                # non-currency-pair like market types (for now)
                #
                # THAT IS, for now our table key schema is NOT
                # including the dst[/src] source asset token. SO,
                # 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for
                # historical reasons ONLY.
                if mkt.dst.atype not in {
                    'crypto',
                    'crypto_currency',
                    'fiat',  # a "forex pair"
                    'perpetual_future',  # stupid "perps" from cex land
                }:
                    # for now, our table key schema is not including
                    # the dst[/src] source asset token.
                    col_sym_key: str = mkt.get_fqme(
                        delim_char='',
                        without_src=True,
@ -685,7 +737,7 @@ async def back_load_from_tsdb(
        last_tsdb_dt
        and latest_start_dt
    ):
-        backfilled_size_s = (
+        backfilled_size_s: Duration = (
            latest_start_dt - last_tsdb_dt
        ).seconds
        # if the shm buffer len is not large enough to contain
@ -908,6 +960,8 @@ async def tsdb_backfill(
            f'{pformat(config)}\n'
        )
        # concurrently load the provider's most-recent-frame AND any
        # pre-existing tsdb history already saved in `piker` storage.
        dt_eps: list[DateTime, DateTime] = []
        async with trio.open_nursery() as tn:
            tn.start_soon(
@ -918,7 +972,6 @@ async def tsdb_backfill(
                timeframe,
                config,
            )
            tsdb_entry: tuple = await load_tsdb_hist(
                storage,
                mkt,
@ -947,6 +1000,25 @@ async def tsdb_backfill(
                mr_end_dt,
            ) = dt_eps
            first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds
            calced_frame_size: Duration = mk_duration(
                seconds=first_frame_dur_s,
            )
            # NOTE, attempt to use the backend declared default frame
            # sizing (as allowed by their time-series query APIs) and
            # if not provided try to construct a default from the
            # first frame received above.
            def_frame_durs: dict[
                int,
                Duration,
            ]|None = config.get('frame_types', None)
            if def_frame_durs:
                def_frame_size: Duration = def_frame_durs[timeframe]
                assert def_frame_size == calced_frame_size
            else:
                # use what we calced from first frame above.
                def_frame_size = calced_frame_size
            # NOTE: when there's no offline data, there's 2 cases:
            # - data backend doesn't support timeframe/sample
            #   period (in which case `dt_eps` should be `None` and
@ -977,7 +1049,7 @@ async def tsdb_backfill(
                    partial(
                        start_backfill,
                        get_hist=get_hist,
-                        frame_types=config.get('frame_types', None),
+                        def_frame_duration=def_frame_size,
                        mod=mod,
                        mkt=mkt,
                        shm=shm,
--- a/piker/tsp/_anal.py
+++ b/piker/tsp/_anal.py
@ -616,6 +616,18 @@ def detect_price_gaps(
    # ])
    ...
 # TODO: probably just use the null_segs impl above?
 def detect_vlm_gaps(
    df: pl.DataFrame,
    col: str = 'volume',
 ) -> pl.DataFrame:
    '''
    Return the rows of `df` whose `col` value is exactly zero.

    Parameters:
    - `df`: the input frame to scan.
    - `col`: name of the column compared against `0`, by default
      `'volume'`.

    Returns a new frame holding only the matching rows; it is
    empty when no row has a zero in `col`.

    '''
    # NOTE, filter the passed-in frame directly; this previously
    # referenced an undefined `w_dts` name and thus raised
    # a `NameError` on every call.
    # NOTE(review): the old name hints that a `with_dts()`-processed
    # frame may have been intended as input - confirm with any
    # future caller.
    vnull: pl.DataFrame = df.filter(
        pl.col(col) == 0
    )
    return vnull
 def dedupe(
    src_df: pl.DataFrame,
@ -626,7 +638,6 @@ def dedupe(
 ) -> tuple[
    pl.DataFrame,  # with dts
    pl.DataFrame,  # gaps
    pl.DataFrame,  # with deduplicated dts (aka gap/repeat removal)
    int,  # len diff between input and deduped
 ]:
@ -639,19 +650,22 @@ def dedupe(
    '''
    wdts: pl.DataFrame = with_dts(src_df)
-    # maybe sort on any time field
+    deduped = wdts
    if sort:
        wdts = wdts.sort(by='time')
        # TODO: detect out-of-order segments which were corrected!
        # -[ ] report in log msg
        # -[ ] possibly return segment sections which were moved?
    # remove duplicated datetime samples/sections
    deduped: pl.DataFrame = wdts.unique(
-        subset=['dt'],
+        # subset=['dt'],
        subset=['time'],
        maintain_order=True,
    )
    # maybe sort on any time field
    if sort:
        deduped = deduped.sort(by='time')
        # TODO: detect out-of-order segments which were corrected!
        # -[ ] report in log msg
        # -[ ] possibly return segment sections which were moved?
    diff: int = (
        wdts.height
        -
Author	SHA1	Message	Date
Tyler Goodlet	521a194819	`.brokers.cli`: module type and todo for `--pdb` flag to NOT src from sub-cmd	2025-02-21 16:49:13 -05:00
Tyler Goodlet	a64b4272f4	Type loaded backend modules	2025-02-21 16:49:13 -05:00
Tyler Goodlet	b536c093d5	Bump various `.brokers.core` doc string content/style	2025-02-21 16:49:13 -05:00
Tyler Goodlet	c3f3b25524	Teensie `piker.data` styling tweaks - use more compact optional value style with `\|`-union - fix `.flows` typing-only import since we need `MktPair` to be immediately defined for use on a `msgspec.Struct` field. - more "tree-like" warning msg in `.validate()` reporting.	2025-02-21 16:37:42 -05:00
Tyler Goodlet	8dcdf7c9a9	Invert `getattr()` check for `get_mkt_pairs()` ep Such that we `return` early when not defined by the provider backend to reduce an indent level in `SymbologyCache.load()`.	2025-02-21 16:37:42 -05:00
Tyler Goodlet	4d15b9bfdd	Allow ledger passes to ignore (symcache) unknown fqmes For example in the paper-eng, if you have a backend that doesn't fully support a symcache (yet) it's handy to be able to ignore processing other paper-eng txns when all you care about at the moment is the simulated symbol. NOTE, that currently this will still result in a key-error when you load more than one mkt with the paper engine (for which the backend does not have the symcache implemented) since no fqme ad-hoc query was made for the 2nd symbol (and i'm not sure we should support that kinda hackery over just encouraging the sym-cache being added?). Def needs a little more thought depending on how many backends are never going to be able to (easily) support caching..	2025-02-21 16:37:42 -05:00
Gud Boi	5e371f1d73	Merge pull request 'jsonrpc_err_in_rent' (#41 ) from jsonrpc_err_in_rent into gitea_feats Reviewed-on: #41	2025-02-21 21:21:02 +00:00
Gud Boi	6c221bb348	Merge pull request 'tsp_gaps: fixes for fault-less OHLCV time-series loads' (#35 ) from tsp_gaps into gitea_feats Reviewed-on: #35	2025-02-21 20:46:37 +00:00
Tyler Goodlet	e391c896f8	Mk jsonrpc's underlying ws timeout `float('inf')` Since currently we're only using this IPC subsys for `deribit`, and generally speaking we're primarily supporting options markets (which are fairly "slow moving"), flip to a default of NOT resetting the `NoBsWs` on timeout since doing so normally breaks the json-rpc IPC session. Without a proper `fixture` passed to `open_autorecon_ws()` (which we should eventually implement!!) relying on a timeout-to-reset more or less will just cause breakage issues - a proper reconnect sequence must be implemented before using that feature. Deats, - expose and proxy through the `msg_recv_timeout` from `open_jsonrpc_session()` into the underlying `open_autorecon_ws()` call.	2025-02-19 17:05:13 -05:00
Tyler Goodlet	5633f5614d	Doc-n-clean `.data._web_bs.open_jsonrpc_session()` Add a doc-string reflecting recent refinements, drop all the old hook params, rename `n: trio.Nursery` -> `tn` for "task nursery" fitting with code base's naming style.	2025-02-19 17:05:13 -05:00
Tyler Goodlet	76735189de	data._web_bs: try to raise jsonrpc errors in parent task	2025-02-19 17:05:13 -05:00
Tyler Goodlet	d49608f74e	Refine history gap/termination signalling Namely handling backends which do not provide a default "frame size-duration" in their init-config by making the backfiller guess the value based on the first frame received. Deats, - adjust `start_backfill()` to take a more explicit `def_frame_duration: Duration` expected to be unpacked from any backend hist init-config by the `tsdb_backfill()` caller which now also computes a value from the first received frame when the config section isn't provided. - in `start_backfill()` we now always expect the `def_frame_duration` input and always decrement the query range by this value whenever a `NoData` is raised by the provider-backend paired with an explicit `log.warning()` about the handling. - also relay any `DataUnavailable.args[0]` message from the provider in the handler. - repair "gap reporting" which checks for expected frame duration vs. that received with much better humanized logging on the missing segment using `pendulum.Interval/Duration.in_words()` output.	2025-02-19 17:01:24 -05:00
Tyler Goodlet	bf0ac93aa3	Only use `frame_types` if delivered during enter The `open_history_client()` provider endpoint can optionally deliver a `frame_types: dict[int, pendulum.Duration]` subsection in its `config: dict[str, dict]` (as was implemented with the `ib` backend). This allows the `tsp` backfilling machinery to use this "recommended frame duration" to subtract from the `last_start_dt` any time a `NoData` gap is signalled by the `get_hist()` call allowing gaps to be ignored safely without missing history by knowing the next earliest dt we can query from using the `end_dt`. However, currently all crypto$ providers haven't implemented this feat yet.. As such only try to use the `frame_types` feature if provided when handling `NoData` conditions inside `tsp.start_backfill()` and otherwise raise as normal.	2025-02-19 17:01:24 -05:00
Tyler Goodlet	d7179d47b0	`.tsp._anal`: add (unused) `detect_vlm_gaps()`	2025-02-19 17:01:24 -05:00
Tyler Goodlet	c390e87536	`.storage.cli`: collect gap-markup-aids into `tf2aids: dict` prior to pause for introspection	2025-02-19 17:01:24 -05:00
Tyler Goodlet	5e4a6d61c7	Ignore any non-`.parquet` files under `.config/piker/nativedb/` subdir	2025-02-19 17:01:24 -05:00
Tyler Goodlet	3caaa30b03	Mask no-data pause, add perps to no-`/src`-in-fqme asset set Was orig for debugging an issue with `kucoin` i think but definitely shouldn't be left in XD Also add `'perpetual_future'` to the `.start_backfill()` input literal set since we don't expect the 'btc/usd.perp.binance' for now.	2025-02-19 17:01:24 -05:00