Add concurrent multi-time-frame history loading

Our default sample periods are 60s (1m) for the history chart and 1s for
the fast chart. This patch adds concurrent loading of both (or more)
sample-period data sets using the existing loading code, now with
support for looping through a passed "timeframe" table that maps each
sample period to its shm instance (see the sketch below).
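
A condensed sketch of that shape, lifted from the `basic_backfill()`
loop added below (the `shms` table and loop mirror the patch;
surrounding setup is elided):

    # the "timeframe" table: sample period (s) -> shm buffer
    shms: dict[int, ShmArray] = {
        1: rt_shm,     # fast (1s) chart buffer
        60: hist_shm,  # history (1m) chart buffer
    }

    # kick off one backfill task per sample period on the feed bus
    # nursery so both (or more) periods load concurrently.
    for timeframe, shm in shms.items():
        await bus.nursery.start(
            partial(
                start_backfill,
                mod,
                bfqsn,
                shm,
                timeframe=timeframe,
            )
        )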

More detailed adjustments include:
- breaking the "basic" and tsdb loading into 2 new funcs:
  `basic_backfill()` and `tsdb_backfill()`, the latter of which is run
  when the tsdb daemon is discovered.
- offsetting the fast shm buffer by one day's worth of 1s samples so
  that at most a day is backfilled as history in the fast chart.
- adjusting the bus task startup in `manage_history()` to deliver back
  the offset indices for both the fast and slow shms and set them on
  the `Feed` object as `.izero_hist/rt: int` values:
  - this allows the chart-UI linked view region handlers to use the
    offsets in the view-linking-transform math to index-align the
    history and fast charts (see the sketch after this list).
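
A minimal sketch of how the offsets are computed and surfaced; the
alignment formula at the end is illustrative only (the actual
view-linking-transform math lives in the chart UI code, not in this
patch):

    # in `manage_history()`: record each buffer's startup index
    izero_hist = hist_shm.index - 1   # history (1m) buffer
    izero_rt = rt_shm.index - 1       # fast (1s) buffer

    # shipped through the feed init msg and set on the `Feed`:
    #   feed.izero_hist: int
    #   feed.izero_rt: int
    #
    # (hypothetical) a linked-view handler could then map a fast-chart
    # index onto the history chart via the offsets:
    #   i_hist = feed.izero_hist + (i_rt - feed.izero_rt) // 60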
ib_1m_hist
Tyler Goodlet 2022-10-20 21:19:26 -04:00
parent 330d16262e
commit 956c7d3435
1 changed file with 286 additions and 195 deletions


@@ -21,7 +21,7 @@ This module is enabled for ``brokerd`` daemons.
"""
from __future__ import annotations
from contextlib import asynccontextmanager
from contextlib import asynccontextmanager as acm
from dataclasses import (
dataclass,
field,
@@ -287,7 +287,7 @@ async def start_backfill(
- pendulum.from_timestamp(times[-2])
).seconds
# "frame"'s worth of sample period steps in seconds
# frame's worth of sample-period-steps, in seconds
frame_size_s = len(array) * step_size_s
to_push = diff_history(
@@ -298,7 +298,7 @@ async def start_backfill(
)
log.info(f'Pushing {to_push.size} to shm!')
shm.push(to_push)
shm.push(to_push, prepend=True)
# TODO: *** THIS IS A BUG ***
# we need to only broadcast to subscribers for this fqsn..
@@ -310,7 +310,11 @@ async def start_backfill(
bf_done = trio.Event()
# let caller unblock and deliver latest history frame
task_status.started((start_dt, end_dt, bf_done))
task_status.started((
start_dt,
end_dt,
bf_done,
))
# based on the sample step size, maybe load a certain amount of history
if last_tsdb_dt is None:
@@ -325,14 +329,14 @@ async def start_backfill(
# when no tsdb "last datum" is provided, we just load
# some near-term history.
periods = {
1: {'seconds': 4000},
1: {'days': 1},
60: {'days': 14},
}
if tsdb_is_up:
# do a decently sized backfill and load it into storage.
periods = {
1: {'days': 6},
1: {'days': 1},
60: {'years': 6},
}
@@ -461,6 +465,186 @@ async def start_backfill(
return
async def basic_backfill(
bus: _FeedsBus,
mod: ModuleType,
bfqsn: str,
shms: dict[int, ShmArray],
) -> None:
# do a legacy incremental backfill from the provider.
log.info('No TSDB (marketstored) found, doing basic backfill..')
# start the history backfill task; ``backfill_bars()`` is
# a required backend func and must block until shm is
# filled with the first set of ohlc bars
for timeframe, shm in shms.items():
await bus.nursery.start(
partial(
start_backfill,
mod,
bfqsn,
shm,
timeframe=timeframe,
)
)
async def tsdb_backfill(
mod: ModuleType,
marketstore: ModuleType,
bus: _FeedsBus,
storage: Storage,
fqsn: str,
bfqsn: str,
shms: dict[int, ShmArray],
# some_data_ready: trio.Event,
task_status: TaskStatus[
tuple[ShmArray, ShmArray]
] = trio.TASK_STATUS_IGNORED,
) -> None:
# TODO: this should be used verbatim for the pure
# shm backfiller approach below.
dts_per_tf: dict[int, datetime] = {}
# start history analysis and load missing new data via backend.
for timeframe, shm in shms.items():
series, _, last_tsdb_dt = await storage.load(
fqsn,
timeframe=timeframe,
)
broker, symbol, expiry = unpack_fqsn(fqsn)
(
latest_start_dt,
latest_end_dt,
bf_done,
) = await bus.nursery.start(
partial(
start_backfill,
mod,
bfqsn,
shm,
timeframe=timeframe,
last_tsdb_dt=last_tsdb_dt,
tsdb_is_up=True,
storage=storage,
)
)
dts_per_tf[timeframe] = (
series.get(timeframe),
last_tsdb_dt,
latest_start_dt,
latest_end_dt,
)
# if len(hist_shm.array) < 2:
# TODO: there's an edge case here to solve where if the last
# frame before market close (at least on ib) was pushed and
# there was only "1 new" row pushed from the first backfill
# query-iteration, then the sample step sizing calcs will
# break upstream from here since you can't diff on at least
# 2 steps... probably should also add logic to compute from
# the tsdb series and stash that somewhere as meta data on
# the shm buffer?.. no se.
# unblock the feed bus management task
assert len(shms[1].array)
task_status.started((
shms[60],
shms[1],
))
# sync to backend history task's query/load completion
await bf_done.wait()
for timeframe, shm in shms.items():
(
tsdb_history,
last_tsdb_dt,
latest_start_dt,
latest_end_dt,
) = dts_per_tf[timeframe]
# do diff against last start frame of history and only fill
# in from the tsdb an allotment that allows for most recent
# to be loaded into mem *before* tsdb data.
if last_tsdb_dt:
dt_diff_s = (
latest_start_dt - last_tsdb_dt
).seconds
else:
dt_diff_s = 0
# TODO: see if there's faster multi-field reads:
# https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
# re-index with a `time` and index field
prepend_start = shm._first.value
# sanity check on most-recent-data loading
assert prepend_start > dt_diff_s
if tsdb_history and len(tsdb_history):
to_push = tsdb_history[:prepend_start]
shm.push(
to_push,
# insert the history before a "day's worth" of samples
# to leave some real-time buffer space at the end.
prepend=True,
# update_first=False,
# start=prepend_start,
field_map=marketstore.ohlc_key_map,
)
# load as much from storage into shm as space will
# allow according to user's shm size settings.
end = tsdb_history['Epoch'][0]
while shm._first.value > 0:
series = await storage.read_ohlcv(
fqsn,
end=end,
timeframe=timeframe,
)
prepend_start -= len(to_push)
to_push = tsdb_history[:prepend_start]
shm.push(
to_push,
# insert the history before a "day's worth" of samples
# to leave some real-time buffer space at the end.
prepend=True,
# update_first=False,
# start=prepend_start,
field_map=marketstore.ohlc_key_map,
)
# manually trigger step update to update charts/fsps
# which need an incremental update.
# NOTE: the way this works is super duper
# un-intuitive right now:
# - the broadcaster fires a msg to the fsp subsystem.
# - fsp subsys then checks for a sample step diff and
# possibly recomputes prepended history.
# - the fsp then sends back to the parent actor
# (usually a chart showing graphics for said fsp)
# which tells the chart to conduct a manual full
# graphics loop cycle.
for delay_s in sampler.subscribers:
await broadcast(delay_s)
log.info(f'Loaded {to_push.shape} datums from storage')
# TODO: write new data to tsdb to be ready for the next read.
async def manage_history(
mod: ModuleType,
bus: _FeedsBus,
@@ -469,7 +653,9 @@ async def manage_history(
feed_is_live: trio.Event,
timeframe: float = 60, # in seconds
task_status: TaskStatus = trio.TASK_STATUS_IGNORED,
task_status: TaskStatus[
tuple[ShmArray, ShmArray]
] = trio.TASK_STATUS_IGNORED,
) -> None:
'''
@@ -490,6 +676,8 @@ async def manage_history(
# we expect the sub-actor to write
readonly=False,
)
hist_zero_index = hist_shm.index - 1
# TODO: history validation
if not opened:
raise RuntimeError(
@@ -506,184 +694,93 @@ async def manage_history(
readonly=False,
size=3*_secs_in_day,
)
# (for now) set the rt (hft) shm array with space to prepend
# only a day's worth of 1s history.
days = 1
rt_shm._first.value = days*_secs_in_day
rt_shm._last.value = days*_secs_in_day
rt_zero_index = rt_shm.index - 1
if not opened:
raise RuntimeError(
"Persistent shm for sym was already open?!"
)
log.info('Scanning for existing `marketstored`')
is_up = await check_for_service('marketstored')
# for now only do backfilling if no tsdb can be found
do_legacy_backfill = not is_up and opened
tsdb_is_up = await check_for_service('marketstored')
bfqsn = fqsn.replace('.' + mod.name, '')
open_history_client = getattr(mod, 'open_history_client', None)
assert open_history_client
if is_up and opened and open_history_client:
if (
tsdb_is_up
and opened
and open_history_client
):
log.info('Found existing `marketstored`')
from . import marketstore
async with marketstore.open_storage_client(
fqsn,
) as storage:
# TODO: this should be used verbatim for the pure
# shm backfiller approach below.
# start history analysis and load missing new data via backend.
series, _, last_tsdb_dt = await storage.load(
async with (
marketstore.open_storage_client(fqsn) as storage,
):
hist_shm, rt_shm = await bus.nursery.start(
tsdb_backfill,
mod,
marketstore,
bus,
storage,
fqsn,
timeframe=timeframe,
bfqsn,
{
1: rt_shm,
60: hist_shm,
},
# some_data_ready=some_data_ready,
# task_status=task_status,
)
broker, symbol, expiry = unpack_fqsn(fqsn)
(
latest_start_dt,
latest_end_dt,
bf_done,
) = await bus.nursery.start(
partial(
start_backfill,
mod,
bfqsn,
hist_shm,
timeframe=timeframe,
last_tsdb_dt=last_tsdb_dt,
tsdb_is_up=True,
storage=storage,
)
)
# yield back after client connect with filled shm
task_status.started((
hist_zero_index,
hist_shm,
rt_zero_index,
rt_shm,
))
# if len(hist_shm.array) < 2:
# TODO: there's an edge case here to solve where if the last
# frame before market close (at least on ib) was pushed and
# there was only "1 new" row pushed from the first backfill
# query-iteration, then the sample step sizing calcs will
# break upstream from here since you can't diff on at least
# 2 steps... probably should also add logic to compute from
# the tsdb series and stash that somewhere as meta data on
# the shm buffer?.. no se.
task_status.started((hist_shm, rt_shm))
# indicate to caller that feed can be delivered to
# remote requesting client since we've loaded history
# data that can be used.
some_data_ready.set()
await bf_done.wait()
# do diff against last start frame of history and only fill
# in from the tsdb an allotment that allows for most recent
# to be loaded into mem *before* tsdb data.
if last_tsdb_dt:
dt_diff_s = (
latest_start_dt - last_tsdb_dt
).seconds
else:
dt_diff_s = 0
# history retrieval loop depending on user interaction and thus
# a small RPC protocol for remotely controlling what data is loaded
# for viewing.
await trio.sleep_forever()
# TODO: see if there's faster multi-field reads:
# https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
# re-index with a `time` and index field
prepend_start = hist_shm._first.value
# sanity check on most-recent-data loading
assert prepend_start > dt_diff_s
history = list(series.values())
if history:
fastest = history[0]
to_push = fastest[:prepend_start]
hist_shm.push(
to_push,
# insert the history before a "day's worth" of samples
# to leave some real-time buffer space at the end.
prepend=True,
# update_first=False,
# start=prepend_start,
field_map=marketstore.ohlc_key_map,
)
# load as much from storage into shm as space will
# allow according to user's shm size settings.
count = 0
end = fastest['Epoch'][0]
while hist_shm._first.value > 0:
count += 1
series = await storage.read_ohlcv(
fqsn,
end=end,
timeframe=timeframe,
)
history = list(series.values())
fastest = history[0]
end = fastest['Epoch'][0]
prepend_start -= len(to_push)
to_push = fastest[:prepend_start]
hist_shm.push(
to_push,
# insert the history before a "day's worth" of samples
# to leave some real-time buffer space at the end.
prepend=True,
# update_first=False,
# start=prepend_start,
field_map=marketstore.ohlc_key_map,
)
# manually trigger step update to update charts/fsps
# which need an incremental update.
# NOTE: the way this works is super duper
# un-intuitive right now:
# - the broadcaster fires a msg to the fsp subsystem.
# - fsp subsys then checks for a sample step diff and
# possibly recomputes prepended history.
# - the fsp then sends back to the parent actor
# (usually a chart showing graphics for said fsp)
# which tells the chart to conduct a manual full
# graphics loop cycle.
for delay_s in sampler.subscribers:
await broadcast(delay_s)
if count > 6:
break
log.info(f'Loaded {to_push.shape} datums from storage')
# TODO: write new data to tsdb to be ready for the next read.
if do_legacy_backfill:
# do a legacy incremental backfill from the provider.
log.info('No existing `marketstored` found..')
# start the history backfill task; ``backfill_bars()`` is
# a required backend func and must block until shm is
# filled with the first set of ohlc bars
await bus.nursery.start(
partial(
start_backfill,
mod,
bfqsn,
hist_shm,
timeframe=timeframe,
)
# load less history if no tsdb can be found
elif (
not tsdb_is_up
and opened
):
await basic_backfill(
bus,
mod,
bfqsn,
shms={
1: rt_shm,
60: hist_shm,
},
)
# yield back after client connect with filled shm
task_status.started((hist_shm, rt_shm))
# indicate to caller that feed can be delivered to
# remote requesting client since we've loaded history
# data that can be used.
task_status.started((
hist_zero_index,
hist_shm,
rt_zero_index,
rt_shm,
))
some_data_ready.set()
# history retrieval loop depending on user interaction and thus
# a small RPC protocol for remotely controlling what data is loaded
# for viewing.
await trio.sleep_forever()
await trio.sleep_forever()
async def allocate_persistent_feed(
@@ -750,7 +847,12 @@ async def allocate_persistent_feed(
# https://github.com/python-trio/trio/issues/2258
# bus.nursery.start_soon(
# await bus.start_task(
hist_shm, rt_shm = await bus.nursery.start(
(
izero_hist,
hist_shm,
izero_rt,
rt_shm,
) = await bus.nursery.start(
manage_history,
mod,
bus,
@@ -764,7 +866,9 @@ async def allocate_persistent_feed(
# this task.
msg = init_msg[symbol]
msg['hist_shm_token'] = hist_shm.token
msg['startup_hist_index'] = hist_shm.index - 1
# msg['startup_hist_index'] = hist_shm.index - 1
msg['izero_hist'] = izero_hist
msg['izero_rt'] = izero_rt
msg['rt_shm_token'] = rt_shm.token
# true fqsn
@@ -794,31 +898,19 @@ async def allocate_persistent_feed(
fqsn: first_quote,
}
# for ambiguous names we simply apply the retrieved
# feed to that name (for now).
bus.feeds[symbol] = bus.feeds[bfqsn] = (
init_msg,
generic_first_quotes,
)
# for ambiguous names we simply apply the retrieved
# feed to that name (for now).
# insert 1s ohlc into the increment buffer set
# to update and shift every second
sampler.ohlcv_shms.setdefault(
1,
[]
).append(rt_shm)
ohlckeys = ['open', 'high', 'low', 'close']
# set the rt (hft) shm array as append only
# (for now).
rt_shm._first.value = 0
rt_shm._last.value = 0
# push last sample from history to rt buffer just as a filler datum
# but we don't want a history sized datum outlier so set vlm to zero
# and ohlc to the close value.
rt_shm.push(hist_shm.array[-2:-1])
rt_shm.array[ohlckeys] = hist_shm.array['close'][-1]
rt_shm._array['volume'] = 0
task_status.started()
@@ -829,16 +921,12 @@ async def allocate_persistent_feed(
# the backend will indicate when real-time quotes have begun.
await feed_is_live.wait()
# start shm incrementer task for OHLC style sampling
# at the current detected step period.
times = hist_shm.array['time']
delay_s = times[-1] - times[times != times[-1]][-1]
sampler.ohlcv_shms.setdefault(delay_s, []).append(hist_shm)
# insert 1m ohlc into the increment buffer set
# to shift every 60s.
sampler.ohlcv_shms.setdefault(60, []).append(hist_shm)
# create a single buffer incrementer task in the broker backend
# (aka `brokerd`) using the lowest sampler period.
# await tractor.breakpoint()
# for delay_s in sampler.ohlcv_shms:
if sampler.incrementers.get(_default_delay_s) is None:
await bus.start_task(
increment_ohlc_buffer,
@@ -849,7 +937,8 @@ async def allocate_persistent_feed(
'shm_write_opts', {}
).get('sum_tick_vlm', True)
# start sample loop
# start sample loop and shm incrementer task for OHLC style sampling
# at the above registered step periods.
try:
await sample_and_broadcast(
bus,
@@ -1037,7 +1126,8 @@ class Feed:
stream: trio.abc.ReceiveChannel[dict[str, Any]]
status: dict[str, Any]
startup_hist_index: int = 0
izero_hist: int = 0
izero_rt: int = 0
throttle_rate: Optional[int] = None
@@ -1055,7 +1145,7 @@ class Feed:
async def receive(self) -> dict:
return await self.stream.receive()
@asynccontextmanager
@acm
async def index_stream(
self,
delay_s: int = 1,
@@ -1116,7 +1206,7 @@ class Feed:
)
@asynccontextmanager
@acm
async def install_brokerd_search(
portal: tractor.Portal,
@@ -1150,7 +1240,7 @@ async def install_brokerd_search(
yield
@asynccontextmanager
@acm
async def open_feed(
fqsns: list[str],
@@ -1226,7 +1316,8 @@ async def open_feed(
stream=stream,
_portal=portal,
status={},
startup_hist_index=init['startup_hist_index'],
izero_hist=init['izero_hist'],
izero_rt=init['izero_rt'],
throttle_rate=tick_throttle,
)
@@ -1278,7 +1369,7 @@ async def maybe_open_feed(
await ctx.cancel()
@asynccontextmanager
@acm
async def maybe_open_feed(
fqsns: list[str],