Start `piker.storage` subsys: cross-(ts)db middlewares

The plan is to offer multiple tsdb and other storage backends (for a variety of use cases) and expose them similarly to how we do for broker and data providers B)
2023-03-09 15:30:18 -05:00 · 2023-03-09 15:30:18 -05:00 · 29211b200d
parent ae8358a5e7
commit 29211b200d
3 changed files with 446 additions and 386 deletions
--- a/piker/data/feed.py
+++ b/piker/data/feed.py
@ -718,7 +718,7 @@ async def install_brokerd_search(

    async with portal.open_context(
        brokermod.open_symbol_search
-    ) as (ctx, cache):
+    ) as (ctx, _):

        # shield here since we expect the search rpc to be
        # cancellable by the user as they see fit.
--- a/piker/service/marketstore.py
+++ b/piker/service/marketstore.py
@ -1,5 +1,5 @@
 # piker: trading gear for hackers
-# Copyright (C) Tyler Goodlet (in stewardship for piker0)
+# Copyright (C) Tyler Goodlet (in stewardship for pikers)

 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@ -25,11 +25,9 @@
 '''
 from __future__ import annotations
 from contextlib import asynccontextmanager as acm
-from datetime import datetime
 from pprint import pformat
 from typing import (
    Any,
-    Union,
    TYPE_CHECKING,
 )
 import time
@ -37,31 +35,34 @@ from math import isnan
 from pathlib import Path

 from bidict import bidict
-from msgspec.msgpack import encode, decode
+from msgspec.msgpack import (
+    encode,
+    decode,
+)
 # import pyqtgraph as pg
 import numpy as np
 import tractor
 from trio_websocket import open_websocket_url
-from anyio_marketstore import (
+from anyio_marketstore import (  # noqa
    open_marketstore_client,
    MarketstoreClient,
    Params,
 )
 import pendulum
-import purerpc
+# TODO: import this for specific error set expected by mkts client
+# import purerpc
+
+from ..data.feed import maybe_open_feed
+from . import Services
+from ._util import (
+    log,  # sub-sys logger
+    get_console_log,
+)

 if TYPE_CHECKING:
    import docker
    from ._ahab import DockerContainer

-from ._util import (
-    log,  # sub-sys logger
-    get_console_log,
-)
-from . import Services
-from ..data.feed import maybe_open_feed
-from .._profile import Profiler
-from .. import config


 # ahabd-supervisor and container level config
@ -432,375 +433,6 @@ tf_in_1s = bidict({
 })


-class Storage:
-    '''
-    High level storage api for both real-time and historical ingest.
-
-    '''
-    def __init__(
-        self,
-        client: MarketstoreClient,
-
-    ) -> None:
-        # TODO: eventually this should be an api/interface type that
-        # ensures we can support multiple tsdb backends.
-        self.client = client
-
-        # series' cache from tsdb reads
-        self._arrays: dict[str, np.ndarray] = {}
-
-    async def list_keys(self) -> list[str]:
-        return await self.client.list_symbols()
-
-    async def search_keys(self, pattern: str) -> list[str]:
-        '''
-        Search for time series key in the storage backend.
-
-        '''
-        ...
-
-    async def write_ticks(self, ticks: list) -> None:
-        ...
-
-    async def load(
-        self,
-        fqme: str,
-        timeframe: int,
-
-    ) -> tuple[
-        np.ndarray,  # timeframe sampled array-series
-        datetime | None,  # first dt
-        datetime | None,  # last dt
-    ]:
-
-        first_tsdb_dt, last_tsdb_dt = None, None
-        hist = await self.read_ohlcv(
-            fqme,
-            # on first load we don't need to pull the max
-            # history per request size worth.
-            limit=3000,
-            timeframe=timeframe,
-        )
-        log.info(f'Loaded tsdb history {hist}')
-
-        if len(hist):
-            times = hist['Epoch']
-            first, last = times[0], times[-1]
-            first_tsdb_dt, last_tsdb_dt = map(
-                pendulum.from_timestamp, [first, last]
-            )
-
-        return (
-            hist,  # array-data
-            first_tsdb_dt,  # start of query-frame
-            last_tsdb_dt,  # most recent
-        )
-
-    async def read_ohlcv(
-        self,
-        fqme: str,
-        timeframe: int | str,
-        end: int | None = None,
-        limit: int = int(800e3),
-
-    ) -> np.ndarray:
-
-        client = self.client
-        syms = await client.list_symbols()
-
-        if fqme not in syms:
-            return {}
-
-        # use the provided timeframe or 1s by default
-        tfstr = tf_in_1s.get(timeframe, tf_in_1s[1])
-
-        params = Params(
-            symbols=fqme,
-            timeframe=tfstr,
-            attrgroup='OHLCV',
-            end=end,
-            # limit_from_start=True,
-
-            # TODO: figure the max limit here given the
-            # ``purepc`` msg size limit of purerpc: 33554432
-            limit=limit,
-        )
-
-        for i in range(3):
-            try:
-                result = await client.query(params)
-                break
-            except purerpc.grpclib.exceptions.UnknownError as err:
-                if 'snappy' in err.args:
-                    await tractor.breakpoint()
-
-                # indicate there is no history for this timeframe
-                log.exception(
-                    f'Unknown mkts QUERY error: {params}\n'
-                    f'{err.args}'
-                )
-        else:
-            return {}
-
-        # TODO: it turns out column access on recarrays is actually slower:
-        # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist
-        # it might make sense to make these structured arrays?
-        data_set = result.by_symbols()[fqme]
-        array = data_set.array
-
-        # XXX: ensure sample rate is as expected
-        time = data_set.array['Epoch']
-        if len(time) > 1:
-            time_step = time[-1] - time[-2]
-            ts = tf_in_1s.inverse[data_set.timeframe]
-
-            if time_step != ts:
-                log.warning(
-                    f'MKTS BUG: wrong timeframe loaded: {time_step}'
-                    'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG'
-                    f'WIPING HISTORY FOR {ts}s'
-                )
-                await self.delete_ts(fqme, timeframe)
-
-                # try reading again..
-                return await self.read_ohlcv(
-                    fqme,
-                    timeframe,
-                    end,
-                    limit,
-                )
-
-        return array
-
-    async def delete_ts(
-        self,
-        key: str,
-        timeframe: Union[int, str | None] = None,
-        fmt: str = 'OHLCV',
-
-    ) -> bool:
-
-        client = self.client
-        syms = await client.list_symbols()
-        if key not in syms:
-            await tractor.breakpoint()
-            raise KeyError(f'`{key}` table key not found in\n{syms}?')
-
-        tbk = mk_tbk((
-            key,
-            tf_in_1s.get(timeframe, tf_in_1s[60]),
-            fmt,
-        ))
-        return await client.destroy(tbk=tbk)
-
-    async def write_ohlcv(
-        self,
-        fqme: str,
-        ohlcv: np.ndarray,
-        timeframe: int,
-        append_and_duplicate: bool = True,
-        limit: int = int(800e3),
-
-    ) -> None:
-        # build mkts schema compat array for writing
-        mkts_dt = np.dtype(_ohlcv_dt)
-        mkts_array = np.zeros(
-            len(ohlcv),
-            dtype=mkts_dt,
-        )
-        # copy from shm array (yes it's this easy):
-        # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays
-        mkts_array[:] = ohlcv[[
-            'time',
-            'open',
-            'high',
-            'low',
-            'close',
-            'volume',
-        ]]
-
-        m, r = divmod(len(mkts_array), limit)
-
-        tfkey = tf_in_1s[timeframe]
-        for i in range(m, 1):
-            to_push = mkts_array[i-1:i*limit]
-
-            # write to db
-            resp = await self.client.write(
-                to_push,
-                tbk=f'{fqme}/{tfkey}/OHLCV',
-
-                # NOTE: will will append duplicates
-                # for the same timestamp-index.
-                # TODO: pre-deduplicate?
-                isvariablelength=append_and_duplicate,
-            )
-
-            log.info(
-                f'Wrote {mkts_array.size} datums to tsdb\n'
-            )
-
-            for resp in resp.responses:
-                err = resp.error
-                if err:
-                    raise MarketStoreError(err)
-
-        if r:
-            to_push = mkts_array[m*limit:]
-
-            # write to db
-            resp = await self.client.write(
-                to_push,
-                tbk=f'{fqme}/{tfkey}/OHLCV',
-
-                # NOTE: will will append duplicates
-                # for the same timestamp-index.
-                # TODO: pre deduplicate?
-                isvariablelength=append_and_duplicate,
-            )
-
-            log.info(
-                f'Wrote {mkts_array.size} datums to tsdb\n'
-            )
-
-            for resp in resp.responses:
-                err = resp.error
-                if err:
-                    raise MarketStoreError(err)
-
-    # XXX: currently the only way to do this is through the CLI:
-
-    # sudo ./marketstore connect --dir ~/.config/piker/data
-    # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15
-    # and this seems to block and use up mem..
-    # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15
-
-    # relevant source code for this is here:
-    # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14
-    # def delete_range(self, start_dt, end_dt) -> None:
-    #     ...
-
-
-@acm
-async def open_storage_client(
-    host: str,
-    grpc_port: int,
-
-) -> tuple[Storage, dict[str, np.ndarray]]:
-    '''
-    Load a series by key and deliver in ``numpy`` struct array format.
-
-    '''
-    async with (
-        # eventually a storage backend endpoint
-        get_client(
-            host=host,
-            port=grpc_port,
-        ) as client,
-    ):
-        # slap on our wrapper api
-        yield Storage(client)
-
-
-@acm
-async def open_tsdb_client(
-    fqme: str,
-) -> Storage:
-
-    # TODO: real-time dedicated task for ensuring
-    # history consistency between the tsdb, shm and real-time feed..
-
-    # update sequence design notes:
-
-    # - load existing highest frequency data from mkts
-    #   * how do we want to offer this to the UI?
-    #    - lazy loading?
-    #    - try to load it all and expect graphics caching/diffing
-    #      to  hide extra bits that aren't in view?
-
-    # - compute the diff between latest data from broker and shm
-    #   * use sql api in mkts to determine where the backend should
-    #     start querying for data?
-    #   * append any diff with new shm length
-    #   * determine missing (gapped) history by scanning
-    #   * how far back do we look?
-
-    # - begin rt update ingest and aggregation
-    #   * could start by always writing ticks to mkts instead of
-    #     worrying about a shm queue for now.
-    #   * we have a short list of shm queues worth groking:
-    #     - https://github.com/pikers/piker/issues/107
-    #   * the original data feed arch blurb:
-    #     - https://github.com/pikers/piker/issues/98
-    #
-    profiler = Profiler(
-        disabled=True,  # not pg_profile_enabled(),
-        delayed=False,
-    )
-
-    # load any user service settings for connecting to
-    rootconf, path = config.load(
-        'conf',
-        touch_if_dne=True,
-    )
-    tsdbconf = rootconf['network'].get('tsdb')
-    # backend = tsdbconf.pop('backend')
-    async with (
-        open_storage_client(
-            **tsdbconf,
-        ) as storage,
-
-        maybe_open_feed(
-            [fqme],
-            start_stream=False,
-
-        ) as feed,
-    ):
-        profiler(f'opened feed for {fqme}')
-
-        # to_append = feed.hist_shm.array
-        # to_prepend = None
-
-        if fqme:
-            flume = feed.flumes[fqme]
-            symbol = flume.mkt
-            if symbol:
-                fqme = symbol.fqme
-
-            # diff db history with shm and only write the missing portions
-            # ohlcv = flume.hist_shm.array
-
-            # TODO: use pg profiler
-            # for secs in (1, 60):
-            #     tsdb_array = await storage.read_ohlcv(
-            #         fqme,
-            #         timeframe=timeframe,
-            #     )
-            #     # hist diffing:
-            #     # these aren't currently used but can be referenced from
-            #     # within the embedded ipython shell below.
-            #     to_append = ohlcv[ohlcv['time'] > ts['Epoch'][-1]]
-            #     to_prepend = ohlcv[ohlcv['time'] < ts['Epoch'][0]]
-
-            # profiler('Finished db arrays diffs')
-
-            _ = await storage.client.list_symbols()
-            # log.info(f'Existing tsdb symbol set:\n{pformat(syms)}')
-            # profiler(f'listed symbols {syms}')
-            yield storage
-
-        # for array in [to_append, to_prepend]:
-        #     if array is None:
-        #         continue
-
-        #     log.info(
-        #         f'Writing datums {array.size} -> to tsdb from shm\n'
-        #     )
-        #     await storage.write_ohlcv(fqme, array)
-
-        # profiler('Finished db writes')
-
-
 async def ingest_quote_stream(
    symbols: list[str],
    brokername: str,
@ -963,5 +595,3 @@ async def stream_quotes(

            if quotes:
                yield quotes
-
-
--- a/piker/storage/init.py
+++ b/piker/storage/init.py
@ -0,0 +1,430 @@
+# piker: trading gear for hackers
+# Copyright (C) Tyler Goodlet (in stewardship for pikers)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+(time-series) database middle ware layer.
+
+- APIs for read, write, delete, replicate over multiple
+  db systems.
+- backend agnostic tick msg ingest machinery.
+- broadcast systems for fan out of real-time ingested
+  data to live consumers.
+- test harness utilities for data-processing verification.
+
+'''
+from __future__ import annotations
+from contextlib import asynccontextmanager as acm
+from datetime import datetime
+# from pprint import pformat
+from typing import (
+    Union,
+)
+
+import tractor
+import numpy as np
+from anyio_marketstore import (
+    Params,
+)
+import pendulum
+import purerpc
+
+from . import config
+from ..service.marketstore import (
+    MarketstoreClient,
+    tf_in_1s,
+    mk_tbk,
+    _ohlcv_dt,
+    MarketStoreError,
+)
+from ..data.feed import maybe_open_feed
+from ..log import get_logger
+from .._profile import Profiler
+
+
+log = get_logger(__name__)
+
+
+class Storage:
+    '''
+    High level storage api for both real-time and historical ingest.
+
+    '''
+    def __init__(
+        self,
+        client: MarketstoreClient,
+
+    ) -> None:
+        # TODO: eventually this should be an api/interface type that
+        # ensures we can support multiple tsdb backends.
+        self.client = client
+
+        # series' cache from tsdb reads
+        self._arrays: dict[str, np.ndarray] = {}
+
+    async def list_keys(self) -> list[str]:
+        return await self.client.list_symbols()
+
+    async def search_keys(self, pattern: str) -> list[str]:
+        '''
+        Search for time series key in the storage backend.
+
+        '''
+        ...
+
+    async def write_ticks(self, ticks: list) -> None:
+        ...
+
+    async def load(
+        self,
+        fqme: str,
+        timeframe: int,
+
+    ) -> tuple[
+        np.ndarray,  # timeframe sampled array-series
+        datetime | None,  # first dt
+        datetime | None,  # last dt
+    ]:
+
+        first_tsdb_dt, last_tsdb_dt = None, None
+        hist = await self.read_ohlcv(
+            fqme,
+            # on first load we don't need to pull the max
+            # history per request size worth.
+            limit=3000,
+            timeframe=timeframe,
+        )
+        log.info(f'Loaded tsdb history {hist}')
+
+        if len(hist):
+            times = hist['Epoch']
+            first, last = times[0], times[-1]
+            first_tsdb_dt, last_tsdb_dt = map(
+                pendulum.from_timestamp, [first, last]
+            )
+
+        return (
+            hist,  # array-data
+            first_tsdb_dt,  # start of query-frame
+            last_tsdb_dt,  # most recent
+        )
+
+    async def read_ohlcv(
+        self,
+        fqme: str,
+        timeframe: int | str,
+        end: int | None = None,
+        limit: int = int(800e3),
+
+    ) -> np.ndarray:
+
+        client = self.client
+        syms = await client.list_symbols()
+
+        if fqme not in syms:
+            return {}
+
+        # use the provided timeframe or 1s by default
+        tfstr = tf_in_1s.get(timeframe, tf_in_1s[1])
+
+        params = Params(
+            symbols=fqme,
+            timeframe=tfstr,
+            attrgroup='OHLCV',
+            end=end,
+            # limit_from_start=True,
+
+            # TODO: figure the max limit here given the
+            # ``purepc`` msg size limit of purerpc: 33554432
+            limit=limit,
+        )
+
+        for i in range(3):
+            try:
+                result = await client.query(params)
+                break
+            except purerpc.grpclib.exceptions.UnknownError as err:
+                if 'snappy' in err.args:
+                    await tractor.breakpoint()
+
+                # indicate there is no history for this timeframe
+                log.exception(
+                    f'Unknown mkts QUERY error: {params}\n'
+                    f'{err.args}'
+                )
+        else:
+            return {}
+
+        # TODO: it turns out column access on recarrays is actually slower:
+        # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist
+        # it might make sense to make these structured arrays?
+        data_set = result.by_symbols()[fqme]
+        array = data_set.array
+
+        # XXX: ensure sample rate is as expected
+        time = data_set.array['Epoch']
+        if len(time) > 1:
+            time_step = time[-1] - time[-2]
+            ts = tf_in_1s.inverse[data_set.timeframe]
+
+            if time_step != ts:
+                log.warning(
+                    f'MKTS BUG: wrong timeframe loaded: {time_step}'
+                    'YOUR DATABASE LIKELY CONTAINS BAD DATA FROM AN OLD BUG'
+                    f'WIPING HISTORY FOR {ts}s'
+                )
+                await self.delete_ts(fqme, timeframe)
+
+                # try reading again..
+                return await self.read_ohlcv(
+                    fqme,
+                    timeframe,
+                    end,
+                    limit,
+                )
+
+        return array
+
+    async def delete_ts(
+        self,
+        key: str,
+        timeframe: Union[int, str | None] = None,
+        fmt: str = 'OHLCV',
+
+    ) -> bool:
+
+        client = self.client
+        syms = await client.list_symbols()
+        if key not in syms:
+            await tractor.breakpoint()
+            raise KeyError(f'`{key}` table key not found in\n{syms}?')
+
+        tbk = mk_tbk((
+            key,
+            tf_in_1s.get(timeframe, tf_in_1s[60]),
+            fmt,
+        ))
+        return await client.destroy(tbk=tbk)
+
+    async def write_ohlcv(
+        self,
+        fqme: str,
+        ohlcv: np.ndarray,
+        timeframe: int,
+        append_and_duplicate: bool = True,
+        limit: int = int(800e3),
+
+    ) -> None:
+        # build mkts schema compat array for writing
+        mkts_dt = np.dtype(_ohlcv_dt)
+        mkts_array = np.zeros(
+            len(ohlcv),
+            dtype=mkts_dt,
+        )
+        # copy from shm array (yes it's this easy):
+        # https://numpy.org/doc/stable/user/basics.rec.html#assignment-from-other-structured-arrays
+        mkts_array[:] = ohlcv[[
+            'time',
+            'open',
+            'high',
+            'low',
+            'close',
+            'volume',
+        ]]
+
+        m, r = divmod(len(mkts_array), limit)
+
+        tfkey = tf_in_1s[timeframe]
+        for i in range(m, 1):
+            to_push = mkts_array[i-1:i*limit]
+
+            # write to db
+            resp = await self.client.write(
+                to_push,
+                tbk=f'{fqme}/{tfkey}/OHLCV',
+
+                # NOTE: will will append duplicates
+                # for the same timestamp-index.
+                # TODO: pre-deduplicate?
+                isvariablelength=append_and_duplicate,
+            )
+
+            log.info(
+                f'Wrote {mkts_array.size} datums to tsdb\n'
+            )
+
+            for resp in resp.responses:
+                err = resp.error
+                if err:
+                    raise MarketStoreError(err)
+
+        if r:
+            to_push = mkts_array[m*limit:]
+
+            # write to db
+            resp = await self.client.write(
+                to_push,
+                tbk=f'{fqme}/{tfkey}/OHLCV',
+
+                # NOTE: will will append duplicates
+                # for the same timestamp-index.
+                # TODO: pre deduplicate?
+                isvariablelength=append_and_duplicate,
+            )
+
+            log.info(
+                f'Wrote {mkts_array.size} datums to tsdb\n'
+            )
+
+            for resp in resp.responses:
+                err = resp.error
+                if err:
+                    raise MarketStoreError(err)
+
+    # XXX: currently the only way to do this is through the CLI:
+
+    # sudo ./marketstore connect --dir ~/.config/piker/data
+    # >> \show mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15
+    # and this seems to block and use up mem..
+    # >> \trim mnq.globex.20220617.ib/1Sec/OHLCV 2022-05-15
+
+    # relevant source code for this is here:
+    # https://github.com/alpacahq/marketstore/blob/master/cmd/connect/session/trim.go#L14
+    # def delete_range(self, start_dt, end_dt) -> None:
+    #     ...
+
+
+@acm
+async def open_storage_client(
+    host: str,
+    grpc_port: int,
+
+) -> tuple[Storage, dict[str, np.ndarray]]:
+    '''
+    Load a series by key and deliver in ``numpy`` struct array format.
+
+    '''
+    from piker.service.marketstore import get_client
+
+    async with (
+        # eventually a storage backend endpoint
+        get_client(
+            host=host,
+            port=grpc_port,
+        ) as client,
+    ):
+        # slap on our wrapper api
+        yield Storage(client)
+
+
+# NOTE: pretty sure right now this is only being
+# called by a CLI entrypoint?
+@acm
+async def open_tsdb_client(
+    fqme: str,
+) -> Storage:
+
+    # TODO: real-time dedicated task for ensuring
+    # history consistency between the tsdb, shm and real-time feed..
+
+    # update sequence design notes:
+
+    # - load existing highest frequency data from mkts
+    #   * how do we want to offer this to the UI?
+    #    - lazy loading?
+    #    - try to load it all and expect graphics caching/diffing
+    #      to  hide extra bits that aren't in view?
+
+    # - compute the diff between latest data from broker and shm
+    #   * use sql api in mkts to determine where the backend should
+    #     start querying for data?
+    #   * append any diff with new shm length
+    #   * determine missing (gapped) history by scanning
+    #   * how far back do we look?
+
+    # - begin rt update ingest and aggregation
+    #   * could start by always writing ticks to mkts instead of
+    #     worrying about a shm queue for now.
+    #   * we have a short list of shm queues worth groking:
+    #     - https://github.com/pikers/piker/issues/107
+    #   * the original data feed arch blurb:
+    #     - https://github.com/pikers/piker/issues/98
+    #
+    profiler = Profiler(
+        disabled=True,  # not pg_profile_enabled(),
+        delayed=False,
+    )
+
+    # load any user service settings for connecting to
+    rootconf, path = config.load(
+        'conf',
+        touch_if_dne=True,
+    )
+    tsdbconf = rootconf['network'].get('tsdb')
+    # backend = tsdbconf.pop('backend')
+    async with (
+        open_storage_client(
+            **tsdbconf,
+        ) as storage,
+
+        maybe_open_feed(
+            [fqme],
+            start_stream=False,
+
+        ) as feed,
+    ):
+        profiler(f'opened feed for {fqme}')
+
+        # to_append = feed.hist_shm.array
+        # to_prepend = None
+
+        if fqme:
+            flume = feed.flumes[fqme]
+            symbol = flume.mkt
+            if symbol:
+                fqme = symbol.fqme
+
+            # diff db history with shm and only write the missing portions
+            # ohlcv = flume.hist_shm.array
+
+            # TODO: use pg profiler
+            # for secs in (1, 60):
+            #     tsdb_array = await storage.read_ohlcv(
+            #         fqme,
+            #         timeframe=timeframe,
+            #     )
+            #     # hist diffing:
+            #     # these aren't currently used but can be referenced from
+            #     # within the embedded ipython shell below.
+            #     to_append = ohlcv[ohlcv['time'] > ts['Epoch'][-1]]
+            #     to_prepend = ohlcv[ohlcv['time'] < ts['Epoch'][0]]
+
+            # profiler('Finished db arrays diffs')
+
+            _ = await storage.client.list_symbols()
+            # log.info(f'Existing tsdb symbol set:\n{pformat(syms)}')
+            # profiler(f'listed symbols {syms}')
+            yield storage
+
+        # for array in [to_append, to_prepend]:
+        #     if array is None:
+        #         continue
+
+        #     log.info(
+        #         f'Writing datums {array.size} -> to tsdb from shm\n'
+        #     )
+        #     await storage.write_ohlcv(fqme, array)
+
+        # profiler('Finished db writes')