Compare commits
No commits in common. "f274c3db3b5cc00abee8cec84ef98bd564e437d2" and "cb941a55549b1da48c6ae837865a48dbc1a63026" have entirely different histories.
f274c3db3b ... cb941a5554
@@ -49,12 +49,7 @@ from bidict import bidict
import trio
import tractor
from tractor import to_asyncio
from pendulum import (
    from_timestamp,
    DateTime,
    Duration,
    duration as mk_duration,
)
import pendulum
from eventkit import Event
from ib_insync import (
    client as ib_client,
@@ -226,20 +221,16 @@ def bars_to_np(bars: list) -> np.ndarray:
# https://interactivebrokers.github.io/tws-api/historical_limitations.html#non-available_hd
_samplings: dict[int, tuple[str, str]] = {
    1: (
        # ib strs
        '1 secs',
        f'{int(2e3)} S',

        mk_duration(seconds=2e3),
        pendulum.duration(seconds=2e3),
    ),
    # TODO: benchmark >1 D duration on query to see if
    # throughput can be made faster during backfilling.
    60: (
        # ib strs
        '1 min',
        '2 D',

        mk_duration(days=2),
        pendulum.duration(days=2),
    ),
}
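For context, the sampling table above maps a sample period in seconds to an IB bar-size string, an IB duration string and a default query duration; a minimal standalone sketch of that lookup (the table copy below is illustrative only):

import pendulum

# sample period (s) -> (ib bar-size str, ib duration str, default query duration)
_samplings: dict[int, tuple[str, str, pendulum.Duration]] = {
    1: ('1 secs', f'{int(2e3)} S', pendulum.duration(seconds=2e3)),
    60: ('1 min', '2 D', pendulum.duration(days=2)),
}

bar_size, ib_duration_str, default_dt_duration = _samplings[60]
print(bar_size, ib_duration_str, default_dt_duration.in_seconds())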
@@ -324,7 +315,7 @@ class Client:

        **kwargs,

    ) -> tuple[BarDataList, np.ndarray, Duration]:
    ) -> tuple[BarDataList, np.ndarray, pendulum.Duration]:
        '''
        Retreive OHLCV bars for a fqme over a range to the present.

@@ -333,20 +324,11 @@ class Client:
        # https://interactivebrokers.github.io/tws-api/historical_data.html
        bars_kwargs = {'whatToShow': 'TRADES'}
        bars_kwargs.update(kwargs)
        (
            bar_size,
            ib_duration_str,
            default_dt_duration,
        ) = _samplings[sample_period_s]

        dt_duration: DateTime = (
            duration
            or default_dt_duration
        )
        bar_size, duration, dt_duration = _samplings[sample_period_s]

        global _enters
        log.info(
            f"REQUESTING {ib_duration_str}'s worth {bar_size} BARS\n"
            f"REQUESTING {duration}'s worth {bar_size} BARS\n"
            f'{_enters} @ end={end_dt}"'
        )

@@ -371,7 +353,7 @@ class Client:

            # time history length values format:
            # ``durationStr=integer{SPACE}unit (S|D|W|M|Y)``
            durationStr=ib_duration_str,
            durationStr=duration,

            # always use extended hours
            useRTH=False,
@@ -401,23 +383,16 @@ class Client:
        # => we recursively call this method until we get at least
        # as many bars such that they sum in aggregate to the the
        # desired total time (duration) at most.
        if end_dt:
            nparr: np.ndarray = bars_to_np(bars)
            times: np.ndarray = nparr['time']
            first: float = times[0]
            tdiff: float = times[-1] - first

            if (
                # len(bars) * sample_period_s) < dt_duration.in_seconds()
                tdiff < dt_duration.in_seconds()
            ):
                end_dt: DateTime = from_timestamp(first)
                log.warning(
                    f'Frame result was shorter then {dt_duration}!?\n'
                    'Recursing for more bars:\n'
                    f'end_dt: {end_dt}\n'
                    f'dt_duration: {dt_duration}\n'
        elif (
            end_dt
            and (
                (len(bars) * sample_period_s) < dt_duration.in_seconds()
            )
        ):
            log.warning(
                f'Recursing to get more bars from {end_dt} for {dt_duration}'
            )
            end_dt -= dt_duration
            (
                r_bars,
                r_arr,
@@ -426,30 +401,11 @@ class Client:
                fqme,
                start_dt=start_dt,
                end_dt=end_dt,
                sample_period_s=sample_period_s,

                # TODO: make a table for Duration to
                # the ib str values in order to use this?
                # duration=duration,
            )
            r_bars.extend(bars)
            bars = r_bars

        nparr = bars_to_np(bars)

        # timestep should always be at least as large as the
        # period step.
        tdiff: np.ndarray = np.diff(nparr['time'])
        to_short: np.ndarray = tdiff < sample_period_s
        if (to_short).any():
            # raise ValueError(
            log.error(
                f'OHLC frame for {sample_period_s} has {to_short.size} '
                'time steps which are shorter then expected?!"'
            )
            # OOF: this will break teardown?
            # breakpoint()

        return bars, nparr, dt_duration


async def con_deats(
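For context, the frame-shortfall check in `Client.bars()` above compares the wall-clock span of the returned bars against the requested duration and, when short, steps `end_dt` back to the frame's first timestamp before re-querying; a rough standalone sketch of that check (sample timestamps are illustrative):

import numpy as np
from pendulum import from_timestamp, duration as mk_duration

# hypothetical 1s bars that only cover ~3 seconds of wall-clock time
times = np.array([1_700_000_000.0, 1_700_000_001.0, 1_700_000_003.0])
dt_duration = mk_duration(seconds=2e3)

first: float = times[0]
tdiff: float = times[-1] - first

# if the returned frame spans less time than requested, step the
# query's end datetime back to the frame's first timestamp and retry.
if tdiff < dt_duration.in_seconds():
    end_dt = from_timestamp(first)
    print(f'frame too short, recursing with end_dt={end_dt}')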
@@ -20,7 +20,7 @@ Order and trades endpoints for use with ``piker``'s EMS.
"""
from __future__ import annotations
from contextlib import ExitStack
# from collections import ChainMap
from collections import ChainMap
from functools import partial
from pprint import pformat
import time
@@ -196,8 +196,10 @@ async def open_history_client(
            f'mean: {mean}'
        )

        if (
            out is None
        ):
            # could be trying to retreive bars over weekend
        if out is None:
            log.error(f"Can't grab bars starting at {end_dt}!?!?")
            raise NoData(
                f'{end_dt}',
@@ -211,24 +213,7 @@ async def open_history_client(
        ):
            raise DataUnavailable(f'First timestamp is {head_dt}')

        # also see return type for `get_bars()`
        bars: ibis.objects.BarDataList
        bars_array: np.ndarray
        first_dt: datetime
        last_dt: datetime
        (
            bars,
            bars_array,
            first_dt,
            last_dt,
        ) = out

        # TODO: audit the sampling period here as well?
        # timestep should always be at least as large as the
        # period step.
        # tdiff: np.ndarray = np.diff(bars_array['time'])
        # if (tdiff < timeframe).any():
        #     await tractor.pause()
        bars, bars_array, first_dt, last_dt = out

        # volume cleaning since there's -ve entries,
        # wood luv to know what crookery that is..
@@ -56,7 +56,6 @@ __all__: list[str] = [
    'ShmArray',
    'iterticks',
    'maybe_open_shm_array',
    'match_from_pairs',
    'attach_shm_array',
    'open_shm_array',
    'get_shm_token',
@@ -23,13 +23,11 @@ Routines are generally implemented in either ``numpy`` or

'''
from __future__ import annotations
from functools import partial
from typing import Literal
from math import (
    ceil,
    floor,
)
import time
from typing import Literal

import numpy as np
import polars as pl
@@ -40,18 +38,6 @@ from ..toolz.profile import (
    pg_profile_enabled,
    ms_slower_then,
)
from ..log import (
    get_logger,
    get_console_log,
)
# for "time series processing"
subsys: str = 'piker.tsp'

log = get_logger(subsys)
get_console_log = partial(
    get_console_log,
    name=subsys,
)


def slice_from_time(
@@ -262,7 +248,7 @@ def with_dts(
) -> pl.DataFrame:
    '''
    Insert datetime (casted) columns to a (presumably) OHLC sampled
    time series with an epoch-time column keyed by `time_col: str`.
    time series with an epoch-time column keyed by ``time_col``.

    '''
    return df.with_columns([
@@ -270,9 +256,7 @@ def with_dts(
        pl.col(time_col).diff().alias('s_diff'),
        pl.from_epoch(pl.col(time_col)).alias('dt'),
    ]).with_columns([
        pl.from_epoch(
            pl.col(f'{time_col}_prev')
        ).alias('dt_prev'),
        pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'),
        pl.col('dt').diff().alias('dt_diff'),
    ]) #.with_columns(
        # pl.col('dt').diff().dt.days().alias('days_dt_diff'),
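For reference, a minimal standalone sketch of the `with_dts()` idea shown above (the sample frame, and the `shift()` used here to derive the `_prev` column, are assumptions for illustration):

import polars as pl

def with_dts(df: pl.DataFrame, time_col: str = 'time') -> pl.DataFrame:
    # add shifted, diffed and datetime-casted helper columns derived
    # from the epoch-seconds column keyed by `time_col`.
    return df.with_columns([
        pl.col(time_col).shift(1).alias(f'{time_col}_prev'),
        pl.col(time_col).diff().alias('s_diff'),
        pl.from_epoch(pl.col(time_col)).alias('dt'),
    ]).with_columns([
        pl.from_epoch(pl.col(f'{time_col}_prev')).alias('dt_prev'),
        pl.col('dt').diff().alias('dt_diff'),
    ])

df = pl.DataFrame({'time': [60, 120, 180, 360]})
print(with_dts(df))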
@@ -365,117 +349,3 @@ def detect_price_gaps(
    # (pl.col(time_col) - pl.col(f'{time_col}_previous')).alias('diff'),
    # ])
    ...


def dedupe(src_df: pl.DataFrame) -> tuple[
    pl.DataFrame, # with dts
    pl.DataFrame, # gaps
    pl.DataFrame, # with deduplicated dts (aka gap/repeat removal)
    int, # len diff between input and deduped
]:
    '''
    Check for time series gaps and if found
    de-duplicate any datetime entries, check for
    a frame height diff and return the newly
    dt-deduplicated frame.

    '''
    df: pl.DataFrame = with_dts(src_df)
    gaps: pl.DataFrame = detect_time_gaps(df)

    # if no gaps detected just return carbon copies
    # and no len diff.
    if gaps.is_empty():
        return (
            df,
            gaps,
            df,
            0,
        )

    # remove duplicated datetime samples/sections
    deduped: pl.DataFrame = dedup_dt(df)
    deduped_gaps = detect_time_gaps(deduped)

    diff: int = (
        df.height
        -
        deduped.height
    )
    log.warning(
        f'Gaps found:\n{gaps}\n'
        f'deduped Gaps found:\n{deduped_gaps}'
    )
    # TODO: rewrite this in polars and/or convert to
    # ndarray to detect and remove?
    # null_gaps = detect_null_time_gap()

    return (
        df,
        gaps,
        deduped,
        diff,
    )


def sort_diff(
    src_df: pl.DataFrame,
    col: str = 'time',

) -> tuple[
    pl.DataFrame, # with dts
    pl.DataFrame, # sorted
    list[int], # indices of segments that are out-of-order
]:
    ser: pl.Series = src_df[col]

    diff: pl.Series = ser.diff()
    sortd: pl.DataFrame = ser.sort()
    sortd_diff: pl.Series = sortd.diff()
    i_step_diff = (diff != sortd_diff).arg_true()
    if i_step_diff.len():
        import pdbp
        pdbp.set_trace()


# NOTE: thanks to this SO answer for the below conversion routines
# to go from numpy struct-arrays to polars dataframes and back:
# https://stackoverflow.com/a/72054819
def np2pl(array: np.ndarray) -> pl.DataFrame:
    start = time.time()

    # XXX: thanks to this SO answer for this conversion tip:
    # https://stackoverflow.com/a/72054819
    df = pl.DataFrame({
        field_name: array[field_name]
        for field_name in array.dtype.fields
    })
    delay: float = round(
        time.time() - start,
        ndigits=6,
    )
    log.info(
        f'numpy -> polars conversion took {delay} secs\n'
        f'polars df: {df}'
    )
    return df


def pl2np(
    df: pl.DataFrame,
    dtype: np.dtype,

) -> np.ndarray:

    # Create numpy struct array of the correct size and dtype
    # and loop through df columns to fill in array fields.
    array = np.empty(
        df.height,
        dtype,
    )
    for field, col in zip(
        dtype.fields,
        df.columns,
    ):
        array[field] = df.get_column(col).to_numpy()

    return array
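For reference, a self-contained sketch of the numpy struct-array to polars roundtrip that `np2pl()`/`pl2np()` implement (the OHLCV-style dtype and sample values below are illustrative):

import numpy as np
import polars as pl

# illustrative OHLCV-style struct dtype
ohlcv_dtype = np.dtype([
    ('time', 'f8'),
    ('open', 'f8'),
    ('close', 'f8'),
])

arr = np.array(
    [(60.0, 1.0, 1.1), (120.0, 1.1, 1.2)],
    dtype=ohlcv_dtype,
)

# numpy struct-array -> polars: one column per dtype field
df = pl.DataFrame({name: arr[name] for name in arr.dtype.fields})

# polars -> numpy struct-array: pre-allocate then fill field by field
out = np.empty(df.height, dtype=ohlcv_dtype)
for field, col in zip(ohlcv_dtype.fields, df.columns):
    out[field] = df.get_column(col).to_numpy()

assert (out == arr).all()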
@@ -1,19 +1,18 @@
# piker: trading gear for hackers
# Copyright (C) Tyler Goodlet (in stewardship for pikers)

# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU Affero General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Affero General Public License for more details.
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public
# License along with this program. If not, see
# <https://www.gnu.org/licenses/>.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

'''
Historical data business logic for load, backfill and tsdb storage.
@@ -40,7 +39,6 @@ from pendulum import (
    from_timestamp,
)
import numpy as np
import polars as pl

from ..accounting import (
    MktPair,
@@ -56,7 +54,6 @@ from ._source import def_iohlcv_fields
from ._sampling import (
    open_sample_stream,
)
from . import tsp
from ..brokers._util import (
    DataUnavailable,
)
@@ -200,7 +197,7 @@ async def start_backfill(

    # do a decently sized backfill and load it into storage.
    periods = {
        1: {'days': 2},
        1: {'days': 6},
        60: {'years': 6},
    }
    period_duration: int = periods[timeframe]
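For context, a small sketch of how a `periods` mapping like the one above can be turned into a backfill cutoff datetime with pendulum (variable names here are illustrative):

import pendulum

periods = {
    1: {'days': 6},
    60: {'years': 6},
}

timeframe = 60  # seconds per bar
# subtract the configured span from "now" to get the earliest
# datetime we want to backfill to for this timeframe.
backfill_until_dt = pendulum.now('UTC').subtract(**periods[timeframe])
print(backfill_until_dt)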
@@ -249,16 +246,13 @@ async def start_backfill(
    # broker says there never was or is no more history to pull
    except DataUnavailable:
        log.warning(
            f'NO-MORE-DATA: backend {mod.name} halted history:\n'
            f'{timeframe}@{mkt.fqme}'
            f'NO-MORE-DATA: backend {mod.name} halted history!?'
        )

        # ugh, what's a better way?
        # TODO: fwiw, we probably want a way to signal a throttle
        # condition (eg. with ib) so that we can halt the
        # request loop until the condition is resolved?
        if timeframe > 1:
            await tractor.pause()
        return

    # TODO: drop this? see todo above..
@@ -306,11 +300,9 @@ async def start_backfill(
            array,
            prepend_until_dt=backfill_until_dt,
        )
        ln: int = len(to_push)
        ln = len(to_push)
        if ln:
            log.info(
                f'{ln} bars for {next_start_dt} -> {last_start_dt}'
            )
            log.info(f'{ln} bars for {next_start_dt} -> {last_start_dt}')

        else:
            log.warning(
@@ -396,29 +388,14 @@ async def start_backfill(
                without_src=True,
            )
        else:
            col_sym_key: str = mkt.get_fqme(
                delim_char='',
            )
            col_sym_key: str = mkt.get_fqme(delim_char='')

            # TODO: implement parquet append!?
            await storage.write_ohlcv(
                col_sym_key,
                shm.array,
                timeframe,
            )
            df: pl.DataFrame = await storage.as_df(
                fqme=mkt.fqme,
                period=timeframe,
                load_from_offline=False,
            )
            (
                df,
                gaps,
                deduped,
                diff,
            ) = tsp.dedupe(df)
            if diff:
                tsp.sort_diff(df)

        else:
            # finally filled gap
            log.info(
@@ -429,7 +406,7 @@ async def start_backfill(
    # TODO: ideally these never exist but somehow it seems
    # sometimes we're writing zero-ed segments on certain
    # (teardown) cases?
    from .tsp import detect_null_time_gap
    from ._timeseries import detect_null_time_gap

    gap_indices: tuple | None = detect_null_time_gap(shm)
    while gap_indices:
@@ -657,19 +634,12 @@ async def tsdb_backfill(
    async with mod.open_history_client(
        mkt,
    ) as (get_hist, config):
        log.info(
            f'`{mod}` history client returned backfill config:\n'
            f'{config}\n'
        )
        log.info(f'{mod} history client returned backfill config: {config}')

        # get latest query's worth of history all the way
        # back to what is recorded in the tsdb
        try:
            (
                array,
                mr_start_dt,
                mr_end_dt,
            ) = await get_hist(
            array, mr_start_dt, mr_end_dt = await get_hist(
                timeframe,
                end_dt=None,
            )
@@ -679,7 +649,6 @@ async def tsdb_backfill(
        # there's no backfilling possible.
        except DataUnavailable:
            task_status.started()
            await tractor.pause()
            return

        # TODO: fill in non-zero epoch time values ALWAYS!
@@ -730,8 +699,9 @@ async def tsdb_backfill(
            )
        except TimeseriesNotFound:
            log.warning(
                f'No timeseries yet for {timeframe}@{fqme}'
                f'No timeseries yet for {fqme}'
            )

        else:
            (
                tsdb_history,
@@ -761,9 +731,9 @@ async def tsdb_backfill(
            # to push datums that have already been recorded in the
            # tsdb. In this case we instead only retreive and push
            # the series portion missing from the db's data set.
            # if offset_s < 0:
            # non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt
            # non_overlap_offset_s: float = backfill_diff.in_seconds()
            if offset_s < 0:
                non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt
                non_overlap_offset_s: float = backfill_diff.in_seconds()

            offset_samples: int = round(offset_s / timeframe)

@@ -814,24 +784,25 @@ async def tsdb_backfill(
            f'timeframe of {timeframe} seconds..\n'
            'So yuh.. dun do dat brudder.'
        )

        # if there is a gap to backfill from the first
        # history frame until the last datum loaded from the tsdb
        # continue that now in the background
        bf_done = await tn.start(
            partial(
                start_backfill,
                get_hist=get_hist,
                mod=mod,
                mkt=mkt,
                shm=shm,
                timeframe=timeframe,
                get_hist,
                mod,
                mkt,
                shm,
                timeframe,

                backfill_from_shm_index=backfill_gap_from_shm_index,
                backfill_from_dt=mr_start_dt,

                sampler_stream=sampler_stream,

                backfill_until_dt=last_tsdb_dt,
                storage=storage,
                write_tsdb=True,
            )
        )

@@ -853,11 +824,8 @@ async def tsdb_backfill(
        finally:
            return

        # XXX NOTE: this is legacy from when we were using
        # marketstore and we needed to continue backloading
        # incrementally from the tsdb client.. (bc it couldn't
        # handle a single large query with gRPC for some
        # reason.. classic goolag pos)
        # IF we need to continue backloading incrementally from the
        # tsdb client..
        tn.start_soon(
            back_load_from_tsdb,
@@ -1026,18 +994,19 @@ async def manage_history(
        log.info(f'Connected to sampler stream: {sample_stream}')

        for timeframe in [60, 1]:
            await tn.start(partial(
            await tn.start(
                tsdb_backfill,
                mod=mod,
                storemod=storemod,
                tn=tn,
                mod,
                storemod,
                tn,
                # bus,
                storage=client,
                mkt=mkt,
                shm=tf2mem[timeframe],
                timeframe=timeframe,
                sampler_stream=sample_stream,
            ))
                client,
                mkt,
                tf2mem[timeframe],
                timeframe,

                sample_stream,
            )

        # indicate to caller that feed can be delivered to
        # remote requesting client since we've loaded history
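As background for the `tn.start(partial(...))` vs `tn.start(...)` variants above: trio's `Nursery.start()` only forwards positional arguments to the task, so keyword arguments are typically pre-bound with `functools.partial`; a minimal sketch (task and argument names are illustrative):

from functools import partial
import trio

async def backfill(timeframe: int, *, task_status=trio.TASK_STATUS_IGNORED):
    # signal the caller that startup completed, then keep working.
    task_status.started()
    await trio.sleep(0.1)
    print(f'backfilled timeframe={timeframe}')

async def main():
    async with trio.open_nursery() as tn:
        # kwargs must be pre-bound since tn.start() only passes positionals
        await tn.start(partial(backfill, timeframe=60))

trio.run(main)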
@@ -40,7 +40,6 @@ from piker.data import (
    maybe_open_shm_array,
    def_iohlcv_fields,
    ShmArray,
    tsp,
)
from piker.data.history import (
    _default_hist_size,
@@ -99,18 +98,6 @@ def ls(
    trio.run(query_all)


# TODO: like ls but takes in a pattern and matches
# @store.command()
# def search(
#     patt: str,
#     backends: list[str] = typer.Argument(
#         default=None,
#         help='Storage backends to query, default is all.'
#     ),
# ):
#     ...


@store.command()
def delete(
    symbols: list[str],
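For context, a rough sketch of how the commented-out `search` command above could be wired up with `typer` (the `store` app object and the echo-only body are assumptions for illustration):

import typer

store = typer.Typer()

@store.command()
def search(
    patt: str,
    backends: list[str] = typer.Argument(
        default=None,
        help='Storage backends to query, default is all.'
    ),
):
    # echo back what would be searched; real lookup logic omitted.
    typer.echo(f'searching {backends or "all backends"} for {patt!r}')

if __name__ == '__main__':
    store()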
@@ -149,6 +136,53 @@ def delete(
    trio.run(main, symbols)


def dedupe(src_df: pl.DataFrame) -> tuple[
    pl.DataFrame, # with dts
    pl.DataFrame, # gaps
    pl.DataFrame, # with deduplicated dts (aka gap/repeat removal)
    bool,
]:
    '''
    Check for time series gaps and if found
    de-duplicate any datetime entries, check for
    a frame height diff and return the newly
    dt-deduplicated frame.

    '''
    from piker.data import _timeseries as tsp
    df: pl.DataFrame = tsp.with_dts(src_df)
    gaps: pl.DataFrame = tsp.detect_time_gaps(df)
    if not gaps.is_empty():

        # remove duplicated datetime samples/sections
        deduped: pl.DataFrame = tsp.dedup_dt(df)
        deduped_gaps = tsp.detect_time_gaps(deduped)

        log.warning(
            f'Gaps found:\n{gaps}\n'
            f'deduped Gaps found:\n{deduped_gaps}'
        )
        # TODO: rewrite this in polars and/or convert to
        # ndarray to detect and remove?
        # null_gaps = tsp.detect_null_time_gap()

    diff: int = (
        df.height
        -
        deduped.height
    )
    was_deduped: bool = False
    if diff:
        deduped: bool = True

    return (
        df,
        gaps,
        deduped,
        was_deduped,
    )


@store.command()
def anal(
    fqme: str,
@@ -201,10 +235,10 @@ def anal(
            df,
            gaps,
            deduped,
            diff,
        ) = tsp.dedupe(shm_df)
            shortened,
        ) = dedupe(shm_df)

        if diff:
        if shortened:
            await client.write_ohlcv(
                fqme,
                ohlcv=deduped,
@@ -272,8 +306,22 @@ def iter_dfs_from_shms(fqme: str) -> Generator[
        assert not opened
        ohlcv = shm.array

        from ..data import tsp
        df: pl.DataFrame = tsp.np2pl(ohlcv)
        start = time.time()

        # XXX: thanks to this SO answer for this conversion tip:
        # https://stackoverflow.com/a/72054819
        df = pl.DataFrame({
            field_name: ohlcv[field_name]
            for field_name in ohlcv.dtype.fields
        })
        delay: float = round(
            time.time() - start,
            ndigits=6,
        )
        log.info(
            f'numpy -> polars conversion took {delay} secs\n'
            f'polars df: {df}'
        )

        yield (
            shmfile,
@@ -285,6 +333,7 @@ def iter_dfs_from_shms(fqme: str) -> Generator[
@store.command()
def ldshm(
    fqme: str,

    write_parquet: bool = False,

) -> None:
@@ -313,6 +362,7 @@ def ldshm(
                f'Something is wrong with time period for {shm}:\n{times}'
            )


        # over-write back to shm?
        df: pl.DataFrame # with dts
        deduped: pl.DataFrame # deduplicated dts
@@ -320,8 +370,8 @@ def ldshm(
            df,
            gaps,
            deduped,
            diff,
        ) = tsp.dedupe(shm_df)
            was_dded,
        ) = dedupe(shm_df)

        # TODO: maybe only optionally enter this depending
        # on some CLI flags and/or gap detection?
@@ -65,11 +65,8 @@ from pendulum import (
)

from piker import config
from piker.data import (
    def_iohlcv_fields,
    ShmArray,
    tsp,
)
from piker.data import def_iohlcv_fields
from piker.data import ShmArray
from piker.log import get_logger
from . import TimeseriesNotFound
@@ -77,6 +74,37 @@ from . import TimeseriesNotFound
log = get_logger('storage.nativedb')


# NOTE: thanks to this SO answer for the below conversion routines
# to go from numpy struct-arrays to polars dataframes and back:
# https://stackoverflow.com/a/72054819
def np2pl(array: np.ndarray) -> pl.DataFrame:
    return pl.DataFrame({
        field_name: array[field_name]
        for field_name in array.dtype.fields
    })


def pl2np(
    df: pl.DataFrame,
    dtype: np.dtype,

) -> np.ndarray:

    # Create numpy struct array of the correct size and dtype
    # and loop through df columns to fill in array fields.
    array = np.empty(
        df.height,
        dtype,
    )
    for field, col in zip(
        dtype.fields,
        df.columns,
    ):
        array[field] = df.get_column(col).to_numpy()

    return array


def detect_period(shm: ShmArray) -> float:
    '''
    Attempt to detect the series time step sampling period
@@ -236,22 +264,6 @@ class NativeStorageClient:
            datadir=self._datadir,
        )

    def _cache_df(
        self,
        fqme: str,
        df: pl.DataFrame,
        timeframe: float,
    ) -> None:
        # cache df for later usage since we (currently) need to
        # convert to np.ndarrays to push to our `ShmArray` rt
        # buffers subsys but later we may operate entirely on
        # pyarrow arrays/buffers so keeping the dfs around for
        # a variety of purposes is handy.
        self._dfs.setdefault(
            timeframe,
            {},
        )[fqme] = df

    async def read_ohlcv(
        self,
        fqme: str,
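For reference, the `setdefault` pattern in `_cache_df()` above keeps a two-level `{timeframe: {fqme: df}}` cache; a minimal standalone sketch (the sample key and frame are illustrative):

import polars as pl

_dfs: dict[float, dict[str, pl.DataFrame]] = {}

def cache_df(fqme: str, df: pl.DataFrame, timeframe: float) -> None:
    # create the per-timeframe sub-dict on first use, then key by fqme.
    _dfs.setdefault(timeframe, {})[fqme] = df

cache_df('xyzusd.venue', pl.DataFrame({'time': [60]}), 60)
print(_dfs[60].keys())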
@@ -266,14 +278,19 @@ class NativeStorageClient:
        )
        df: pl.DataFrame = pl.read_parquet(path)

        self._cache_df(
            fqme=fqme,
            df=df,
            timeframe=timeframe,
        )
        # cache df for later usage since we (currently) need to
        # convert to np.ndarrays to push to our `ShmArray` rt
        # buffers subsys but later we may operate entirely on
        # pyarrow arrays/buffers so keeping the dfs around for
        # a variety of purposes is handy.
        self._dfs.setdefault(
            timeframe,
            {},
        )[fqme] = df

        # TODO: filter by end and limit inputs
        # times: pl.Series = df['time']
        array: np.ndarray = tsp.pl2np(
        array: np.ndarray = pl2np(
            df,
            dtype=np.dtype(def_iohlcv_fields),
        )
@@ -283,15 +300,11 @@ class NativeStorageClient:
        self,
        fqme: str,
        period: int = 60,
        load_from_offline: bool = True,

    ) -> pl.DataFrame:
        try:
            return self._dfs[period][fqme]
        except KeyError:
            if not load_from_offline:
                raise

            await self.read_ohlcv(fqme, period)
            return self._dfs[period][fqme]
@@ -313,22 +326,14 @@ class NativeStorageClient:
            datadir=self._datadir,
        )
        if isinstance(ohlcv, np.ndarray):
            df: pl.DataFrame = tsp.np2pl(ohlcv)
            df: pl.DataFrame = np2pl(ohlcv)
        else:
            df = ohlcv

        self._cache_df(
            fqme=fqme,
            df=df,
            timeframe=timeframe,
        )

        # TODO: in terms of managing the ultra long term data
        # -[ ] use a proper profiler to measure all this IO and
        # - use a proper profiler to measure all this IO and
        # roundtripping!
        # -[ ] implement parquet append!? see issue:
        # https://github.com/pikers/piker/issues/536
        # -[ ] try out ``fastparquet``'s append writing:
        # - try out ``fastparquet``'s append writing:
        # https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write
        start = time.time()
        df.write_parquet(path)
@@ -49,7 +49,7 @@ from ..data._formatters import (
    OHLCBarsAsCurveFmtr, # OHLC converted to line
    StepCurveFmtr, # "step" curve (like for vlm)
)
from ..data.tsp import (
from ..data._timeseries import (
    slice_from_time,
)
from ._ohlc import (
@@ -31,7 +31,7 @@ import pendulum
import pyqtgraph as pg

from piker.types import Struct
from ..data.tsp import slice_from_time
from ..data._timeseries import slice_from_time
from ..log import get_logger
from ..toolz import Profiler