Create `piker.tsp` "time series processing" subpkg
Move `.data.history` -> `.tsp.__init__.py` for now as the main pkg-mod and `.data.tsp` -> `.tsp._anal` (for analysis). Obviously follow-up commits will change the surrounding codebase (imports) to match..
commit 4568c55f17 (parent d5d68f75ea)
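For downstream code the practical effect is just an import-path change. A minimal consumer-side sketch (the try/except fallback shim is illustrative only; module paths are the ones named in this commit):

# hypothetical compat shim: prefer the new `piker.tsp` subpkg,
# fall back to the pre-move `piker.data` location.
try:
    from piker.tsp import dedupe, get_null_segs  # new home
except ImportError:
    from piker.data.tsp import dedupe, get_null_segs  # old layout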
piker/data/history.py -> piker/tsp/__init__.py

@@ -32,6 +32,7 @@ from __future__ import annotations
 from datetime import datetime
 from functools import partial
 from pathlib import Path
+from pprint import pformat
 from types import ModuleType
 from typing import (
     Callable,
@@ -53,25 +54,64 @@ import polars as pl
 from ..accounting import (
     MktPair,
 )
-from ._util import (
+from ..data._util import (
     log,
 )
-from ._sharedmem import (
+from ..data._sharedmem import (
     maybe_open_shm_array,
     ShmArray,
 )
-from ._source import def_iohlcv_fields
-from ._sampling import (
+from ..data._source import def_iohlcv_fields
+from ..data._sampling import (
     open_sample_stream,
 )
-from .tsp import (
-    dedupe,
-    get_null_segs,
-    iter_null_segs,
-    sort_diff,
-    Frame,
-    # Seq,
-)
+from ._anal import (
+
+    get_null_segs,
+    iter_null_segs,
+    sort_diff,
+    Frame,
+    Seq,
+
+    # codec-ish
+    np2pl,
+    pl2np,
+
+    # `numpy` only
+    slice_from_time,
+
+    # `polars` specific
+    dedupe,
+    with_dts,
+    detect_time_gaps,
+    sort_diff,
+
+    # TODO:
+    detect_price_gaps
+)
+
+__all__: list[str] = [
+    'dedupe',
+    'get_null_segs',
+    'iter_null_segs',
+    'sort_diff',
+    'slice_from_time',
+    'Frame',
+    'Seq',
+
+    'np2pl',
+    'pl2np',
+
+    'slice_from_time',
+
+    'with_dts',
+    'detect_time_gaps',
+    'sort_diff',
+
+    # TODO:
+    'detect_price_gaps'
+]
 
+# TODO: break up all this shite into submods!
 from ..brokers._util import (
     DataUnavailable,
 )
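Since `._anal` is re-exported wholesale at the subpkg root (with `__all__` pinning the public names), callers can stay agnostic of the internal module split. A small usage sketch, assuming a checkout with this commit applied:

import piker.tsp as tsp

# names re-exported above resolve at the subpkg root, so no
# `._anal` import is needed by consumers:
assert 'dedupe' in tsp.__all__
print(tsp.get_null_segs)  # function defined in `piker.tsp._anal`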
@@ -252,21 +292,54 @@ async def maybe_fill_null_segments(
         and
         len(null_segs[-1])
     ):
+        await tractor.pause()
+        (
+            iabs_slices,
+            iabs_zero_rows,
+            zero_t,
+        ) = null_segs
+        log.warning(
+            f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n'
+            f'{pformat(iabs_slices)}'
+        )
 
-    array = shm.array
-    zeros = array[array['low'] == 0]
-
-    # always backfill gaps with the earliest (price) datum's
-    if 0 < zeros.size:
-        zeros[[
-            'open',
-            'high',
-            'low',
-            'close',
-        ]] = shm._array[zeros['index'][0] - 1]['close']
+        # TODO: always backfill gaps with the earliest (price) datum's
+        # value to avoid the y-ranger including zeros and completely
+        # stretching the y-axis..
+        # array: np.ndarray = shm.array
+        # zeros = array[array['low'] == 0]
+        ohlc_fields: list[str] = [
+            'open',
+            'high',
+            'low',
+            'close',
+        ]
+
+        for istart, istop in iabs_slices:
+
+            # get view into buffer for null-segment
+            gap: np.ndarray = shm._array[istart:istop]
+
+            # copy the oldest OHLC samples forward
+            gap[ohlc_fields] = shm._array[istart]['close']
+
+            start_t: float = shm._array[istart]['time']
+            t_diff: float = (istop - istart)*timeframe
+            gap['time'] = np.arange(
+                start=start_t,
+                stop=start_t + t_diff,
+                step=timeframe,
+            )
+
+            await sampler_stream.send({
+                'broadcast_all': {
+
+                    # XXX NOTE XXX: see the
+                    # `.ui._display.increment_history_view()` if block
+                    # that looks for this info to FORCE a hard viz
+                    # redraw!
+                    'backfilling': (mkt.fqme, timeframe),
+                },
+            })
 
             # TODO: interatively step through any remaining
             # time-gaps/null-segments and spawn piecewise backfiller
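The fill itself is plain numpy-level forward-filling: flatten each OHLC column of the gap to the last known close and rebuild the epoch column with `np.arange()`. A self-contained sketch of the same idea on a toy structured array (a local array stands in for the shm buffer; the field layout is an assumption loosely matching `def_iohlcv_fields`):

import numpy as np

timeframe: float = 60.0  # seconds per bar
dt = np.dtype([
    ('time', 'f8'),
    ('open', 'f8'), ('high', 'f8'), ('low', 'f8'), ('close', 'f8'),
])
buf = np.zeros(6, dtype=dt)
buf[0] = (0.0, 10.0, 11.0, 9.0, 10.5)
buf[1] = (60.0, 10.5, 12.0, 10.0, 11.0)

istart, istop = 2, 6           # rows 2..5 are the null segment
gap = buf[istart:istop]        # a view: writing it mutates `buf`
last_close = buf[istart - 1]['close']
for field in ('open', 'high', 'low', 'close'):
    gap[field] = last_close    # forward-fill the price fields

start_t = buf[istart - 1]['time'] + timeframe
gap['time'] = np.arange(       # regenerate the epoch column
    start=start_t,
    stop=start_t + (istop - istart) * timeframe,
    step=timeframe,
)
assert (buf['close'][2:] == 11.0).all()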
@@ -277,10 +350,7 @@ async def maybe_fill_null_segments(
             #     parallel possible no matter the backend?
             # -[ ] fill algo: do queries in alternating "latest, then
             #    earliest, then latest.. etc?"
-    # if (
-    #     next_end_dt not in frame[
-    # ):
-    #     pass
+            # await tractor.pause()
 
 
 async def start_backfill(
@@ -1252,8 +1322,8 @@ def iter_dfs_from_shms(
         assert not opened
         ohlcv = shm.array
 
-        from ..data import tsp
-        df: pl.DataFrame = tsp.np2pl(ohlcv)
+        from ._anal import np2pl
+        df: pl.DataFrame = np2pl(ohlcv)
 
         yield (
             shmfile,
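`np2pl` / `pl2np` (re-exported from `._anal` above) bridge the shm-backed numpy structured arrays and `polars` frames. Their bodies aren't shown in this diff; a rough sketch of the numpy-to-polars direction, under the assumption that it is a straight column-wise copy:

import numpy as np
import polars as pl

def np2pl_sketch(arr: np.ndarray) -> pl.DataFrame:
    # copy each named field of the structured array out as a
    # column; `arr.dtype.names` carries the iohlcv field order.
    return pl.DataFrame({
        name: arr[name]
        for name in arr.dtype.names
    })

ohlcv = np.array(
    [(0.0, 1.0, 2.0, 0.5, 1.5, 100.0)],
    dtype=[
        ('time', 'f8'), ('open', 'f8'), ('high', 'f8'),
        ('low', 'f8'), ('close', 'f8'), ('volume', 'f8'),
    ],
)
df: pl.DataFrame = np2pl_sketch(ohlcv)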
piker/data/tsp.py -> piker/tsp/_anal.py

@@ -319,9 +319,8 @@ def get_null_segs(
     if num_gaps < 1:
         if absi_zeros.size > 1:
             absi_zsegs = [[
-                # see `get_hist()` in backend, should ALWAYS be
-                # able to handle a `start_dt=None`!
-                # None,
+                # TODO: maybe mk these max()/min() limits func
+                # consts instead of called more then once?
                 max(
                     absi_zeros[0] - 1,
                     0,
@@ -359,7 +358,10 @@ def get_null_segs(
         # corresponding to the first zero-segment's row, we add it
         # manually here.
         absi_zsegs.append([
-            absi_zeros[0] - 1,
+            max(
+                absi_zeros[0] - 1,
+                0,
+            ),
             None,
         ])
 
@@ -400,14 +402,18 @@ def get_null_segs(
 
         else:
             if 0 < num_gaps < 2:
-                absi_zsegs[-1][1] = absi_zeros[-1] + 1
+                absi_zsegs[-1][1] = min(
+                    absi_zeros[-1] + 1,
+                    frame['index'][-1],
+                )
 
             iabs_first: int = frame['index'][0]
             for start, end in absi_zsegs:
 
                 ts_start: float = times[start - iabs_first]
                 ts_end: float = times[end - iabs_first]
                 if (
-                    ts_start == 0
+                    (ts_start == 0 and not start == 0)
                     or
                     ts_end == 0
                 ):
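Both boundary fixes in `get_null_segs()` are index clamps: `max(absi_zeros[0] - 1, 0)` stops a gap that begins at the very first row from producing a `-1` start, and the new `min(absi_zeros[-1] + 1, frame['index'][-1])` stops a gap that runs to the last row from indexing one past the buffer. A toy check of the arithmetic (the bound values are made up):

# assume a frame spanning absolute rows 0..9
last_index: int = 9

# gap starting at the very first row: unclamped, the segment
# start would be -1, which numpy wraps to the END of the array!
start = max(0 - 1, 0)
assert start == 0

# gap ending at the very last row: unclamped, the segment end
# would be 10, one past the final valid absolute index.
end = min(9 + 1, last_index)
assert end == 9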
@@ -451,11 +457,13 @@ def iter_null_segs(
     ],
     None,
 ]:
-    if null_segs is None:
-        null_segs: tuple = get_null_segs(
+    if not (
+        null_segs := get_null_segs(
             frame,
             period=timeframe,
         )
+    ):
+        return
 
     absi_pairs_zsegs: list[list[float, float]]
     izeros: Seq
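The refactor above folds the `get_null_segs()` call and an empty-result guard into one walrus expression, so the generator bails out immediately when no null segments are found. The same guard pattern in isolation, with a stand-in lookup function (names here are hypothetical):

from typing import Any

def lookup(key: str) -> dict[str, Any] | None:
    # stand-in for `get_null_segs()`: may return a falsy result
    return {'a': 1} if key == 'hit' else None

def consume(key: str) -> None:
    # assign and test in one expression; bail early when falsy
    if not (segs := lookup(key)):
        return
    for name, val in segs.items():
        print(name, val)

consume('miss')  # returns immediately
consume('hit')   # prints: a 1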
@@ -502,6 +510,7 @@ def iter_null_segs(
         )
 
 
+# TODO: move to ._pl_anal
 def with_dts(
     df: pl.DataFrame,
     time_col: str = 'time',
@@ -525,19 +534,6 @@ def with_dts(
     # )
 
 
-def dedup_dt(
-    df: pl.DataFrame,
-) -> pl.DataFrame:
-    '''
-    Drop duplicate date-time rows (normally from an OHLC frame).
-
-    '''
-    return df.unique(
-        subset=['dt'],
-        maintain_order=True,
-    )
-
-
 t_unit: Literal = Literal[
     'days',
     'hours',
@@ -651,7 +647,11 @@ def dedupe(src_df: pl.DataFrame) -> tuple[
         )
 
     # remove duplicated datetime samples/sections
-    deduped: pl.DataFrame = dedup_dt(df)
+    deduped: pl.DataFrame = df.unique(
+        subset=['dt'],
+        maintain_order=True,
+    )
 
     deduped_gaps = detect_time_gaps(deduped)
 
     diff: int = (
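With `dedup_dt()` gone, de-duplication is just the inline `polars` call; `maintain_order=True` preserves the original row order while dropping rows that repeat a `dt` value. A quick demo on a toy frame:

import polars as pl

df = pl.DataFrame({
    'dt': ['2024-01-01', '2024-01-01', '2024-01-02'],
    'close': [1.0, 1.1, 2.0],
})

# same call as inlined in `dedupe()` above: one row survives
# per duplicated `dt`, with input order preserved.
deduped: pl.DataFrame = df.unique(
    subset=['dt'],
    maintain_order=True,
)
assert deduped.height == 2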