piker/piker/data/flows.py

363 lines
10 KiB
Python

# piker: trading gear for hackers
# Copyright (C) Tyler Goodlet (in stewardship for pikers)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
abstractions for organizing, managing and generally operating-on
real-time data processing data-structures.
"Streams, flumes, cascades and flows.."
"""
from __future__ import annotations
from contextlib import asynccontextmanager as acm
from functools import partial
from typing import (
AsyncIterator,
TYPE_CHECKING,
)
import tractor
from tractor.trionics import (
maybe_open_context,
)
import pendulum
import numpy as np
from .types import Struct
from ._source import (
Symbol,
)
from ._sharedmem import (
attach_shm_array,
ShmArray,
_Token,
)
from ._sampling import (
open_sample_stream,
)
if TYPE_CHECKING:
from pyqtgraph import PlotItem
from .feed import Feed
# TODO: ideas for further abstractions as per
# https://github.com/pikers/piker/issues/216 and
# https://github.com/pikers/piker/issues/270:
# - a ``Cascade`` would be the minimal "connection" of 2 ``Flumes``
# as per circuit parlance:
# https://en.wikipedia.org/wiki/Two-port_network#Cascade_connection
# - could cover the combination of our `FspAdmin` and the
# backend `.fsp._engine` related machinery to "connect" one flume
# to another?
# - a (financial signal) ``Flow`` would be the a "collection" of such
# minmial cascades. Some engineering based jargon concepts:
# - https://en.wikipedia.org/wiki/Signal_chain
# - https://en.wikipedia.org/wiki/Daisy_chain_(electrical_engineering)
# - https://en.wikipedia.org/wiki/Audio_signal_flow
# - https://en.wikipedia.org/wiki/Digital_signal_processing#Implementation
# - https://en.wikipedia.org/wiki/Dataflow_programming
# - https://en.wikipedia.org/wiki/Signal_programming
# - https://en.wikipedia.org/wiki/Incremental_computing
class Flume(Struct):
'''
Composite reference type which points to all the addressing handles
and other meta-data necessary for the read, measure and management
of a set of real-time updated data flows.
Can be thought of as a "flow descriptor" or "flow frame" which
describes the high level properties of a set of data flows that can
be used seamlessly across process-memory boundaries.
Each instance's sub-components normally includes:
- a msg oriented quote stream provided via an IPC transport
- history and real-time shm buffers which are both real-time
updated and backfilled.
- associated startup indexing information related to both buffer
real-time-append and historical prepend addresses.
- low level APIs to read and measure the updated data and manage
queuing properties.
'''
symbol: Symbol
first_quote: dict
_rt_shm_token: _Token
# optional since some data flows won't have a "downsampled" history
# buffer/stream (eg. FSPs).
_hist_shm_token: _Token | None = None
# private shm refs loaded dynamically from tokens
_hist_shm: ShmArray | None = None
_rt_shm: ShmArray | None = None
stream: tractor.MsgStream | None = None
izero_hist: int = 0
izero_rt: int = 0
throttle_rate: int | None = None
# TODO: do we need this really if we can pull the `Portal` from
# ``tractor``'s internals?
feed: Feed | None = None
@property
def rt_shm(self) -> ShmArray:
if self._rt_shm is None:
self._rt_shm = attach_shm_array(
token=self._rt_shm_token,
readonly=True,
)
return self._rt_shm
@property
def hist_shm(self) -> ShmArray:
if self._hist_shm_token is None:
raise RuntimeError(
'No shm token has been set for the history buffer?'
)
if (
self._hist_shm is None
):
self._hist_shm = attach_shm_array(
token=self._hist_shm_token,
readonly=True,
)
return self._hist_shm
async def receive(self) -> dict:
return await self.stream.receive()
@acm
async def index_stream(
self,
delay_s: float = 1,
) -> AsyncIterator[int]:
if not self.feed:
raise RuntimeError('This flume is not part of any ``Feed``?')
# TODO: maybe a public (property) API for this in ``tractor``?
portal = self.stream._ctx._portal
assert portal
# XXX: this should be singleton on a host,
# a lone broker-daemon per provider should be
# created for all practical purposes
async with open_sample_stream(float(delay_s)) as stream:
yield stream
def get_ds_info(
self,
) -> tuple[float, float, float]:
'''
Compute the "downsampling" ratio info between the historical shm
buffer and the real-time (HFT) one.
Return a tuple of the fast sample period, historical sample
period and ratio between them.
'''
times = self.hist_shm.array['time']
end = pendulum.from_timestamp(times[-1])
start = pendulum.from_timestamp(times[times != times[-1]][-1])
hist_step_size_s = (end - start).seconds
times = self.rt_shm.array['time']
end = pendulum.from_timestamp(times[-1])
start = pendulum.from_timestamp(times[times != times[-1]][-1])
rt_step_size_s = (end - start).seconds
ratio = hist_step_size_s / rt_step_size_s
return (
rt_step_size_s,
hist_step_size_s,
ratio,
)
# TODO: get native msgspec decoding for these workinn
def to_msg(self) -> dict:
msg = self.to_dict()
msg['symbol'] = msg['symbol'].to_dict()
# can't serialize the stream or feed objects, it's expected
# you'll have a ref to it since this msg should be rxed on
# a stream on whatever far end IPC..
msg.pop('stream')
msg.pop('feed')
return msg
@classmethod
def from_msg(cls, msg: dict) -> dict:
symbol = Symbol(**msg.pop('symbol'))
return cls(
symbol=symbol,
**msg,
)
def get_index(
self,
time_s: float,
) -> int:
'''
Return array shm-buffer index for for epoch time.
'''
array = self.rt_shm.array
times = array['time']
mask = (times >= time_s)
if any(mask):
return array['index'][mask][0]
# just the latest index
array['index'][-1]
def slice_from_time(
self,
arr: np.ndarray,
start_t: float,
stop_t: float,
) -> tuple[
slice,
slice,
np.ndarray | None,
]:
'''
Slice an input struct array to a time range and return the absolute
and "readable" slices for that array as well as the indexing mask
for the caller to use to slice the input array if needed.
'''
times = arr['time']
index = arr['index']
if (
start_t < 0
or start_t >= stop_t
):
return (
slice(
index[0],
index[-1],
),
slice(
0,
len(arr),
),
None,
)
# use advanced indexing to map the
# time range to the index range.
mask: np.ndarray = (
(times >= start_t)
| # fml, i guess it's not an `&` ??
(times < stop_t)
)
# TODO: if we can ensure each time field has a uniform
# step we can instead do some arithmetic to determine
# the equivalent index like we used to?
# return array[
# lbar - ifirst:
# (rbar - ifirst) + 1
# ]
i_by_t = index[mask]
try:
i_0 = i_by_t[0]
except IndexError:
if (
start_t < times[0]
or stop_t >= times[-1]
):
return (
slice(
index[0],
index[-1],
),
slice(
0,
len(arr),
),
None,
)
abs_slc = slice(
i_0,
i_by_t[-1],
)
# slice data by offset from the first index
# available in the passed datum set.
read_slc = slice(
i_0 - index[0],
i_by_t[-1] - i_0,
)
# also return the readable data from the timerange
return (
abs_slc,
read_slc,
mask,
)
# TODO: maybe move this our ``Viz`` type to avoid
# the shm lookup discrepancy?
def view_data(
self,
plot: PlotItem,
timeframe_s: int = 1,
) -> np.ndarray:
'''
Return sliced-to-view source data along with absolute
(``ShmArray._array['index']``) and read-relative
(``ShmArray.array``) slices.
'''
# get far-side x-indices plot view
vr = plot.viewRect()
if timeframe_s > 1:
arr = self.hist_shm.array
else:
arr = self.rt_shm.array
(
abs_slc,
read_slc,
mask,
) = self.slice_from_time(
arr,
start_t=vr.left(),
stop_t=vr.right(),
)
return (
abs_slc,
read_slc,
arr[mask] if mask is not None else arr,
)