Move `Flume` to a new `.data._flumes` module
							parent
							
								
									e5e70a6011
								
							
						
					
					
						commit
						eacd44dd65
					
				|  | @ -0,0 +1,304 @@ | |||
| # piker: trading gear for hackers | ||||
| # Copyright (C) Tyler Goodlet (in stewardship for pikers) | ||||
| 
 | ||||
| # This program is free software: you can redistribute it and/or modify | ||||
| # it under the terms of the GNU Affero General Public License as published by | ||||
| # the Free Software Foundation, either version 3 of the License, or | ||||
| # (at your option) any later version. | ||||
| 
 | ||||
| # This program is distributed in the hope that it will be useful, | ||||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| # GNU Affero General Public License for more details. | ||||
| 
 | ||||
| # You should have received a copy of the GNU Affero General Public License | ||||
| # along with this program.  If not, see <https://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| """ | ||||
| Real-time data flow abstractions. | ||||
| 
 | ||||
| <writeup on "buffers, streams, flumes and flows.."> | ||||
| 
 | ||||
| """ | ||||
| from contextlib import asynccontextmanager as acm | ||||
| from functools import partial | ||||
| from typing import ( | ||||
|     AsyncIterator, | ||||
|     TYPE_CHECKING, | ||||
| ) | ||||
| 
 | ||||
| import tractor | ||||
| from tractor.trionics import ( | ||||
|     maybe_open_context, | ||||
| ) | ||||
| import pendulum | ||||
| import numpy as np | ||||
| 
 | ||||
| from .types import Struct | ||||
| from ._source import ( | ||||
|     Symbol, | ||||
| ) | ||||
| from ._sharedmem import ( | ||||
|     attach_shm_array, | ||||
|     ShmArray, | ||||
|     _Token, | ||||
| ) | ||||
| from ._sampling import ( | ||||
|     iter_ohlc_periods, | ||||
| ) | ||||
| 
 | ||||
| if TYPE_CHECKING: | ||||
|     from pyqtgraph import PlotItem | ||||
|     from .feed import Feed | ||||
| 
 | ||||
| 
 | ||||
class Flume(Struct):
    '''
    Composite reference type which points to all the addressing handles
    and other meta-data necessary for the read, measure and management
    of a set of real-time updated data flows.

    Can be thought of as a "flow descriptor" or "flow frame" which
    describes the high level properties of a set of data flows that can
    be used seamlessly across process-memory boundaries.

    Each instance's sub-components normally includes:
     - a msg oriented quote stream provided via an IPC transport
     - history and real-time shm buffers which are both real-time
       updated and backfilled.
     - associated startup indexing information related to both buffer
       real-time-append and historical prepend addresses.
     - low level APIs to read and measure the updated data and manage
       queuing properties.

    '''
    symbol: Symbol
    first_quote: dict
    _hist_shm_token: _Token
    _rt_shm_token: _Token

    # private shm refs loaded dynamically from tokens
    _hist_shm: ShmArray | None = None
    _rt_shm: ShmArray | None = None

    stream: tractor.MsgStream | None = None
    izero_hist: int = 0
    izero_rt: int = 0
    throttle_rate: int | None = None

    # TODO: do we need this really if we can pull the `Portal` from
    # ``tractor``'s internals?
    feed: Feed | None = None

    @property
    def rt_shm(self) -> ShmArray:
        '''
        Real-time shm buffer, lazily (re-)attached from its token on
        first access (read-only).

        '''
        if self._rt_shm is None:
            self._rt_shm = attach_shm_array(
                token=self._rt_shm_token,
                readonly=True,
            )

        return self._rt_shm

    @property
    def hist_shm(self) -> ShmArray:
        '''
        Historical shm buffer, lazily (re-)attached from its token on
        first access (read-only).

        '''
        if self._hist_shm is None:
            self._hist_shm = attach_shm_array(
                token=self._hist_shm_token,
                readonly=True,
            )

        return self._hist_shm

    async def receive(self) -> dict:
        '''
        Receive the next quote msg from the underlying IPC stream.

        '''
        return await self.stream.receive()

    @acm
    async def index_stream(
        self,
        delay_s: int = 1,

    ) -> AsyncIterator[int]:
        '''
        Open a stream of OHLC-period index updates from the remote
        sampler daemon, multiplexing onto an existing subscription when
        one is already cached for this host.

        '''
        if not self.feed:
            raise RuntimeError('This flume is not part of any ``Feed``?')

        # TODO: maybe a public (property) API for this in ``tractor``?
        portal = self.stream._ctx._portal
        assert portal

        # XXX: this should be singleton on a host,
        # a lone broker-daemon per provider should be
        # created for all practical purposes
        async with maybe_open_context(
            acm_func=partial(
                portal.open_context,
                iter_ohlc_periods,
            ),
            kwargs={'delay_s': delay_s},
        ) as (cache_hit, (ctx, first)):
            async with ctx.open_stream() as istream:
                if cache_hit:
                    # add a new broadcast subscription for the quote stream
                    # if this feed is likely already in use
                    async with istream.subscribe() as bistream:
                        yield bistream
                else:
                    yield istream

    def get_ds_info(
        self,
    ) -> tuple[float, float, float]:
        '''
        Compute the "downsampling" ratio info between the historical shm
        buffer and the real-time (HFT) one.

        Return a tuple of the fast sample period, historical sample
        period and ratio between them.

        '''
        times = self.hist_shm.array['time']
        end = pendulum.from_timestamp(times[-1])
        start = pendulum.from_timestamp(times[times != times[-1]][-1])
        hist_step_size_s = (end - start).seconds

        times = self.rt_shm.array['time']
        end = pendulum.from_timestamp(times[-1])
        start = pendulum.from_timestamp(times[times != times[-1]][-1])
        rt_step_size_s = (end - start).seconds

        ratio = hist_step_size_s / rt_step_size_s
        return (
            rt_step_size_s,
            hist_step_size_s,
            ratio,
        )

    # TODO: get native msgspec decoding for these workinn
    def to_msg(self) -> dict:
        '''
        Serialize this flume to a ``dict`` suitable for IPC transit.

        '''
        msg = self.to_dict()
        msg['symbol'] = msg['symbol'].to_dict()

        # can't serialize the stream or feed objects, it's expected
        # you'll have a ref to it since this msg should be rxed on
        # a stream on whatever far end IPC..
        msg.pop('stream')
        msg.pop('feed')
        return msg

    @classmethod
    def from_msg(cls, msg: dict) -> 'Flume':
        # FIX: was annotated ``-> dict`` but an instance of ``cls`` is
        # returned.
        symbol = Symbol(**msg.pop('symbol'))
        return cls(
            symbol=symbol,
            **msg,
        )

    def get_index(
        self,
        time_s: float,

    ) -> int:
        '''
        Return array shm-buffer index for the given epoch time.

        '''
        array = self.rt_shm.array
        times = array['time']
        mask = (times >= time_s)

        if any(mask):
            return array['index'][mask][0]

        # just the latest index
        # FIX: the fallback value was computed but never returned,
        # silently yielding ``None`` instead of the latest index.
        return array['index'][-1]

    def slice_from_time(
        self,
        array: np.ndarray,
        start_t: float,
        stop_t: float,
        timeframe_s: int = 1,
        return_data: bool = False,

    ) -> np.ndarray:
        '''
        Slice an input struct array providing only datums
        "in view" of this chart.

        '''
        # FIX: ``self.hist_shm.arry`` was a typo raising
        # ``AttributeError`` for the 60s timeframe.
        arr = {
            1: self.rt_shm.array,
            60: self.hist_shm.array,
        }[timeframe_s]

        times = arr['time']
        # NOTE(review): the time mask is built from the shm buffer's
        # ``times`` but applied to the *passed* ``array``'s index column
        # — this assumes both are aligned; confirm against callers.
        index = array['index']

        # use advanced indexing to map the
        # time range to the index range.
        mask = (
            (times >= start_t)
            &
            (times < stop_t)
        )

        # TODO: if we can ensure each time field has a uniform
        # step we can instead do some arithmetic to determine
        # the equivalent index like we used to?
        # return array[
        #     lbar - ifirst:
        #     (rbar - ifirst) + 1
        # ]

        i_by_t = index[mask]
        i_0 = i_by_t[0]

        abs_slc = slice(
            i_0,
            i_by_t[-1],
        )
        # slice data by offset from the first index
        # available in the passed datum set.
        read_slc = slice(
            0,
            i_by_t[-1] - i_0,
        )
        if not return_data:
            return (
                abs_slc,
                read_slc,
            )

        # also return the readable data from the timerange
        return (
            abs_slc,
            read_slc,
            arr[mask],
        )

    def view_data(
        self,
        plot: PlotItem,
        timeframe_s: int = 1,

    ) -> np.ndarray:
        '''
        Return the portion of the shm array data currently "in view"
        of the passed plot's view-rect x-range.

        '''
        # get far-side x-indices plot view
        vr = plot.viewRect()
        l = vr.left()
        r = vr.right()

        (
            abs_slc,
            buf_slc,
            iv_arr,
        ) = self.slice_from_time(
            start_t=l,
            stop_t=r,
            timeframe_s=timeframe_s,
            return_data=True,
        )
        return iv_arr
|  | @ -29,7 +29,6 @@ import time | |||
| from types import ModuleType | ||||
| from typing import ( | ||||
|     Any, | ||||
|     AsyncIterator, | ||||
|     AsyncContextManager, | ||||
|     Callable, | ||||
|     Optional, | ||||
|  | @ -57,11 +56,10 @@ from .._daemon import ( | |||
|     maybe_spawn_brokerd, | ||||
|     check_for_service, | ||||
| ) | ||||
| from ._flumes import Flume | ||||
| from ._sharedmem import ( | ||||
|     maybe_open_shm_array, | ||||
|     attach_shm_array, | ||||
|     ShmArray, | ||||
|     _Token, | ||||
|     _secs_in_day, | ||||
| ) | ||||
| from .ingest import get_ingestormod | ||||
|  | @ -76,7 +74,6 @@ from ._sampling import ( | |||
|     sampler, | ||||
|     broadcast, | ||||
|     increment_ohlc_buffer, | ||||
|     iter_ohlc_periods, | ||||
|     sample_and_broadcast, | ||||
|     uniform_rate_send, | ||||
|     _default_delay_s, | ||||
|  | @ -87,7 +84,6 @@ from ..brokers._util import ( | |||
| 
 | ||||
| if TYPE_CHECKING: | ||||
|     from .marketstore import Storage | ||||
|     from pyqtgraph import PlotItem | ||||
| 
 | ||||
| log = get_logger(__name__) | ||||
| 
 | ||||
|  | @ -894,258 +890,6 @@ async def manage_history( | |||
|         await trio.sleep_forever() | ||||
| 
 | ||||
| 
 | ||||
class Flume(Struct):
    '''
    Composite reference type which points to all the addressing handles
    and other meta-data necessary for the read, measure and management
    of a set of real-time updated data flows.

    Can be thought of as a "flow descriptor" or "flow frame" which
    describes the high level properties of a set of data flows that can
    be used seamlessly across process-memory boundaries.

    Each instance's sub-components normally includes:
     - a msg oriented quote stream provided via an IPC transport
     - history and real-time shm buffers which are both real-time
       updated and backfilled.
     - associated startup indexing information related to both buffer
       real-time-append and historical prepend addresses.
     - low level APIs to read and measure the updated data and manage
       queuing properties.

    '''
    symbol: Symbol
    first_quote: dict
    _hist_shm_token: _Token
    _rt_shm_token: _Token

    # private shm refs loaded dynamically from tokens
    _hist_shm: ShmArray | None = None
    _rt_shm: ShmArray | None = None

    stream: tractor.MsgStream | None = None
    izero_hist: int = 0
    izero_rt: int = 0
    throttle_rate: int | None = None

    # TODO: do we need this really if we can pull the `Portal` from
    # ``tractor``'s internals?
    feed: Feed | None = None

    @property
    def rt_shm(self) -> ShmArray:
        '''
        Real-time shm buffer, lazily (re-)attached from its token on
        first access (read-only).

        '''
        if self._rt_shm is None:
            self._rt_shm = attach_shm_array(
                token=self._rt_shm_token,
                readonly=True,
            )

        return self._rt_shm

    @property
    def hist_shm(self) -> ShmArray:
        '''
        Historical shm buffer, lazily (re-)attached from its token on
        first access (read-only).

        '''
        if self._hist_shm is None:
            self._hist_shm = attach_shm_array(
                token=self._hist_shm_token,
                readonly=True,
            )

        return self._hist_shm

    async def receive(self) -> dict:
        '''
        Receive the next quote msg from the underlying IPC stream.

        '''
        return await self.stream.receive()

    @acm
    async def index_stream(
        self,
        delay_s: int = 1,

    ) -> AsyncIterator[int]:
        '''
        Open a stream of OHLC-period index updates from the remote
        sampler daemon, multiplexing onto an existing subscription when
        one is already cached for this host.

        '''
        if not self.feed:
            raise RuntimeError('This flume is not part of any ``Feed``?')

        # TODO: maybe a public (property) API for this in ``tractor``?
        portal = self.stream._ctx._portal
        assert portal

        # XXX: this should be singleton on a host,
        # a lone broker-daemon per provider should be
        # created for all practical purposes
        async with maybe_open_context(
            acm_func=partial(
                portal.open_context,
                iter_ohlc_periods,
            ),
            kwargs={'delay_s': delay_s},
        ) as (cache_hit, (ctx, first)):
            async with ctx.open_stream() as istream:
                if cache_hit:
                    # add a new broadcast subscription for the quote stream
                    # if this feed is likely already in use
                    async with istream.subscribe() as bistream:
                        yield bistream
                else:
                    yield istream

    def get_ds_info(
        self,
    ) -> tuple[float, float, float]:
        '''
        Compute the "downsampling" ratio info between the historical shm
        buffer and the real-time (HFT) one.

        Return a tuple of the fast sample period, historical sample
        period and ratio between them.

        '''
        times = self.hist_shm.array['time']
        end = pendulum.from_timestamp(times[-1])
        start = pendulum.from_timestamp(times[times != times[-1]][-1])
        hist_step_size_s = (end - start).seconds

        times = self.rt_shm.array['time']
        end = pendulum.from_timestamp(times[-1])
        start = pendulum.from_timestamp(times[times != times[-1]][-1])
        rt_step_size_s = (end - start).seconds

        ratio = hist_step_size_s / rt_step_size_s
        return (
            rt_step_size_s,
            hist_step_size_s,
            ratio,
        )

    # TODO: get native msgspec decoding for these workinn
    def to_msg(self) -> dict:
        '''
        Serialize this flume to a ``dict`` suitable for IPC transit.

        '''
        msg = self.to_dict()
        msg['symbol'] = msg['symbol'].to_dict()

        # can't serialize the stream or feed objects, it's expected
        # you'll have a ref to it since this msg should be rxed on
        # a stream on whatever far end IPC..
        msg.pop('stream')
        msg.pop('feed')
        return msg

    @classmethod
    def from_msg(cls, msg: dict) -> 'Flume':
        # FIX: was annotated ``-> dict`` but an instance of ``cls`` is
        # returned.
        symbol = Symbol(**msg.pop('symbol'))
        return cls(
            symbol=symbol,
            **msg,
        )

    def get_index(
        self,
        time_s: float,

    ) -> int:
        '''
        Return array shm-buffer index for the given epoch time.

        '''
        array = self.rt_shm.array
        times = array['time']
        mask = (times >= time_s)

        if any(mask):
            return array['index'][mask][0]

        # just the latest index
        # FIX: the fallback value was computed but never returned,
        # silently yielding ``None`` instead of the latest index.
        return array['index'][-1]

    def slice_from_time(
        self,
        array: np.ndarray,
        start_t: float,
        stop_t: float,
        timeframe_s: int = 1,
        return_data: bool = False,

    ) -> np.ndarray:
        '''
        Slice an input struct array providing only datums
        "in view" of this chart.

        '''
        # FIX: ``self.hist_shm.arry`` was a typo raising
        # ``AttributeError`` for the 60s timeframe.
        arr = {
            1: self.rt_shm.array,
            60: self.hist_shm.array,
        }[timeframe_s]

        times = arr['time']
        # NOTE(review): the time mask is built from the shm buffer's
        # ``times`` but applied to the *passed* ``array``'s index column
        # — this assumes both are aligned; confirm against callers.
        index = array['index']

        # use advanced indexing to map the
        # time range to the index range.
        mask = (
            (times >= start_t)
            &
            (times < stop_t)
        )

        # TODO: if we can ensure each time field has a uniform
        # step we can instead do some arithmetic to determine
        # the equivalent index like we used to?
        # return array[
        #     lbar - ifirst:
        #     (rbar - ifirst) + 1
        # ]

        i_by_t = index[mask]
        i_0 = i_by_t[0]

        abs_slc = slice(
            i_0,
            i_by_t[-1],
        )
        # slice data by offset from the first index
        # available in the passed datum set.
        read_slc = slice(
            0,
            i_by_t[-1] - i_0,
        )
        if not return_data:
            return (
                abs_slc,
                read_slc,
            )

        # also return the readable data from the timerange
        return (
            abs_slc,
            read_slc,
            arr[mask],
        )

    def view_data(
        self,
        plot: PlotItem,
        timeframe_s: int = 1,

    ) -> np.ndarray:
        '''
        Return the portion of the shm array data currently "in view"
        of the passed plot's view-rect x-range.

        '''
        # get far-side x-indices plot view
        vr = plot.viewRect()
        l = vr.left()
        r = vr.right()

        (
            abs_slc,
            buf_slc,
            iv_arr,
        ) = self.slice_from_time(
            start_t=l,
            stop_t=r,
            timeframe_s=timeframe_s,
            return_data=True,
        )
        return iv_arr
| 
 | ||||
| 
 | ||||
| async def allocate_persistent_feed( | ||||
|     bus: _FeedsBus, | ||||
|     sub_registered: trio.Event, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue