Unify backfilling logic into common task-routine
parent c5ed9b5955
commit f7b3215aa4

@@ -32,7 +32,6 @@ from typing import (
     Awaitable,
 )
 
-import pendulum
 import trio
 from trio.abc import ReceiveChannel
 from trio_typing import TaskStatus

@@ -194,20 +193,97 @@ async def _setup_persistent_brokerd(
         await trio.sleep_forever()
 
 
+def diff_history(
+    array,
+    start_dt,
+    end_dt,
+    last_tsdb_dt: Optional[datetime] = None
+
+) -> np.ndarray:
+
+    if last_tsdb_dt:
+        s_diff = (last_tsdb_dt - start_dt).seconds
+
+        # if we detect a partial frame's worth of data
+        # that is new, slice out only that history and
+        # write to shm.
+        if s_diff > 0:
+            assert last_tsdb_dt > start_dt
+            selected = array['time'] > last_tsdb_dt.timestamp()
+            to_push = array[selected]
+            log.info(
+                f'Pushing partial frame {to_push.size} to shm'
+            )
+            return to_push
+
+    return array
+
+
 async def start_backfill(
     mod: ModuleType,
-    fqsn: str,
+    bfqsn: str,
     shm: ShmArray,
 
+    last_tsdb_dt: Optional[datetime] = None,
+    do_legacy: bool = False,
+
     task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED,
 
 ) -> int:
 
-    return await mod.backfill_bars(
-        fqsn,
-        shm,
-        task_status=task_status,
-    )
+    if do_legacy:
+        return await mod.backfill_bars(
+            bfqsn,
+            shm,
+            task_status=task_status,
+        )
+
+    async with mod.open_history_client(bfqsn) as hist:
+
+        # get latest query's worth of history all the way
+        # back to what is recorded in the tsdb
+        array, start_dt, end_dt = await hist(end_dt=None)
+        to_push = diff_history(
+            array,
+            start_dt,
+            end_dt,
+            last_tsdb_dt=last_tsdb_dt,
+        )
+
+        log.info(f'Pushing {to_push.size} to shm!')
+        shm.push(to_push)
+
+        for delay_s in sampler.subscribers:
+            await broadcast(delay_s)
+
+        # let caller unblock and deliver latest history frame
+        task_status.started(shm)
+
+        # pull new history frames until we hit latest
+        # already in the tsdb
+        # while start_dt > last_tsdb_dt:
+        while True:
+            array, start_dt, end_dt = await hist(end_dt=start_dt)
+            to_push = diff_history(
+                array,
+                start_dt,
+                end_dt,
+
+                # last_tsdb_dt=last_tsdb_dt,
+                # XXX: hacky, just run indefinitely
+                last_tsdb_dt=None,
+            )
+            log.info(f'Pushing {to_push.size} to shm!')
+
+            # bail on shm allocation overrun
+            try:
+                shm.push(to_push, prepend=True)
+            except ValueError:
+                break
+
+            for delay_s in sampler.subscribers:
+                await broadcast(delay_s)
 
 
 async def manage_history(
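
The new `diff_history` helper above just masks a numpy structured array down to the rows strictly newer than the tsdb's last recorded timestamp. A minimal standalone sketch of that slicing idea, with made-up sample data (none of the names or values below come from the repo):

```python
# illustrative-only sketch of the `diff_history` slicing logic;
# the toy array and timestamps are fabricated for demonstration.
from datetime import datetime, timezone

import numpy as np

# a toy OHLC-style frame with an epoch-seconds 'time' field
array = np.array(
    [(1_640_995_200 + i * 60, 100.0 + i) for i in range(5)],
    dtype=[('time', 'i8'), ('close', 'f8')],
)

# pretend the tsdb's newest record is the 3rd row's timestamp
last_tsdb_dt = datetime.fromtimestamp(array['time'][2], tz=timezone.utc)

# boolean mask keeping only rows newer than the tsdb's latest
selected = array['time'] > last_tsdb_dt.timestamp()
to_push = array[selected]
print(to_push.size)  # 2 -> only the partial frame gets pushed to shm
```
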
@@ -251,127 +327,59 @@ async def manage_history(
     # for now only do backfilling if no tsdb can be found
     do_legacy_backfill = not is_up and opened
 
-    open_history_client = getattr(mod, 'open_history_client', None)
-
     bfqsn = fqsn.replace('.' + mod.name, '')
+    open_history_client = getattr(mod, 'open_history_client', None)
 
     if is_up and opened and open_history_client:
+
         log.info('Found existing `marketstored`')
         from . import marketstore
-
         async with marketstore.open_storage_client(
             fqsn,
         ) as storage:
 
-            tsdb_arrays = await storage.read_ohlcv(fqsn)
-
-            if not tsdb_arrays:
-                do_legacy_backfill = True
-
-            else:
-                log.info(f'Loaded tsdb history {tsdb_arrays}')
-
-                fastest = list(tsdb_arrays.values())[0]
-                times = fastest['Epoch']
-                first, last = times[0], times[-1]
-                first_tsdb_dt, last_tsdb_dt = map(
-                    pendulum.from_timestamp, [first, last]
-                )
-
-                # TODO: this should be used verbatim for the pure
-                # shm backfiller approach below.
-
-                def diff_history(
-                    array,
-                    start_dt,
-                    end_dt,
-                    last_tsdb_dt: Optional[datetime] = None
-
-                ) -> np.ndarray:
-
-                    if last_tsdb_dt:
-                        s_diff = (last_tsdb_dt - start_dt).seconds
-
-                        # if we detect a partial frame's worth of data
-                        # that is new, slice out only that history and
-                        # write to shm.
-                        if s_diff > 0:
-                            assert last_tsdb_dt > start_dt
-                            selected = array['time'] > last_tsdb_dt.timestamp()
-                            to_push = array[selected]
-                            log.info(
-                                f'Pushing partial frame {to_push.size} to shm'
-                            )
-                            return to_push
-
-                    return array
-
-                # start history anal and load missing new data via backend.
-
-                broker, symbol, expiry = unpack_fqsn(fqsn)
-
-                async with open_history_client(bfqsn) as hist:
-
-                    # get latest query's worth of history all the way
-                    # back to what is recorded in the tsdb
-                    array, start_dt, end_dt = await hist(end_dt=None)
-                    to_push = diff_history(
-                        array,
-                        start_dt,
-                        end_dt,
-                        last_tsdb_dt=last_tsdb_dt,
-                    )
-                    log.info(f'Pushing {to_push.size} to shm!')
-                    shm.push(to_push)
-
-                    for delay_s in sampler.subscribers:
-                        await broadcast(delay_s)
-
-                    # let caller unblock and deliver latest history frame
-                    task_status.started(shm)
-                    some_data_ready.set()
-
-                    # pull new history frames until we hit latest
-                    # already in the tsdb
-                    # while start_dt > last_tsdb_dt:
-                    while True:
-                        array, start_dt, end_dt = await hist(end_dt=start_dt)
-                        to_push = diff_history(
-                            array,
-                            start_dt,
-                            end_dt,
-                            # last_tsdb_dt=last_tsdb_dt,
-                            # just run indefinitely
-                            last_tsdb_dt=None,
-                        )
-                        log.info(f'Pushing {to_push.size} to shm!')
-                        shm.push(to_push, prepend=True)
-                        for delay_s in sampler.subscribers:
-                            await broadcast(delay_s)
-
-                    # TODO: see if there's faster multi-field reads:
-                    # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
-                    # re-index  with a `time` and index field
-                    shm.push(
-                        fastest[-shm._first.value:],
-
-                        # insert the history pre a "days worth" of samples
-                        # to leave some real-time buffer space at the end.
-                        prepend=True,
-                        # start=shm._len - _secs_in_day,
-                        field_map={
-                            'Epoch': 'time',
-                            'Open': 'open',
-                            'High': 'high',
-                            'Low': 'low',
-                            'Close': 'close',
-                            'Volume': 'volume',
-                        },
-                    )
-
-                    # TODO: write new data to tsdb to be ready to for next
-                    # read.
+            # TODO: this should be used verbatim for the pure
+            # shm backfiller approach below.
+
+            # start history anal and load missing new data via backend.
+            series, first_dt, last_dt = await storage.load(fqsn)
+
+            broker, symbol, expiry = unpack_fqsn(fqsn)
+            await bus.nursery.start(
+                partial(
+                    start_backfill,
+                    mod,
+                    bfqsn,
+                    shm,
+                    last_tsdb_dt=last_dt,
+                )
+            )
+            task_status.started(shm)
+            some_data_ready.set()
+
+            # TODO: see if there's faster multi-field reads:
+            # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
+            # re-index  with a `time` and index field
+            history = list(series.values())
+            if history:
+                fastest = history[0]
+                shm.push(
+                    fastest[-shm._first.value:],
+
+                    # insert the history pre a "days worth" of samples
+                    # to leave some real-time buffer space at the end.
+                    prepend=True,
+                    # start=shm._len - _secs_in_day,
+                    field_map={
+                        'Epoch': 'time',
+                        'Open': 'open',
+                        'High': 'high',
+                        'Low': 'low',
+                        'Close': 'close',
+                        'Volume': 'volume',
+                    },
+                )
+                # TODO: write new data to tsdb to be ready to for next read.
 
     if do_legacy_backfill:
         # do a legacy incremental backfill from the provider.
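
The `field_map` argument to `shm.push()` above maps marketstore's `Epoch`/`Open`/... column names onto the shm buffer's `time`/`open`/... fields. The `ShmArray.push()` internals aren't shown in this diff; as a rough standalone analogue of just the rename step, numpy's recfunctions can perform the same mapping (sample data fabricated):

```python
# rough standalone analogue of the `field_map` rename; this is NOT
# the ShmArray.push() implementation, only the column-mapping idea.
import numpy as np
from numpy.lib import recfunctions as rfn

# fabricated marketstore-style frame using tsdb column names
tsdb_frame = np.array(
    [(1_640_995_200, 100.0), (1_640_995_260, 101.5)],
    dtype=[('Epoch', 'i8'), ('Close', 'f8')],
)

# map tsdb column names onto the shm buffer's field names
renamed = rfn.rename_fields(
    tsdb_frame,
    {'Epoch': 'time', 'Close': 'close'},
)
print(renamed.dtype.names)  # ('time', 'close')
```
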
@@ -385,6 +393,7 @@ async def manage_history(
             mod,
             bfqsn,
             shm,
+            do_legacy=True,
         )
 
         # yield back after client connect with filled shm
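
Both the legacy and tsdb paths hand control back through trio's `nursery.start()` / `task_status.started()` handshake: the caller blocks until the first frame of history is in shm, while the backfill loop keeps running in the background. A minimal self-contained sketch of that pattern (task and value names are generic placeholders, not from the repo):

```python
# minimal sketch of the trio startup handshake `start_backfill` uses;
# names here are generic placeholders.
import trio


async def backfiller(
    task_status=trio.TASK_STATUS_IGNORED,
) -> None:
    # ... load and push the first history frame here ...
    # unblock the caller once the first frame is ready,
    task_status.started('first-frame-ready')
    # ... then keep pulling older frames indefinitely
    await trio.sleep_forever()


async def main() -> None:
    async with trio.open_nursery() as nursery:
        # blocks until the child calls `task_status.started()`
        value = await nursery.start(backfiller)
        print(value)  # 'first-frame-ready'
        nursery.cancel_scope.cancel()  # tear down the background loop


trio.run(main)
```
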