Compare commits

...

4 Commits

Author SHA1 Message Date
Gud Boi ce1f038b53 .tsp._history: add gap detection in backfill loop
Add frame-gap detection when `frame_last_dt < end_dt_param` to
warn about potential venue closures or missing data during the
backfill loop in `start_backfill()`.

Deats,
- add `frame_last_dt < end_dt_param` check after frame recv
- log warnings with EST-converted timestamps for clarity
- add `await tractor.pause()` for REPL-investigation on gaps
- add TODO comment about venue closure hour checking
- capture `_until_was_none` walrus var for null-check clarity
- add `last_time` assertion for `time[-1] == next_end_dt`
- rename `_daterr` to `nodata` with `_nodata` capture

Also,
- import `pendulum.timezone` and create `est` tz instance
- change `get_logger()` import from `.data._util` to `.log`
- add parens around `(next_prepend_index - ln) < 0` check

(this commit msg was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
2026-02-05 17:48:52 -05:00
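Condensed, the gap check described above boils down to roughly the following sketch (not verbatim; names mirror the `.tsp._history` diff shown further below):

```python
# sketch of the backfill frame-gap check; `last_time` is the epoch
# timestamp of the frame's last bar, `timeframe` the bar period in
# seconds, and `end_dt_param` the `end_dt` passed to `get_hist()`.
from pendulum import DateTime, from_timestamp, timezone

est = timezone('EST')

def frame_ends_early(
    last_time: float,
    timeframe: int,
    end_dt_param: DateTime,
) -> bool:
    '''
    True when the provider's frame ends before the requested
    `end_dt`, ie. it likely spans a venue closure or is missing
    data.

    '''
    frame_last_dt: DateTime = from_timestamp(last_time)
    if frame_last_dt.add(seconds=timeframe) < end_dt_param:
        # warn with EST-converted stamps for easier eyeballing
        print(
            f'frame_last_dt (EST): {est.convert(frame_last_dt)!r}\n'
            f'end_dt_param (EST): {est.convert(end_dt_param)!r}'
        )
        return True
    return False
```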
Gud Boi f502851999 Use `.ib.venues.is_venue_open()` in `.feed`
Specifically in `.ib.feed.stream_quotes()`, since the time-range
checking code was moved to the new sub-mod.

Deats,
- drop import of old `is_current_time_in_range()` from `._util`
- change `get_bars()` sig: `end_dt`/`start_dt` to `datetime|None`
- comment-out `breakpoint()` in `open_history_client()`

Styling,
- add multiline style to conditionals and tuple unpacks
- fix type annotation: `Contract|None` vs `Contract | None`
- fix backticks in comment: `ib_insync` vs `ib_async`

(this commit msg was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
2026-02-05 17:44:06 -05:00
Gud Boi ba575d93ea Add venue-closure gap-detection in `.ib.api.Client.bars()`
With all detection logic coming from our new `.ib.venues` helpers,
allowing us to verify IB's OHLC bar frames don't contain unexpected
time-gaps.

`Client.bars()` new checking deats,
- add `is_venue_open()`, `has_weekend()`, `sesh_times()`, and
  `is_venue_closure()` checks when `last_dt < end_dt`
- always calc gap-period in local tz via `ContractDetails.timeZoneId`.
- log warnings on invalid non-closure gaps, debug on closures for now.
- change recursion case to just `log.error()` + `breakpoint()`; we might end
  up tossing it since i don't think i could ever get it to be reliable..
  * mask-out recursive `.bars()` call (likely unnecessary).
- flip `start_dt`/`end_dt` param defaults to `None` vs epoch `str`.
- update docstring to clarify no `start_dt` support by IB
- add mod level `_iso8601_epoch_in_est` const to keep track of orig
  param default value.
- add multiline style to return type-annot, type all `pendulum` objects.

Also,
- uppercase `Crypto.symbol` for PAXOS contracts in `.find_contracts()`,
  tho now we're getting a weird new API error i left in a todo-comment..

(this commit msg was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
2026-02-05 17:40:37 -05:00
Gud Boi f484726a8c Mv `parse_trading_hours()` from `._util` to `.venues`
It was an AI-model draft that we can prolly toss, but figured we
might as well org it appropriately.

(this commit msg was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
2026-02-05 16:51:26 -05:00
5 changed files with 287 additions and 166 deletions

View File

@@ -326,7 +326,6 @@ def i3ipc_fin_wins_titled(
)
def i3ipc_xdotool_manual_click_hack() -> None:
'''
Do the data reset hack but expecting a local X-window using `xdotool`.
@@ -388,99 +387,3 @@ def i3ipc_xdotool_manual_click_hack() -> None:
])
except subprocess.TimeoutExpired:
log.exception('xdotool timed out?')
def is_current_time_in_range(
start_dt: datetime,
end_dt: datetime,
) -> bool:
'''
Check if current time is within the datetime range.
Use any/the-same timezone as provided by `start_dt.tzinfo` value
in the range.
'''
now: datetime = datetime.now(start_dt.tzinfo)
return start_dt <= now <= end_dt
# TODO, put this into `._util` and call it from here!
#
# NOTE, this was generated by @guille from a gpt5 prompt
# and was originally thot to be needed before learning about
# `ib_insync.contract.ContractDetails._parseSessions()` and
# its downstream meths..
#
# This is still likely useful to keep for now to parse the
# `.tradingHours: str` value manually if we ever decide
# to move off `ib_async` and implement our own `trio`/`anyio`
# based version Bp
#
# >attempt to parse the retarted ib "time stampy thing" they
# >do for "venue hours" with this.. written by
# >gpt5-"thinking",
#
def parse_trading_hours(
spec: str,
tz: TzInfo|None = None
) -> dict[
date,
tuple[datetime, datetime]
]|None:
'''
Parse venue hours like:
'YYYYMMDD:HHMM-YYYYMMDD:HHMM;YYYYMMDD:CLOSED;...'
Returns `dict[date] = (open_dt, close_dt)` or `None` if
closed.
'''
if (
not isinstance(spec, str)
or
not spec
):
raise ValueError('spec must be a non-empty string')
out: dict[
date,
tuple[datetime, datetime]
]|None = {}
for part in (p.strip() for p in spec.split(';') if p.strip()):
if part.endswith(':CLOSED'):
day_s, _ = part.split(':', 1)
d = datetime.strptime(day_s, '%Y%m%d').date()
out[d] = None
continue
try:
start_s, end_s = part.split('-', 1)
start_dt = datetime.strptime(start_s, '%Y%m%d:%H%M')
end_dt = datetime.strptime(end_s, '%Y%m%d:%H%M')
except ValueError as exc:
raise ValueError(f'invalid segment: {part}') from exc
if tz is not None:
start_dt = start_dt.replace(tzinfo=tz)
end_dt = end_dt.replace(tzinfo=tz)
out[start_dt.date()] = (start_dt, end_dt)
return out
# ORIG desired usage,
#
# TODO, for non-drunk tomorrow,
# - call above fn and check that `output[today] is not None`
# trading_hrs: dict = parse_trading_hours(
# details.tradingHours
# )
# liq_hrs: dict = parse_trading_hours(
# details.liquidHours
# )

View File

@@ -50,10 +50,11 @@ import tractor
from tractor import to_asyncio
from tractor import trionics
from pendulum import (
from_timestamp,
DateTime,
Duration,
duration as mk_duration,
from_timestamp,
Interval,
)
from eventkit import Event
from ib_insync import (
@@ -260,6 +261,16 @@ def remove_handler_on_err(
event.disconnect(handler)
# (originally?) i thot that,
# > "EST in ISO 8601 format is required.."
#
# XXX, but see `ib_async`'s impl,
# - `ib_async.ib.IB.reqHistoricalDataAsync()`
# - `ib_async.util.formatIBDatetime()`
# below is EPOCH.
_iso8601_epoch_in_est: str = "1970-01-01T00:00:00.000000-05:00"
class Client:
'''
IB wrapped for our broker backend API.
@@ -333,9 +344,11 @@ class Client:
self,
fqme: str,
# EST in ISO 8601 format is required... below is EPOCH
start_dt: datetime|str = "1970-01-01T00:00:00.000000-05:00",
end_dt: datetime|str = "",
# EST in ISO 8601 format is required..
# XXX, see `ib_async.ib.IB.reqHistoricalDataAsync()`
# below is EPOCH.
start_dt: datetime|None = None, # _iso8601_epoch_in_est,
end_dt: datetime|None = None,
# ohlc sample period in seconds
sample_period_s: int = 1,
@@ -346,9 +359,17 @@
**kwargs,
) -> tuple[BarDataList, np.ndarray, Duration]:
) -> tuple[
BarDataList,
np.ndarray,
Duration,
]:
'''
Retrieve OHLCV bars for a fqme over a range to the present.
Retrieve the `fqme`'s OHLCV-bars for the time-range "until `end_dt`".
Notes:
- IB's api doesn't support a `start_dt` (which is why default
is null) so we only use it for bar-frame duration checking.
'''
# See API docs here:
@@ -363,13 +384,19 @@
dt_duration: Duration = (
duration
or default_dt_duration
or
default_dt_duration
)
# TODO: maybe remove all this?
global _enters
if not end_dt:
end_dt = ''
if end_dt is None:
end_dt: str = ''
else:
est_end_dt = end_dt.in_tz('EST')
if est_end_dt != end_dt:
breakpoint()
_enters += 1
@@ -438,58 +465,127 @@
+ query_info
)
# TODO: we could maybe raise ``NoData`` instead if we
# TODO: we could maybe raise `NoData` instead if we
# rewrite the method in the first case?
# right now there's no way to detect a timeout..
return [], np.empty(0), dt_duration
log.info(query_info)
# ------ GAP-DETECTION ------
# NOTE XXX: ensure minimum duration in bars?
# => recursively call this method until we get at least as
# many bars such that they sum in aggregate to the
# desired total time (duration) at most.
# - if you query over a gap and get no data
# that may short circuit the history
if (
# XXX XXX XXX
# => WHY DID WE EVEN NEED THIS ORIGINALLY!? <=
# XXX XXX XXX
False
and end_dt
):
if end_dt:
nparr: np.ndarray = bars_to_np(bars)
times: np.ndarray = nparr['time']
first: float = times[0]
tdiff: float = times[-1] - first
last: float = times[-1]
# frame_dur: float = times[-1] - first
first_dt: DateTime = from_timestamp(first)
last_dt: DateTime = from_timestamp(last)
tdiff: int = (
last_dt
-
first_dt
).in_seconds() + sample_period_s
# XXX, do gap detections.
if (
last_dt.add(seconds=sample_period_s)
<
end_dt
):
details: ContractDetails = (
await self.ib.reqContractDetailsAsync(contract)
)[0]
from .venues import (
is_venue_open,
has_weekend,
sesh_times,
is_venue_closure,
)
_open_now: bool = is_venue_open(
con_deats=details,
)
open_time, close_time = sesh_times(details)
# XXX, always calc gap in mkt-venue-local timezone
tz: str = details.timeZoneId
gap: Interval = (
end_dt.in_tz(tz)
-
last_dt.in_tz(tz)
)
if (
not has_weekend(gap)
and
# XXX NOT outside venue closures.
# !TODO, replace with,
# `not is_venue_closure()`
# per below assert on inverse case!
gap.end.time() != open_time
and
gap.start.time() != close_time
):
breakpoint()
log.warning(
f'Invalid non-closure gap for {fqme!r} ?!?\n'
f'is-open-now: {_open_now}\n'
f'\n'
f'{gap}\n'
)
else:
assert is_venue_closure(
gap=gap,
con_deats=details,
)
log.debug(
f'Detected venue closure gap (weekend),\n'
f'{gap}\n'
)
if (
start_dt is None
and
tdiff
<
dt_duration.in_seconds()
# and
# len(bars) * sample_period_s) < dt_duration.in_seconds()
tdiff < dt_duration.in_seconds()
# and False
):
end_dt: DateTime = from_timestamp(first)
log.warning(
log.error(
f'Frame result was shorter than {dt_duration}!?\n'
'Recursing for more bars:\n'
f'end_dt: {end_dt}\n'
f'dt_duration: {dt_duration}\n'
# f'\n'
# f'Recursing for more bars:\n'
)
(
r_bars,
r_arr,
r_duration,
) = await self.bars(
fqme,
start_dt=start_dt,
end_dt=end_dt,
sample_period_s=sample_period_s,
breakpoint()
# XXX ? TODO? recursively try to re-request?
# => i think *NO* right?
#
# (
# r_bars,
# r_arr,
# r_duration,
# ) = await self.bars(
# fqme,
# start_dt=start_dt,
# end_dt=end_dt,
# sample_period_s=sample_period_s,
# TODO: make a table for Duration to
# the ib str values in order to use this?
# duration=duration,
)
r_bars.extend(bars)
bars = r_bars
# # TODO: make a table for Duration to
# # the ib str values in order to use this?
# # duration=duration,
# )
# r_bars.extend(bars)
# bars = r_bars
nparr: np.ndarray = bars_to_np(bars)
@@ -784,9 +880,16 @@ class Client:
# crypto$
elif exch == 'PAXOS': # btc.paxos
con = Crypto(
symbol=symbol,
currency=currency,
symbol=symbol.upper(),
currency='USD',
exchange='PAXOS',
)
# XXX, on `ib_insync` when first tried this,
# > Error 10299, reqId 141: Expected what to show is
# > AGGTRADES, please use that instead of TRADES.,
# > contract: Crypto(conId=479624278, symbol='BTC',
# > exchange='PAXOS', currency='USD',
# > localSymbol='BTC.USD', tradingClass='BTC')
# stonks
else:
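Since this compare only shows the tail of the new `.ib.venues` mod, here's a sketch of the helper signatures implied by the `Client.bars()` call-sites above (inferred from usage, NOT the actual impls):

```python
# signatures inferred from the `Client.bars()` call-sites above;
# not the actual `.ib.venues` implementations.
from datetime import time
from ib_insync import ContractDetails
from pendulum import Interval

def is_venue_open(con_deats: ContractDetails) -> bool:
    # is the venue trading right now per its parsed sessions?
    ...

def sesh_times(con_deats: ContractDetails) -> tuple[time, time]:
    # (open_time, close_time) of the venue's regular session
    ...

def has_weekend(gap: Interval) -> bool:
    # does the gap span a Sat/Sun?
    ...

def is_venue_closure(
    gap: Interval,
    con_deats: ContractDetails,
) -> bool:
    # does the gap line up exactly with a scheduled closure,
    # ie. `gap.start` at session close and `gap.end` at open?
    ...
```

Note per the diff above that subtracting two `pendulum.DateTime`s yields an `Interval` exposing `.start`/`.end` endpoints, which is what these helpers inspect.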

View File

@@ -69,9 +69,9 @@ from .api import (
Contract,
RequestError,
)
from .venues import is_venue_open
from ._util import (
data_reset_hack,
is_current_time_in_range,
)
from .symbols import get_mkt_info
@@ -203,7 +203,8 @@ async def open_history_client(
latency = time.time() - query_start
if (
not timedout
# and latency <= max_timeout
# and
# latency <= max_timeout
):
count += 1
mean += latency / count
@@ -219,8 +220,10 @@
)
if (
end_dt
and head_dt
and end_dt <= head_dt
and
head_dt
and
end_dt <= head_dt
):
raise DataUnavailable(
f'First timestamp is {head_dt}\n'
@@ -278,7 +281,7 @@
start_dt
):
# TODO! rm this once we're more confident it never hits!
breakpoint()
# breakpoint()
raise RuntimeError(
f'OHLC-bars array start is gt `start_dt` limit !!\n'
f'start_dt: {start_dt}\n'
@@ -298,7 +301,7 @@
# TODO: it seems like we can do async queries for ohlc
# but getting the order right still isn't working and I'm not
# quite sure why.. needs some tinkering and probably
# a lookthrough of the ``ib_insync`` machinery, for eg. maybe
# a lookthrough of the `ib_insync` machinery, for eg. maybe
# we have to do the batch queries on the `asyncio` side?
yield (
get_hist,
@@ -421,14 +424,13 @@ _failed_resets: int = 0
async def get_bars(
proxy: MethodProxy,
fqme: str,
timeframe: int,
# blank to start which tells ib to look up the latest datum
end_dt: str = '',
start_dt: str|None = '',
end_dt: datetime|None = None,
start_dt: datetime|None = None,
# TODO: make this more dynamic based on measured frame rx latency?
# how long before we trigger a feed reset (seconds)
@@ -482,7 +484,8 @@ async def get_bars(
dt_duration,
) = await proxy.bars(
fqme=fqme,
# XXX TODO! lol we're not using this..
# XXX TODO! LOL we're not using this and IB dun
# support it anyway..
# start_dt=start_dt,
end_dt=end_dt,
sample_period_s=timeframe,
@@ -734,7 +737,7 @@ async def _setup_quote_stream(
# '294', # Trade rate / minute
# '295', # Vlm rate / minute
),
contract: Contract | None = None,
contract: Contract|None = None,
) -> trio.abc.ReceiveChannel:
'''
@@ -756,7 +759,12 @@
# XXX since this is an `asyncio.Task`, we must use
# tractor.pause_from_sync()
caccount_name, client = get_preferred_data_client(accts2clients)
(
_account_name,
client,
) = get_preferred_data_client(
accts2clients,
)
contract = (
contract
or
@@ -1091,14 +1099,9 @@ async def stream_quotes(
)
# is venue active rn?
venue_is_open: bool = any(
is_current_time_in_range(
start_dt=sesh.start,
end_dt=sesh.end,
)
for sesh in details.tradingSessions()
venue_is_open: bool = is_venue_open(
con_deats=details,
)
init_msg = FeedInit(mkt_info=mkt)
# NOTE, tell sampler (via config) to skip vlm summing for dst

View File

@@ -149,3 +149,83 @@ def is_venue_closure(
return True
return False
# TODO, put this into `._util` and call it from here!
#
# NOTE, this was generated by @guille from a gpt5 prompt
# and was originally thot to be needed before learning about
# `ib_insync.contract.ContractDetails._parseSessions()` and
# its downstream meths..
#
# This is still likely useful to keep for now to parse the
# `.tradingHours: str` value manually if we ever decide
# to move off `ib_async` and implement our own `trio`/`anyio`
# based version Bp
#
# >attempt to parse the retarted ib "time stampy thing" they
# >do for "venue hours" with this.. written by
# >gpt5-"thinking",
#
def parse_trading_hours(
spec: str,
tz: TzInfo|None = None
) -> dict[
date,
tuple[datetime, datetime]
]|None:
'''
Parse venue hours like:
'YYYYMMDD:HHMM-YYYYMMDD:HHMM;YYYYMMDD:CLOSED;...'
Returns `dict[date] = (open_dt, close_dt)` or `None` if
closed.
'''
if (
not isinstance(spec, str)
or
not spec
):
raise ValueError('spec must be a non-empty string')
out: dict[
date,
tuple[datetime, datetime]
]|None = {}
for part in (p.strip() for p in spec.split(';') if p.strip()):
if part.endswith(':CLOSED'):
day_s, _ = part.split(':', 1)
d = datetime.strptime(day_s, '%Y%m%d').date()
out[d] = None
continue
try:
start_s, end_s = part.split('-', 1)
start_dt = datetime.strptime(start_s, '%Y%m%d:%H%M')
end_dt = datetime.strptime(end_s, '%Y%m%d:%H%M')
except ValueError as exc:
raise ValueError(f'invalid segment: {part}') from exc
if tz is not None:
start_dt = start_dt.replace(tzinfo=tz)
end_dt = end_dt.replace(tzinfo=tz)
out[start_dt.date()] = (start_dt, end_dt)
return out
# ORIG desired usage,
#
# TODO, for non-drunk tomorrow,
# - call above fn and check that `output[today] is not None`
# trading_hrs: dict = parse_trading_hours(
# details.tradingHours
# )
# liq_hrs: dict = parse_trading_hours(
# details.liquidHours
# )
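For reference, a hypothetical round-trip through `parse_trading_hours()` matching the spec format documented in its docstring (dates invented for illustration):

```python
# hypothetical usage; the spec string is made up to match the
# 'YYYYMMDD:HHMM-YYYYMMDD:HHMM;YYYYMMDD:CLOSED' format above.
from datetime import date, datetime

spec: str = '20260209:0930-20260209:1600;20260210:CLOSED'
hours = parse_trading_hours(spec)

assert hours[date(2026, 2, 9)] == (
    datetime(2026, 2, 9, 9, 30),
    datetime(2026, 2, 9, 16, 0),
)
# closed days map to `None`
assert hours[date(2026, 2, 10)] is None
```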

View File

@@ -49,6 +49,7 @@ from pendulum import (
Duration,
duration as mk_duration,
from_timestamp,
timezone,
)
import numpy as np
import polars as pl
@@ -57,9 +58,7 @@ from piker.brokers import NoData
from piker.accounting import (
MktPair,
)
from piker.data._util import (
log,
)
from piker.log import get_logger
from ..data._sharedmem import (
maybe_open_shm_array,
ShmArray,
@@ -97,6 +96,9 @@ if TYPE_CHECKING:
# from .feed import _FeedsBus
log = get_logger()
# `ShmArray` buffer sizing configuration:
_mins_in_day = int(60 * 24)
# how much is probably dependent on lifestyle
@@ -401,7 +403,9 @@ async def start_backfill(
# based on the sample step size, maybe load a certain amount history
update_start_on_prepend: bool = False
if backfill_until_dt is None:
if (
_until_was_none := (backfill_until_dt is None)
):
# TODO: per-provider default history-durations?
# -[ ] inside the `open_history_client()` config allow
@@ -435,6 +439,8 @@
last_start_dt: datetime = backfill_from_dt
next_prepend_index: int = backfill_from_shm_index
est = timezone('EST')
while last_start_dt > backfill_until_dt:
log.info(
f'Requesting {timeframe}s frame:\n'
@@ -448,9 +454,10 @@
next_end_dt,
) = await get_hist(
timeframe,
end_dt=last_start_dt,
end_dt=(end_dt_param := last_start_dt),
)
except NoData as _daterr:
except NoData as nodata:
_nodata = nodata
orig_last_start_dt: datetime = last_start_dt
gap_report: str = (
f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n'
@@ -518,8 +525,32 @@
==
next_start_dt.timestamp()
)
assert (
(last_time := time[-1])
==
next_end_dt.timestamp()
)
assert time[-1] == next_end_dt.timestamp()
frame_last_dt = from_timestamp(last_time)
if (
frame_last_dt.add(seconds=timeframe)
<
end_dt_param
):
est_frame_last_dt = est.convert(frame_last_dt)
est_end_dt_param = est.convert(end_dt_param)
log.warning(
f'Provider frame ending BEFORE requested end_dt={end_dt_param} ??\n'
f'frame_last_dt (EST): {est_frame_last_dt!r}\n'
f'end_dt_param (EST): {est_end_dt_param!r}\n'
f'\n'
f'Likely contains,\n'
f'- a venue closure.\n'
f'- (maybe?) missing data ?\n'
)
# ?TODO, check against venue closure hours
# if/when provided by backend?
await tractor.pause()
expected_dur: Interval = (
last_start_dt.subtract(
@@ -581,10 +612,11 @@
'0 BARS TO PUSH after diff!?\n'
f'{next_start_dt} -> {last_start_dt}'
)
await tractor.pause()
# Check if we're about to exceed buffer capacity BEFORE
# attempting the push
if next_prepend_index - ln < 0:
if (next_prepend_index - ln) < 0:
log.warning(
f'Backfill would exceed buffer capacity!\n'
f'next_prepend_index: {next_prepend_index}\n'
@@ -655,7 +687,7 @@
},
})
# can't push the entire frame? so
# XXX, can't push the entire frame? so
# push only the amount that can fit..
break
@@ -715,8 +747,8 @@
) = dedupe(df)
if diff:
log.warning(
f'Found {diff} duplicates in tsdb, '
f'overwriting with deduped data\n'
f'Found {diff!r} duplicates in tsdb! '
f'=> Overwriting with `deduped` data !! <=\n'
)
await storage.write_ohlcv(
col_sym_key,