Compare commits
12 Commits
2d8b409b14
...
eb06fc79f1
Author | SHA1 | Date |
---|---|---|
|
eb06fc79f1 | |
|
76ca316b9d | |
|
3e8481978b | |
|
9e6bfa0926 | |
|
a945bb33f3 | |
|
850cdbfe59 | |
|
d49608f74e | |
|
bf0ac93aa3 | |
|
d7179d47b0 | |
|
c390e87536 | |
|
5e4a6d61c7 | |
|
3caaa30b03 |
|
@ -1,8 +1,40 @@
|
||||||
.accounting
|
piker.accounting
|
||||||
-----------
|
________________
|
||||||
A subsystem for transaction processing, storage and historical
|
A subsystem for transaction processing, storage and historical
|
||||||
measurement.
|
measurement.
|
||||||
|
|
||||||
|
synopsis
|
||||||
|
--------
|
||||||
|
The big question for any trader is this:
|
||||||
|
|
||||||
|
*what is the price that determines whether i take a loss or a gain on my
|
||||||
|
trade?*
|
||||||
|
|
||||||
|
In other words, at any given state of accounting your current assets,
|
||||||
|
what is the price between any 2 assets you've transacted that determines
|
||||||
|
at which price you can conduct **the next** transaction and know if you
|
||||||
|
are making or losing more (or less) of the *source* asset versus the
|
||||||
|
*destination* asset?
|
||||||
|
|
||||||
|
Let's do a very simple example:
|
||||||
|
|
||||||
|
> Joe wants to buy some tacos bc they're super hungo.
|
||||||
|
> Joe has a friend who also likes tacos but doesn't mind if they're fresh; he doesn't mind having day old tacos.
|
||||||
|
> Inflation is rampant and taco prices are trending up for no good reason besides everyone thinks prices are going up.
|
||||||
|
> Joe goes to the taco stand and buys 4 tacos at 25 mxn.
|
||||||
|
> This makes Joe's net cost `4 * 25 = 200` mxn.
|
||||||
|
> Joe eats 3 tacos and realizes that he can't finish the last, so he puts it in the fridge to save for the next day (since he owns a comal).
|
||||||
|
> The next day the price of tacos goes up to 30 mxn (for no good reason > besides the taco stand noticing Joe is a tourist and that > "inflation" is some thing that's used as an excuse for price changes).
|
||||||
|
> Joe's friend from before got lit up (like he does every morning) and msgs Joe to buy him 2 tacos for when he shows up in the late morning.
|
||||||
|
> Joe says "sure, but i also have a leftover if you want it, and I'm fasting today so you can have my sobras and i'll buy you a new one".
|
||||||
|
> The friend coughs a couple times, and says "yee no problem man, just make sure you get them"
|
||||||
|
>
|
||||||
|
|
||||||
|
|
||||||
|
Prior *suit* definitions:
|
||||||
|
|
||||||
|
- the canucks equiv of the IRS call this idea ["Adjusted cost base"](https://www.canada.ca/en/revenue-agency/services/tax/individuals/topics/about-your-tax-return/tax-return/completing-a-tax-return/personal-income/line-12700-capital-gains/definitions-capital-gains.html#Adjustedcostbase)
|
||||||
|
|
||||||
|
|
||||||
.pnl
|
.pnl
|
||||||
----
|
----
|
||||||
|
|
|
@ -40,7 +40,7 @@ import tomli_w # for fast ledger writing
|
||||||
|
|
||||||
from piker.types import Struct
|
from piker.types import Struct
|
||||||
from piker import config
|
from piker import config
|
||||||
from ..log import get_logger
|
from piker.log import get_logger
|
||||||
from .calc import (
|
from .calc import (
|
||||||
iter_by_dt,
|
iter_by_dt,
|
||||||
)
|
)
|
||||||
|
@ -239,7 +239,9 @@ class TransactionLedger(UserDict):
|
||||||
|
|
||||||
symcache: SymbologyCache = self._symcache
|
symcache: SymbologyCache = self._symcache
|
||||||
towrite: dict[str, Any] = {}
|
towrite: dict[str, Any] = {}
|
||||||
for tid, txdict in self.tx_sort(self.data.copy()):
|
for tid, txdict in self.tx_sort(
|
||||||
|
self.data.copy()
|
||||||
|
):
|
||||||
# write blank-str expiry for non-expiring assets
|
# write blank-str expiry for non-expiring assets
|
||||||
if (
|
if (
|
||||||
'expiry' in txdict
|
'expiry' in txdict
|
||||||
|
@ -377,7 +379,7 @@ def open_trade_ledger(
|
||||||
account,
|
account,
|
||||||
dirpath=_fp,
|
dirpath=_fp,
|
||||||
)
|
)
|
||||||
cpy = ledger_dict.copy()
|
cpy: dict = ledger_dict.copy()
|
||||||
|
|
||||||
# XXX NOTE: if not provided presume we are being called from
|
# XXX NOTE: if not provided presume we are being called from
|
||||||
# sync code and need to maybe run `trio` to generate..
|
# sync code and need to maybe run `trio` to generate..
|
||||||
|
@ -406,7 +408,13 @@ def open_trade_ledger(
|
||||||
account=account,
|
account=account,
|
||||||
mod=mod,
|
mod=mod,
|
||||||
symcache=symcache,
|
symcache=symcache,
|
||||||
tx_sort=getattr(mod, 'tx_sort', tx_sort),
|
|
||||||
|
# NOTE: allow backends to provide custom ledger sorting
|
||||||
|
tx_sort=getattr(
|
||||||
|
mod,
|
||||||
|
'tx_sort',
|
||||||
|
tx_sort,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
yield ledger
|
yield ledger
|
||||||
|
|
|
@ -305,8 +305,8 @@ class MktPair(Struct, frozen=True):
|
||||||
# config right?
|
# config right?
|
||||||
# src_type: AssetTypeName
|
# src_type: AssetTypeName
|
||||||
|
|
||||||
# for derivs, info describing contract, egs.
|
# for derivs, info describing contract, egs. strike price, call
|
||||||
# strike price, call or put, swap type, exercise model, etc.
|
# or put, swap type, exercise model, etc.
|
||||||
contract_info: list[str] | None = None
|
contract_info: list[str] | None = None
|
||||||
|
|
||||||
# TODO: rename to sectype since all of these can
|
# TODO: rename to sectype since all of these can
|
||||||
|
|
|
@ -30,7 +30,8 @@ from types import ModuleType
|
||||||
from typing import (
|
from typing import (
|
||||||
Any,
|
Any,
|
||||||
Iterator,
|
Iterator,
|
||||||
Generator
|
Generator,
|
||||||
|
TYPE_CHECKING,
|
||||||
)
|
)
|
||||||
|
|
||||||
import pendulum
|
import pendulum
|
||||||
|
@ -59,8 +60,10 @@ from ..clearing._messages import (
|
||||||
BrokerdPosition,
|
BrokerdPosition,
|
||||||
)
|
)
|
||||||
from piker.types import Struct
|
from piker.types import Struct
|
||||||
|
from piker.log import get_logger
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
from piker.data._symcache import SymbologyCache
|
from piker.data._symcache import SymbologyCache
|
||||||
from ..log import get_logger
|
|
||||||
|
|
||||||
log = get_logger(__name__)
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
@ -493,6 +496,17 @@ class Account(Struct):
|
||||||
|
|
||||||
_mktmap_table: dict[str, MktPair] | None = None,
|
_mktmap_table: dict[str, MktPair] | None = None,
|
||||||
|
|
||||||
|
only_require: list[str]|True = True,
|
||||||
|
# ^list of fqmes that are "required" to be processed from
|
||||||
|
# this ledger pass; we often don't care about others and
|
||||||
|
# definitely shouldn't always error in such cases.
|
||||||
|
# (eg. broker backend loaded that doesn't yet supsport the
|
||||||
|
# symcache but also, inside the paper engine we don't ad-hoc
|
||||||
|
# request `get_mkt_info()` for every symbol in the ledger,
|
||||||
|
# only the one for which we're simulating against).
|
||||||
|
# TODO, not sure if there's a better soln for this, ideally
|
||||||
|
# all backends get symcache support afap i guess..
|
||||||
|
|
||||||
) -> dict[str, Position]:
|
) -> dict[str, Position]:
|
||||||
'''
|
'''
|
||||||
Update the internal `.pps[str, Position]` table from input
|
Update the internal `.pps[str, Position]` table from input
|
||||||
|
@ -535,11 +549,32 @@ class Account(Struct):
|
||||||
if _mktmap_table is None:
|
if _mktmap_table is None:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
required: bool = (
|
||||||
|
only_require is True
|
||||||
|
or (
|
||||||
|
only_require is not True
|
||||||
|
and
|
||||||
|
fqme in only_require
|
||||||
|
)
|
||||||
|
)
|
||||||
# XXX: caller is allowed to provide a fallback
|
# XXX: caller is allowed to provide a fallback
|
||||||
# mktmap table for the case where a new position is
|
# mktmap table for the case where a new position is
|
||||||
# being added and the preloaded symcache didn't
|
# being added and the preloaded symcache didn't
|
||||||
# have this entry prior (eg. with frickin IB..)
|
# have this entry prior (eg. with frickin IB..)
|
||||||
mkt = _mktmap_table[fqme]
|
if (
|
||||||
|
not (mkt := _mktmap_table.get(fqme))
|
||||||
|
and
|
||||||
|
required
|
||||||
|
):
|
||||||
|
raise
|
||||||
|
|
||||||
|
elif not required:
|
||||||
|
continue
|
||||||
|
|
||||||
|
else:
|
||||||
|
# should be an entry retreived somewhere
|
||||||
|
assert mkt
|
||||||
|
|
||||||
|
|
||||||
if not (pos := pps.get(bs_mktid)):
|
if not (pos := pps.get(bs_mktid)):
|
||||||
|
|
||||||
|
@ -656,7 +691,7 @@ class Account(Struct):
|
||||||
def write_config(self) -> None:
|
def write_config(self) -> None:
|
||||||
'''
|
'''
|
||||||
Write the current account state to the user's account TOML file, normally
|
Write the current account state to the user's account TOML file, normally
|
||||||
something like ``pps.toml``.
|
something like `pps.toml`.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
# TODO: show diff output?
|
# TODO: show diff output?
|
||||||
|
|
|
@ -251,10 +251,16 @@ def iter_by_dt(
|
||||||
for k in parsers:
|
for k in parsers:
|
||||||
if (
|
if (
|
||||||
isdict and k in tx
|
isdict and k in tx
|
||||||
or getattr(tx, k, None)
|
or
|
||||||
|
getattr(tx, k, None)
|
||||||
):
|
):
|
||||||
v = tx[k] if isdict else tx.dt
|
v = (
|
||||||
assert v is not None, f'No valid value for `{k}`!?'
|
tx[k] if isdict
|
||||||
|
else tx.dt
|
||||||
|
)
|
||||||
|
assert v is not None, (
|
||||||
|
f'No valid value for `{k}`!?'
|
||||||
|
)
|
||||||
|
|
||||||
# only call parser on the value if not None from
|
# only call parser on the value if not None from
|
||||||
# the `parsers` table above (when NOT using
|
# the `parsers` table above (when NOT using
|
||||||
|
@ -269,8 +275,21 @@ def iter_by_dt(
|
||||||
return v
|
return v
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# XXX: should never get here..
|
# TODO: move to top?
|
||||||
breakpoint()
|
from piker.log import get_logger
|
||||||
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
# XXX: we should really never get here..
|
||||||
|
# only if a ledger record has no expected sort(able)
|
||||||
|
# field will we likely hit this.. like with ze IB.
|
||||||
|
# if no sortable field just deliver epoch?
|
||||||
|
log.warning(
|
||||||
|
'No (time) sortable field for TXN:\n'
|
||||||
|
f'{tx}\n'
|
||||||
|
)
|
||||||
|
return from_timestamp(0)
|
||||||
|
# breakpoint()
|
||||||
|
|
||||||
|
|
||||||
entry: tuple[str, dict] | Transaction
|
entry: tuple[str, dict] | Transaction
|
||||||
for entry in sorted(
|
for entry in sorted(
|
||||||
|
|
|
@ -300,7 +300,8 @@ def disect(
|
||||||
assert not df.is_empty()
|
assert not df.is_empty()
|
||||||
|
|
||||||
# muck around in pdbp REPL
|
# muck around in pdbp REPL
|
||||||
breakpoint()
|
# tractor.devx.mk_pdb().set_trace()
|
||||||
|
# breakpoint()
|
||||||
|
|
||||||
# TODO: we REALLY need a better console REPL for this
|
# TODO: we REALLY need a better console REPL for this
|
||||||
# kinda thing..
|
# kinda thing..
|
||||||
|
|
|
@ -653,6 +653,7 @@ async def open_trade_dialog(
|
||||||
# in) use manually constructed table from calling
|
# in) use manually constructed table from calling
|
||||||
# the `.get_mkt_info()` provider EP above.
|
# the `.get_mkt_info()` provider EP above.
|
||||||
_mktmap_table=mkt_by_fqme,
|
_mktmap_table=mkt_by_fqme,
|
||||||
|
only_require=list(mkt_by_fqme),
|
||||||
)
|
)
|
||||||
|
|
||||||
pp_msgs: list[BrokerdPosition] = []
|
pp_msgs: list[BrokerdPosition] = []
|
||||||
|
|
|
@ -31,6 +31,7 @@ from pathlib import Path
|
||||||
from pprint import pformat
|
from pprint import pformat
|
||||||
from typing import (
|
from typing import (
|
||||||
Any,
|
Any,
|
||||||
|
Callable,
|
||||||
Sequence,
|
Sequence,
|
||||||
Hashable,
|
Hashable,
|
||||||
TYPE_CHECKING,
|
TYPE_CHECKING,
|
||||||
|
@ -56,7 +57,7 @@ from piker.brokers import (
|
||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..accounting import (
|
from piker.accounting import (
|
||||||
Asset,
|
Asset,
|
||||||
MktPair,
|
MktPair,
|
||||||
)
|
)
|
||||||
|
@ -149,19 +150,36 @@ class SymbologyCache(Struct):
|
||||||
'Implement `Client.get_assets()`!'
|
'Implement `Client.get_assets()`!'
|
||||||
)
|
)
|
||||||
|
|
||||||
if get_mkt_pairs := getattr(client, 'get_mkt_pairs', None):
|
get_mkt_pairs: Callable|None = getattr(
|
||||||
|
client,
|
||||||
|
'get_mkt_pairs',
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if not get_mkt_pairs:
|
||||||
|
log.warning(
|
||||||
|
'No symbology cache `Pair` support for `{provider}`..\n'
|
||||||
|
'Implement `Client.get_mkt_pairs()`!'
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
pairs: dict[str, Struct] = await get_mkt_pairs()
|
pairs: dict[str, Struct] = await get_mkt_pairs()
|
||||||
for bs_fqme, pair in pairs.items():
|
if not pairs:
|
||||||
|
log.warning(
|
||||||
|
'No pairs from intial {provider!r} sym-cache request?\n\n'
|
||||||
|
'`Client.get_mkt_pairs()` -> {pairs!r} ?'
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
# NOTE: every backend defined pair should
|
for bs_fqme, pair in pairs.items():
|
||||||
# declare it's ns path for roundtrip
|
|
||||||
# serialization lookup.
|
|
||||||
if not getattr(pair, 'ns_path', None):
|
if not getattr(pair, 'ns_path', None):
|
||||||
|
# XXX: every backend defined pair must declare
|
||||||
|
# a `.ns_path: tractor.NamespacePath` to enable
|
||||||
|
# roundtrip serialization lookup from a local
|
||||||
|
# cache file.
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
f'Pair-struct for {self.mod.name} MUST define a '
|
f'Pair-struct for {self.mod.name} MUST define a '
|
||||||
'`.ns_path: str`!\n'
|
'`.ns_path: str`!\n\n'
|
||||||
f'{pair}'
|
f'{pair!r}'
|
||||||
)
|
)
|
||||||
|
|
||||||
entry = await self.mod.get_mkt_info(pair.bs_fqme)
|
entry = await self.mod.get_mkt_info(pair.bs_fqme)
|
||||||
|
@ -195,12 +213,6 @@ class SymbologyCache(Struct):
|
||||||
pair,
|
pair,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
|
||||||
log.warning(
|
|
||||||
'No symbology cache `Pair` support for `{provider}`..\n'
|
|
||||||
'Implement `Client.get_mkt_pairs()`!'
|
|
||||||
)
|
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -786,7 +786,6 @@ async def install_brokerd_search(
|
||||||
|
|
||||||
@acm
|
@acm
|
||||||
async def maybe_open_feed(
|
async def maybe_open_feed(
|
||||||
|
|
||||||
fqmes: list[str],
|
fqmes: list[str],
|
||||||
loglevel: str | None = None,
|
loglevel: str | None = None,
|
||||||
|
|
||||||
|
@ -840,7 +839,6 @@ async def maybe_open_feed(
|
||||||
|
|
||||||
@acm
|
@acm
|
||||||
async def open_feed(
|
async def open_feed(
|
||||||
|
|
||||||
fqmes: list[str],
|
fqmes: list[str],
|
||||||
|
|
||||||
loglevel: str|None = None,
|
loglevel: str|None = None,
|
||||||
|
|
|
@ -36,10 +36,10 @@ from ._sharedmem import (
|
||||||
ShmArray,
|
ShmArray,
|
||||||
_Token,
|
_Token,
|
||||||
)
|
)
|
||||||
|
from piker.accounting import MktPair
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..accounting import MktPair
|
from piker.data.feed import Feed
|
||||||
from .feed import Feed
|
|
||||||
|
|
||||||
|
|
||||||
class Flume(Struct):
|
class Flume(Struct):
|
||||||
|
|
|
@ -113,9 +113,9 @@ def validate_backend(
|
||||||
)
|
)
|
||||||
if ep is None:
|
if ep is None:
|
||||||
log.warning(
|
log.warning(
|
||||||
f'Provider backend {mod.name} is missing '
|
f'Provider backend {mod.name!r} is missing '
|
||||||
f'{daemon_name} support :(\n'
|
f'{daemon_name!r} support?\n'
|
||||||
f'The following endpoint is missing: {name}'
|
f'|_module endpoint-func missing: {name!r}\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
inits: list[
|
inits: list[
|
||||||
|
|
|
@ -386,6 +386,8 @@ def ldshm(
|
||||||
open_annot_ctl() as actl,
|
open_annot_ctl() as actl,
|
||||||
):
|
):
|
||||||
shm_df: pl.DataFrame | None = None
|
shm_df: pl.DataFrame | None = None
|
||||||
|
tf2aids: dict[float, dict] = {}
|
||||||
|
|
||||||
for (
|
for (
|
||||||
shmfile,
|
shmfile,
|
||||||
shm,
|
shm,
|
||||||
|
@ -526,16 +528,17 @@ def ldshm(
|
||||||
new_df,
|
new_df,
|
||||||
step_gaps,
|
step_gaps,
|
||||||
)
|
)
|
||||||
|
|
||||||
# last chance manual overwrites in REPL
|
# last chance manual overwrites in REPL
|
||||||
await tractor.pause()
|
# await tractor.pause()
|
||||||
assert aids
|
assert aids
|
||||||
|
tf2aids[period_s] = aids
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# allow interaction even when no ts problems.
|
# allow interaction even when no ts problems.
|
||||||
await tractor.pause()
|
assert not diff
|
||||||
# assert not diff
|
|
||||||
|
|
||||||
|
await tractor.pause()
|
||||||
|
log.info('Exiting TSP shm anal-izer!')
|
||||||
|
|
||||||
if shm_df is None:
|
if shm_df is None:
|
||||||
log.error(
|
log.error(
|
||||||
|
|
|
@ -161,7 +161,13 @@ class NativeStorageClient:
|
||||||
|
|
||||||
def index_files(self):
|
def index_files(self):
|
||||||
for path in self._datadir.iterdir():
|
for path in self._datadir.iterdir():
|
||||||
if path.name in {'borked', 'expired',}:
|
if (
|
||||||
|
path.is_dir()
|
||||||
|
or
|
||||||
|
'.parquet' not in str(path)
|
||||||
|
# or
|
||||||
|
# path.name in {'borked', 'expired',}
|
||||||
|
):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
key: str = path.name.rstrip('.parquet')
|
key: str = path.name.rstrip('.parquet')
|
||||||
|
|
|
@ -44,8 +44,10 @@ import trio
|
||||||
from trio_typing import TaskStatus
|
from trio_typing import TaskStatus
|
||||||
import tractor
|
import tractor
|
||||||
from pendulum import (
|
from pendulum import (
|
||||||
|
Interval,
|
||||||
DateTime,
|
DateTime,
|
||||||
Duration,
|
Duration,
|
||||||
|
duration as mk_duration,
|
||||||
from_timestamp,
|
from_timestamp,
|
||||||
)
|
)
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -214,7 +216,8 @@ async def maybe_fill_null_segments(
|
||||||
# pair, immediately stop backfilling?
|
# pair, immediately stop backfilling?
|
||||||
if (
|
if (
|
||||||
start_dt
|
start_dt
|
||||||
and end_dt < start_dt
|
and
|
||||||
|
end_dt < start_dt
|
||||||
):
|
):
|
||||||
await tractor.pause()
|
await tractor.pause()
|
||||||
break
|
break
|
||||||
|
@ -262,6 +265,7 @@ async def maybe_fill_null_segments(
|
||||||
except tractor.ContextCancelled:
|
except tractor.ContextCancelled:
|
||||||
# log.exception
|
# log.exception
|
||||||
await tractor.pause()
|
await tractor.pause()
|
||||||
|
raise
|
||||||
|
|
||||||
null_segs_detected.set()
|
null_segs_detected.set()
|
||||||
# RECHECK for more null-gaps
|
# RECHECK for more null-gaps
|
||||||
|
@ -349,7 +353,7 @@ async def maybe_fill_null_segments(
|
||||||
|
|
||||||
async def start_backfill(
|
async def start_backfill(
|
||||||
get_hist,
|
get_hist,
|
||||||
frame_types: dict[str, Duration] | None,
|
def_frame_duration: Duration,
|
||||||
mod: ModuleType,
|
mod: ModuleType,
|
||||||
mkt: MktPair,
|
mkt: MktPair,
|
||||||
shm: ShmArray,
|
shm: ShmArray,
|
||||||
|
@ -379,22 +383,23 @@ async def start_backfill(
|
||||||
update_start_on_prepend: bool = False
|
update_start_on_prepend: bool = False
|
||||||
if backfill_until_dt is None:
|
if backfill_until_dt is None:
|
||||||
|
|
||||||
# TODO: drop this right and just expose the backfill
|
# TODO: per-provider default history-durations?
|
||||||
# limits inside a [storage] section in conf.toml?
|
# -[ ] inside the `open_history_client()` config allow
|
||||||
# when no tsdb "last datum" is provided, we just load
|
# declaring the history duration limits instead of
|
||||||
# some near-term history.
|
# guessing and/or applying the same limits to all?
|
||||||
# periods = {
|
#
|
||||||
# 1: {'days': 1},
|
# -[ ] allow declaring (default) per-provider backfill
|
||||||
# 60: {'days': 14},
|
# limits inside a [storage] sub-section in conf.toml?
|
||||||
# }
|
#
|
||||||
|
# NOTE, when no tsdb "last datum" is provided, we just
|
||||||
# do a decently sized backfill and load it into storage.
|
# load some near-term history by presuming a "decently
|
||||||
|
# large" 60s duration limit and a much shorter 1s range.
|
||||||
periods = {
|
periods = {
|
||||||
1: {'days': 2},
|
1: {'days': 2},
|
||||||
60: {'years': 6},
|
60: {'years': 6},
|
||||||
}
|
}
|
||||||
period_duration: int = periods[timeframe]
|
period_duration: int = periods[timeframe]
|
||||||
update_start_on_prepend = True
|
update_start_on_prepend: bool = True
|
||||||
|
|
||||||
# NOTE: manually set the "latest" datetime which we intend to
|
# NOTE: manually set the "latest" datetime which we intend to
|
||||||
# backfill history "until" so as to adhere to the history
|
# backfill history "until" so as to adhere to the history
|
||||||
|
@ -416,7 +421,6 @@ async def start_backfill(
|
||||||
f'backfill_until_dt: {backfill_until_dt}\n'
|
f'backfill_until_dt: {backfill_until_dt}\n'
|
||||||
f'last_start_dt: {last_start_dt}\n'
|
f'last_start_dt: {last_start_dt}\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
(
|
(
|
||||||
array,
|
array,
|
||||||
|
@ -426,71 +430,114 @@ async def start_backfill(
|
||||||
timeframe,
|
timeframe,
|
||||||
end_dt=last_start_dt,
|
end_dt=last_start_dt,
|
||||||
)
|
)
|
||||||
|
|
||||||
except NoData as _daterr:
|
except NoData as _daterr:
|
||||||
# 3 cases:
|
orig_last_start_dt: datetime = last_start_dt
|
||||||
# - frame in the middle of a legit venue gap
|
gap_report: str = (
|
||||||
# - history actually began at the `last_start_dt`
|
f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n'
|
||||||
# - some other unknown error (ib blocking the
|
f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n'
|
||||||
# history bc they don't want you seeing how they
|
f'last_start_dt: {orig_last_start_dt}\n\n'
|
||||||
# cucked all the tinas..)
|
f'bf_until: {backfill_until_dt}\n'
|
||||||
if dur := frame_types.get(timeframe):
|
|
||||||
# decrement by a frame's worth of duration and
|
|
||||||
# retry a few times.
|
|
||||||
last_start_dt.subtract(
|
|
||||||
seconds=dur.total_seconds()
|
|
||||||
)
|
)
|
||||||
log.warning(
|
# EMPTY FRAME signal with 3 (likely) causes:
|
||||||
f'{mod.name} -> EMPTY FRAME for end_dt?\n'
|
#
|
||||||
f'tf@fqme: {timeframe}@{mkt.fqme}\n'
|
# 1. range contains legit gap in venue history
|
||||||
'bf_until <- last_start_dt:\n'
|
# 2. history actually (edge case) **began** at the
|
||||||
f'{backfill_until_dt} <- {last_start_dt}\n'
|
# value `last_start_dt`
|
||||||
f'Decrementing `end_dt` by {dur} and retry..\n'
|
# 3. some other unknown error (ib blocking the
|
||||||
|
# history-query bc they don't want you seeing how
|
||||||
|
# they cucked all the tinas.. like with options
|
||||||
|
# hist)
|
||||||
|
#
|
||||||
|
if def_frame_duration:
|
||||||
|
# decrement by a duration's (frame) worth of time
|
||||||
|
# as maybe indicated by the backend to see if we
|
||||||
|
# can get older data before this possible
|
||||||
|
# "history gap".
|
||||||
|
last_start_dt: datetime = last_start_dt.subtract(
|
||||||
|
seconds=def_frame_duration.total_seconds()
|
||||||
)
|
)
|
||||||
|
gap_report += (
|
||||||
|
f'Decrementing `end_dt` and retrying with,\n'
|
||||||
|
f'def_frame_duration: {def_frame_duration}\n'
|
||||||
|
f'(new) last_start_dt: {last_start_dt}\n'
|
||||||
|
)
|
||||||
|
log.warning(gap_report)
|
||||||
|
# skip writing to shm/tsdb and try the next
|
||||||
|
# duration's worth of prior history.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# broker says there never was or is no more history to pull
|
else:
|
||||||
except DataUnavailable:
|
# await tractor.pause()
|
||||||
log.warning(
|
raise DataUnavailable(gap_report)
|
||||||
f'NO-MORE-DATA in range?\n'
|
|
||||||
f'`{mod.name}` halted history:\n'
|
|
||||||
f'tf@fqme: {timeframe}@{mkt.fqme}\n'
|
|
||||||
'bf_until <- last_start_dt:\n'
|
|
||||||
f'{backfill_until_dt} <- {last_start_dt}\n'
|
|
||||||
)
|
|
||||||
|
|
||||||
# ugh, what's a better way?
|
# broker says there never was or is no more history to pull
|
||||||
# TODO: fwiw, we probably want a way to signal a throttle
|
except DataUnavailable as due:
|
||||||
# condition (eg. with ib) so that we can halt the
|
message: str = due.args[0]
|
||||||
# request loop until the condition is resolved?
|
log.warning(
|
||||||
if timeframe > 1:
|
f'Provider {mod.name!r} halted backfill due to,\n\n'
|
||||||
await tractor.pause()
|
|
||||||
|
f'{message}\n'
|
||||||
|
|
||||||
|
f'fqme: {mkt.fqme}\n'
|
||||||
|
f'timeframe: {timeframe}\n'
|
||||||
|
f'last_start_dt: {last_start_dt}\n'
|
||||||
|
f'bf_until: {backfill_until_dt}\n'
|
||||||
|
)
|
||||||
|
# UGH: what's a better way?
|
||||||
|
# TODO: backends are responsible for being correct on
|
||||||
|
# this right!?
|
||||||
|
# -[ ] in the `ib` case we could maybe offer some way
|
||||||
|
# to halt the request loop until the condition is
|
||||||
|
# resolved or should the backend be entirely in
|
||||||
|
# charge of solving such faults? yes, right?
|
||||||
return
|
return
|
||||||
|
|
||||||
|
time: np.ndarray = array['time']
|
||||||
assert (
|
assert (
|
||||||
array['time'][0]
|
time[0]
|
||||||
==
|
==
|
||||||
next_start_dt.timestamp()
|
next_start_dt.timestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
diff = last_start_dt - next_start_dt
|
assert time[-1] == next_end_dt.timestamp()
|
||||||
frame_time_diff_s = diff.seconds
|
|
||||||
|
expected_dur: Interval = last_start_dt - next_start_dt
|
||||||
|
|
||||||
# frame's worth of sample-period-steps, in seconds
|
# frame's worth of sample-period-steps, in seconds
|
||||||
frame_size_s: float = len(array) * timeframe
|
frame_size_s: float = len(array) * timeframe
|
||||||
expected_frame_size_s: float = frame_size_s + timeframe
|
recv_frame_dur: Duration = (
|
||||||
if frame_time_diff_s > expected_frame_size_s:
|
from_timestamp(array[-1]['time'])
|
||||||
|
-
|
||||||
|
from_timestamp(array[0]['time'])
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
(lt_frame := (recv_frame_dur < expected_dur))
|
||||||
|
or
|
||||||
|
(null_frame := (frame_size_s == 0))
|
||||||
|
# ^XXX, should NEVER hit now!
|
||||||
|
):
|
||||||
# XXX: query result includes a start point prior to our
|
# XXX: query result includes a start point prior to our
|
||||||
# expected "frame size" and thus is likely some kind of
|
# expected "frame size" and thus is likely some kind of
|
||||||
# history gap (eg. market closed period, outage, etc.)
|
# history gap (eg. market closed period, outage, etc.)
|
||||||
# so just report it to console for now.
|
# so just report it to console for now.
|
||||||
|
if lt_frame:
|
||||||
|
reason = 'Possible GAP (or first-datum)'
|
||||||
|
else:
|
||||||
|
assert null_frame
|
||||||
|
reason = 'NULL-FRAME'
|
||||||
|
|
||||||
|
missing_dur: Interval = expected_dur.end - recv_frame_dur.end
|
||||||
log.warning(
|
log.warning(
|
||||||
'GAP DETECTED:\n'
|
f'{timeframe}s-series {reason} detected!\n'
|
||||||
f'last_start_dt: {last_start_dt}\n'
|
f'fqme: {mkt.fqme}\n'
|
||||||
f'diff: {diff}\n'
|
f'last_start_dt: {last_start_dt}\n\n'
|
||||||
f'frame_time_diff_s: {frame_time_diff_s}\n'
|
f'recv interval: {recv_frame_dur}\n'
|
||||||
|
f'expected interval: {expected_dur}\n\n'
|
||||||
|
|
||||||
|
f'Missing duration of history of {missing_dur.in_words()!r}\n'
|
||||||
|
f'{missing_dur}\n'
|
||||||
)
|
)
|
||||||
|
# await tractor.pause()
|
||||||
|
|
||||||
to_push = diff_history(
|
to_push = diff_history(
|
||||||
array,
|
array,
|
||||||
|
@ -565,22 +612,27 @@ async def start_backfill(
|
||||||
# long-term storage.
|
# long-term storage.
|
||||||
if (
|
if (
|
||||||
storage is not None
|
storage is not None
|
||||||
and write_tsdb
|
and
|
||||||
|
write_tsdb
|
||||||
):
|
):
|
||||||
log.info(
|
log.info(
|
||||||
f'Writing {ln} frame to storage:\n'
|
f'Writing {ln} frame to storage:\n'
|
||||||
f'{next_start_dt} -> {last_start_dt}'
|
f'{next_start_dt} -> {last_start_dt}'
|
||||||
)
|
)
|
||||||
|
|
||||||
# always drop the src asset token for
|
# NOTE, always drop the src asset token for
|
||||||
# non-currency-pair like market types (for now)
|
# non-currency-pair like market types (for now)
|
||||||
|
#
|
||||||
|
# THAT IS, for now our table key schema is NOT
|
||||||
|
# including the dst[/src] source asset token. SO,
|
||||||
|
# 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for
|
||||||
|
# historical reasons ONLY.
|
||||||
if mkt.dst.atype not in {
|
if mkt.dst.atype not in {
|
||||||
'crypto',
|
'crypto',
|
||||||
'crypto_currency',
|
'crypto_currency',
|
||||||
'fiat', # a "forex pair"
|
'fiat', # a "forex pair"
|
||||||
|
'perpetual_future', # stupid "perps" from cex land
|
||||||
}:
|
}:
|
||||||
# for now, our table key schema is not including
|
|
||||||
# the dst[/src] source asset token.
|
|
||||||
col_sym_key: str = mkt.get_fqme(
|
col_sym_key: str = mkt.get_fqme(
|
||||||
delim_char='',
|
delim_char='',
|
||||||
without_src=True,
|
without_src=True,
|
||||||
|
@ -685,7 +737,7 @@ async def back_load_from_tsdb(
|
||||||
last_tsdb_dt
|
last_tsdb_dt
|
||||||
and latest_start_dt
|
and latest_start_dt
|
||||||
):
|
):
|
||||||
backfilled_size_s = (
|
backfilled_size_s: Duration = (
|
||||||
latest_start_dt - last_tsdb_dt
|
latest_start_dt - last_tsdb_dt
|
||||||
).seconds
|
).seconds
|
||||||
# if the shm buffer len is not large enough to contain
|
# if the shm buffer len is not large enough to contain
|
||||||
|
@ -908,6 +960,8 @@ async def tsdb_backfill(
|
||||||
f'{pformat(config)}\n'
|
f'{pformat(config)}\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# concurrently load the provider's most-recent-frame AND any
|
||||||
|
# pre-existing tsdb history already saved in `piker` storage.
|
||||||
dt_eps: list[DateTime, DateTime] = []
|
dt_eps: list[DateTime, DateTime] = []
|
||||||
async with trio.open_nursery() as tn:
|
async with trio.open_nursery() as tn:
|
||||||
tn.start_soon(
|
tn.start_soon(
|
||||||
|
@ -918,7 +972,6 @@ async def tsdb_backfill(
|
||||||
timeframe,
|
timeframe,
|
||||||
config,
|
config,
|
||||||
)
|
)
|
||||||
|
|
||||||
tsdb_entry: tuple = await load_tsdb_hist(
|
tsdb_entry: tuple = await load_tsdb_hist(
|
||||||
storage,
|
storage,
|
||||||
mkt,
|
mkt,
|
||||||
|
@ -947,6 +1000,25 @@ async def tsdb_backfill(
|
||||||
mr_end_dt,
|
mr_end_dt,
|
||||||
) = dt_eps
|
) = dt_eps
|
||||||
|
|
||||||
|
first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds
|
||||||
|
calced_frame_size: Duration = mk_duration(
|
||||||
|
seconds=first_frame_dur_s,
|
||||||
|
)
|
||||||
|
# NOTE, attempt to use the backend declared default frame
|
||||||
|
# sizing (as allowed by their time-series query APIs) and
|
||||||
|
# if not provided try to construct a default from the
|
||||||
|
# first frame received above.
|
||||||
|
def_frame_durs: dict[
|
||||||
|
int,
|
||||||
|
Duration,
|
||||||
|
]|None = config.get('frame_types', None)
|
||||||
|
if def_frame_durs:
|
||||||
|
def_frame_size: Duration = def_frame_durs[timeframe]
|
||||||
|
assert def_frame_size == calced_frame_size
|
||||||
|
else:
|
||||||
|
# use what we calced from first frame above.
|
||||||
|
def_frame_size = calced_frame_size
|
||||||
|
|
||||||
# NOTE: when there's no offline data, there's 2 cases:
|
# NOTE: when there's no offline data, there's 2 cases:
|
||||||
# - data backend doesn't support timeframe/sample
|
# - data backend doesn't support timeframe/sample
|
||||||
# period (in which case `dt_eps` should be `None` and
|
# period (in which case `dt_eps` should be `None` and
|
||||||
|
@ -977,7 +1049,7 @@ async def tsdb_backfill(
|
||||||
partial(
|
partial(
|
||||||
start_backfill,
|
start_backfill,
|
||||||
get_hist=get_hist,
|
get_hist=get_hist,
|
||||||
frame_types=config.get('frame_types', None),
|
def_frame_duration=def_frame_size,
|
||||||
mod=mod,
|
mod=mod,
|
||||||
mkt=mkt,
|
mkt=mkt,
|
||||||
shm=shm,
|
shm=shm,
|
||||||
|
|
|
@ -616,6 +616,18 @@ def detect_price_gaps(
|
||||||
# ])
|
# ])
|
||||||
...
|
...
|
||||||
|
|
||||||
|
# TODO: probably just use the null_segs impl above?
|
||||||
|
def detect_vlm_gaps(
|
||||||
|
df: pl.DataFrame,
|
||||||
|
col: str = 'volume',
|
||||||
|
|
||||||
|
) -> pl.DataFrame:
|
||||||
|
|
||||||
|
vnull: pl.DataFrame = w_dts.filter(
|
||||||
|
pl.col(col) == 0
|
||||||
|
)
|
||||||
|
return vnull
|
||||||
|
|
||||||
|
|
||||||
def dedupe(
|
def dedupe(
|
||||||
src_df: pl.DataFrame,
|
src_df: pl.DataFrame,
|
||||||
|
@ -626,7 +638,6 @@ def dedupe(
|
||||||
|
|
||||||
) -> tuple[
|
) -> tuple[
|
||||||
pl.DataFrame, # with dts
|
pl.DataFrame, # with dts
|
||||||
pl.DataFrame, # gaps
|
|
||||||
pl.DataFrame, # with deduplicated dts (aka gap/repeat removal)
|
pl.DataFrame, # with deduplicated dts (aka gap/repeat removal)
|
||||||
int, # len diff between input and deduped
|
int, # len diff between input and deduped
|
||||||
]:
|
]:
|
||||||
|
@ -639,19 +650,22 @@ def dedupe(
|
||||||
'''
|
'''
|
||||||
wdts: pl.DataFrame = with_dts(src_df)
|
wdts: pl.DataFrame = with_dts(src_df)
|
||||||
|
|
||||||
# maybe sort on any time field
|
deduped = wdts
|
||||||
if sort:
|
|
||||||
wdts = wdts.sort(by='time')
|
|
||||||
# TODO: detect out-of-order segments which were corrected!
|
|
||||||
# -[ ] report in log msg
|
|
||||||
# -[ ] possibly return segment sections which were moved?
|
|
||||||
|
|
||||||
# remove duplicated datetime samples/sections
|
# remove duplicated datetime samples/sections
|
||||||
deduped: pl.DataFrame = wdts.unique(
|
deduped: pl.DataFrame = wdts.unique(
|
||||||
subset=['dt'],
|
# subset=['dt'],
|
||||||
|
subset=['time'],
|
||||||
maintain_order=True,
|
maintain_order=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# maybe sort on any time field
|
||||||
|
if sort:
|
||||||
|
deduped = deduped.sort(by='time')
|
||||||
|
# TODO: detect out-of-order segments which were corrected!
|
||||||
|
# -[ ] report in log msg
|
||||||
|
# -[ ] possibly return segment sections which were moved?
|
||||||
|
|
||||||
diff: int = (
|
diff: int = (
|
||||||
wdts.height
|
wdts.height
|
||||||
-
|
-
|
||||||
|
|
Loading…
Reference in New Issue