From f5d4f58610c9526ed1e67d48b3eac6852b26c6fd Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 7 Jul 2023 22:22:06 -0400
Subject: [PATCH] `Account` API update and refine

Rename `open_pps()` -> `open_account()` for obvious reasons as well as
expect a bit tighter integration with `SymbologyCache` and consequently
`LedgerTransaction` in order to drop `Transaction.sym: MktPair`
dependence when compiling / allocating new `Position`s from a ledger.

Also we drop a bunch of prior attrs and do some cleaning,
- `Position.first_clear_dt` is dropped since we no longer sort during
  insert.
- `._clears` is now replaced by the `._events` table.
- drop the now masked `.ensure_state()` method (eventually moved
  to `.calc` submod for maybe-later-use).
- drop `.sym=` from all remaining txns init calls.
- clean out the `Position.add_clear()` method and only add the provided
  txn directly to the `._events` table.

Improve some `Account` docs and interface:
- fill out the main type description.
- add the backend broker modules as `Account.mod` allowing us to drop
  `.brokername` as input and instead wrap it as a `@property`.
- make `.update_from_trans()` now a new `.update_from_ledger()` and
  expect either a `TransactionLedger` (user-dict) or a dict of txns;
  in the latter case, if we have not also been passed a symcache as
  input then raise a runtime error since the symcache is necessary to
  allocate positions.
- also, delegate to `TransactionLedger.iter_txns()` instead of a manual
  datetime-sorted iter-loop.
- drop all the clears datetime don't-insert-if-earlier-than-first logic.
- rename `.to_toml()` -> `.prep_toml()`.
- drop old `PpTable` alias.
- rename `load_pps_from_ledger()` -> `load_account_from_ledger()` and
  make it only deliver the account instance and also move out all the
  `polars.DataFrame` related stuff (to `.calc`).

And tweak some account clears table formatting,
- store datetimes as TOML-native equivalents.
- drop `be_price` fixing.
- obviously drop the `.ensure_state()` call to pps.
---
 piker/accounting/_pos.py | 394 +++++++++++++++------------------------
 1 file changed, 148 insertions(+), 246 deletions(-)

diff --git a/piker/accounting/_pos.py b/piker/accounting/_pos.py
index 65eb67a8..fbb6997f 100644
--- a/piker/accounting/_pos.py
+++ b/piker/accounting/_pos.py
@@ -22,18 +22,17 @@ that doesn't try to cuk most humans who prefer to not lose their moneys..

'''
from __future__ import annotations
-# from bisect import insort
from contextlib import contextmanager as cm
from decimal import Decimal
from pprint import pformat
from pathlib import Path
+from types import ModuleType
from typing import (
    Any,
    Iterator,
    Generator
)

-import polars as pl
import pendulum
from pendulum import (
    datetime,
@@ -43,7 +42,6 @@ import tomlkit

from ._ledger import (
    Transaction,
-    open_trade_ledger,
    TransactionLedger,
)
from ._mktinfo import (
@@ -60,6 +58,7 @@ from ..clearing._messages import (
    BrokerdPosition,
)
from ..data.types import Struct
+from ..data._symcache import SymbologyCache
from ..log import get_logger

log = get_logger(__name__)
@@ -105,19 +104,12 @@ class Position(Struct):

    split_ratio: int | None = None

-    # ordered record of known constituent trade messages
-    _clears: list[
-        dict[str, Any],  # transaction history summaries
-    ] = []
-
-    # _events: pl.DataFrame | None = None
+    # TODO: use a `pl.DataFrame` instead?
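To make the `._clears`-list to `._events`-table swap (from the bullets above) concrete: keying clears by transaction id makes re-ingesting already-seen ledger records a natural no-op, at the cost of the old implicit insert-order sorting. A tiny standalone sketch using plain dicts as stand-ins for `Transaction`s (not piker's actual types):

```python
from datetime import datetime, timezone

# toy clears keyed the way `Position._events` is: by txn id
events: dict[str, dict] = {}

def add_clear(txn: dict) -> bool:
    '''Insert a clear once; repeated tids are dropped.'''
    tid: str = txn['tid']
    if tid in events:
        return False  # already recorded, don't double count
    events[tid] = txn
    return True

t = {
    'tid': 'abc123',
    'size': 10.0,
    'price': 101.5,
    'cost': 0.25,
    'dt': datetime(2023, 7, 7, 22, 22, tzinfo=timezone.utc),
}
assert add_clear(t) is True
assert add_clear(t) is False  # second pass from a re-reported ledger
assert len(events) == 1
```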
_events: dict[str, Transaction | dict] = {} - # first_clear_dt: datetime | None = None - @property def expiry(self) -> datetime | None: - exp: str = self.mkt.expiry + exp: str = self.mkt.expiry.lower() match exp: # empty str, 'perp' (contract) or simply a null # signifies instrument with NO expiry. @@ -188,7 +180,7 @@ class Position(Struct): ''' # scan for the last "net zero" position by iterating - # transactions until the next net-zero accum_size, rinse, + # transactions until the next net-zero cumsize, rinse, # repeat. cumsize: float = 0 clears_since_zero: list[dict] = [] @@ -223,6 +215,7 @@ class Position(Struct): ''' mkt: MktPair = self.mkt assert isinstance(mkt, MktPair) + # TODO: we need to figure out how to have one top level # listing venue here even when the backend isn't providing # it via the trades ledger.. @@ -239,16 +232,19 @@ class Position(Struct): asdict: dict[str, Any] = { 'bs_mktid': self.bs_mktid, - 'expiry': self.expiry or '', + # 'expiry': self.expiry or '', 'asset_type': asset_type, 'price_tick': mkt.price_tick, 'size_tick': mkt.size_tick, } - if exp := self.expiry: asdict['expiry'] = exp clears_since_zero: list[dict] = self.minimized_clears() + + # setup a "multi-line array of inline tables" which we call + # the "clears table", contained by each position entry in + # an "account file". clears_table: tomlkit.Array = tomlkit.array() clears_table.multiline( multiline=True, @@ -267,69 +263,21 @@ class Position(Struct): for k in ['price', 'size', 'cost']: inline_table[k] = entry[k] - # serialize datetime to parsable `str` - inline_table['dt'] = entry['dt']#.isoformat('T') - # assert 'Datetime' not in inline_table['dt'] + # NOTE: we don't actually need to serialize datetime to parsable `str` + # since `tomlkit` supports a native `DateTime` but + # seems like we're not doing it entirely in clearing + # tables yet? + inline_table['dt'] = entry['dt'] # .isoformat('T') tid: str = entry['tid'] inline_table['tid'] = tid clears_table.append(inline_table) - # if val < 0: - # breakpoint() # assert not events asdict['clears'] = clears_table return fqme, asdict - # def ensure_state(self) -> None: - # ''' - # Audit either the `.cumsize` and `.ppu` local instance vars against - # the clears table calculations and return the calc-ed values if - # they differ and log warnings to console. - - # ''' - # # clears: list[dict] = self._clears - - # # self.first_clear_dt = min(clears, key=lambda e: e['dt'])['dt'] - # last_clear: dict = clears[-1] - # csize: float = self.calc_size() - # accum: float = last_clear['accum_size'] - - # if not self.expired(): - # if ( - # csize != accum - # and csize != round(accum * (self.split_ratio or 1)) - # ): - # raise ValueError(f'Size mismatch: {csize}') - # else: - # assert csize == 0, 'Contract is expired but non-zero size?' 
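The clears-table serialization in `.to_pretoml()` above leans on `tomlkit` emitting a "multiline array of inline tables" and on its native TOML datetime support (no `.isoformat()` round-trip needed). A rough standalone sketch with made-up values (real account files also carry `asset_type`, ticks as loaded from the `MktPair`, etc.):

```python
from datetime import datetime, timezone
import tomlkit

doc = tomlkit.document()
pos = tomlkit.table()
pos['bs_mktid'] = '12345'
pos['price_tick'] = '0.25'
pos['size_tick'] = '1'

clears = tomlkit.array()
clears.multiline(True)  # one inline table per line on dump

for tid, size, price in [
    ('a1', 1.0, 4100.25),
    ('a2', -1.0, 4101.75),
]:
    entry = tomlkit.inline_table()
    entry['price'] = price
    entry['size'] = size
    entry['cost'] = 0.35
    # stored as a native TOML datetime, not a string
    entry['dt'] = datetime(2023, 7, 7, 22, 22, tzinfo=timezone.utc)
    entry['tid'] = tid
    clears.append(entry)

pos['clears'] = clears
doc['mnq.cme.ib'] = pos  # fqme-keyed position entry (key gets quoted)
print(tomlkit.dumps(doc))
```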
- - # if self.cumsize != csize: - # log.warning( - # 'Position state mismatch:\n' - # f'{self.cumsize} => {csize}' - # ) - # self.cumsize = csize - - # cppu: float = self.calc_ppu() - # ppu: float = last_clear['ppu'] - # if ( - # cppu != ppu - # and self.split_ratio is not None - - # # handle any split info entered (for now) manually by user - # and cppu != (ppu / self.split_ratio) - # ): - # raise ValueError(f'PPU mismatch: {cppu}') - - # if self.ppu != cppu: - # log.warning( - # 'Position state mismatch:\n' - # f'{self.ppu} => {cppu}' - # ) - # self.ppu = cppu - def update_from_msg( self, msg: BrokerdPosition, @@ -337,12 +285,13 @@ class Position(Struct): ) -> None: mkt: MktPair = self.mkt - # we summarize the pos with a single summary transaction - # (for now) until we either pass THIS type as msg directly - # from emsd or come up with a better way? + + # NOTE WARNING XXX: we summarize the pos with a single + # summary transaction (for now) until we either pass THIS + # type as msg directly from emsd or come up with a better + # way? t = Transaction( - fqme=mkt.bs_mktid, - sym=mkt, + fqme=mkt.fqme, bs_mktid=mkt.bs_mktid, tid='unknown', size=msg['size'], @@ -357,15 +306,16 @@ class Position(Struct): @property def dsize(self) -> float: ''' - The "dollar" size of the pp, normally in trading (fiat) unit - terms. + The "dollar" size of the pp, normally in source asset + (fiat) units. ''' return self.ppu * self.size def expired(self) -> bool: ''' - Predicate which checks if the contract/instrument is past its expiry. + Predicate which checks if the contract/instrument is past + its expiry. ''' return bool(self.expiry) and self.expiry < now() @@ -388,36 +338,23 @@ class Position(Struct): log.warning(f'{t} is already added?!') return added - # clear: dict[str, float | str | int] = { - # 'tid': t.tid, - # 'cost': t.cost, - # 'price': t.price, - # 'size': t.size, - # 'dt': t.dt - # } - self._events[tid] = t - return True + # TODO: apparently this IS possible with a dict but not + # common and probably not that beneficial unless we're also + # going to do cum-calcs on each insert? + # https://stackoverflow.com/questions/38079171/python-insert-new-element-into-sorted-list-of-dictionaries + # from bisect import insort # insort( # self._clears, # clear, # key=lambda entry: entry['dt'] # ) + self._events[tid] = t + return True - # TODO: compute these incrementally instead - # of re-looping through each time resulting in O(n**2) - # behaviour..? - - # NOTE: we compute these **after** adding the entry in order to - # make the recurrence relation math work inside - # ``.calc_size()``. - # self.size = clear['accum_size'] = self.calc_size() - # self.ppu = clear['ppu'] = self.calc_ppu() - # self.size: float = self.calc_size() - # self.ppu: float = self.calc_ppu() - - # assert len(self._events) == len(self._clears) - # return clear - + # TODO: compute these incrementally instead + # of re-looping through each time resulting in O(n**2) + # behaviour..? Can we have some kinda clears len to cached + # output subsys? def calc_ppu(self) -> float: return ppu(self.iter_by_type('clear')) @@ -487,20 +424,50 @@ class Position(Struct): class Account(Struct): + ''' + The real-time (double-entry accounting) state of + a given **asset ownership tracking system**, normally offered + or measured from some brokerage, CEX or (implied virtual) + summary crypto$ "wallets" aggregated and tracked over some set + of DEX-es. 
- brokername: str + Both market-mapped and ledger-system-native (aka inter-account + "transfers") transactions are accounted and they pertain to + (implied) PnL relatve to any other accountable asset. + + More specifically in piker terms, an account tracks all of: + + - the *balances* of all assets currently available for use either + in (future) market or (inter-account/wallet) transfer + transactions. + - a transaction *ledger* from a given brokerd backend whic + is a recording of all (know) such transactions from the past. + - a set of financial *positions* as measured from the current + ledger state. + + See the semantic origins from double-bookeeping: + https://en.wikipedia.org/wiki/Double-entry_bookkeeping + + ''' + mod: ModuleType acctid: str pps: dict[str, Position] + conf_path: Path conf: dict | None = {} # TODO: track a table of asset balances as `.balances: # dict[Asset, float]`? - def update_from_trans( + @property + def brokername(self) -> str: + return self.mod.name + + def update_from_ledger( self, - trans: dict[str, Transaction], + ledger: TransactionLedger, cost_scalar: float = 2, + symcache: SymbologyCache | None = None, ) -> dict[str, Position]: ''' @@ -509,24 +476,36 @@ class Account(Struct): accumulative size for each entry. ''' + if ( + not isinstance(ledger, TransactionLedger) + and symcache is None + ): + raise RuntimeError( + 'No ledger provided!\n' + 'We can not determine the `MktPair`s without a symcache..\n' + 'Please provide `symcache: SymbologyCache` when ' + 'processing NEW positions!' + ) + pps = self.pps updated: dict[str, Position] = {} # lifo update all pps from records, ensuring # we compute the PPU and size sorted in time! - for t in sorted( - trans.values(), - key=lambda t: t.dt, - # reverse=True, - ): - fqme: str = t.fqme - bs_mktid: str = t.bs_mktid + for tid, txn in ledger.iter_txns(): + # for t in sorted( + # trans.values(), + # key=lambda t: t.dt, + # ): + fqme: str = txn.fqme + bs_mktid: str = txn.bs_mktid # template the mkt-info presuming a legacy market ticks # if no info exists in the transactions.. - mkt: MktPair = t.sys + mkt: MktPair = ledger._symcache.mktmaps[fqme] if not (pos := pps.get(bs_mktid)): + # if no existing pos, allocate fresh one. pos = pps[bs_mktid] = Position( mkt=mkt, @@ -541,33 +520,16 @@ class Account(Struct): if len(pos.mkt.fqme) < len(fqme): pos.mkt = mkt - # clears: list[dict] = pos._clears - # if clears: - # # first_clear_dt = pos.first_clear_dt - - # # don't do updates for ledger records we already have - # # included in the current pps state. - # if ( - # t.tid in clears - # # or ( - # # first_clear_dt - # # and t.dt < first_clear_dt - # # ) - # ): - # # NOTE: likely you'll see repeats of the same - # # ``Transaction`` passed in here if/when you are restarting - # # a ``brokerd.ib`` where the API will re-report trades from - # # the current session, so we need to make sure we don't - # # "double count" these in pp calculations. - # continue - - # update clearing table - pos.add_clear(t) - updated[t.bs_mktid] = pos - - # re-calc ppu and accumulative sizing. - # for bs_mktid, pos in updated.items(): - # pos.ensure_state() + # update clearing table! 
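As a caller-side illustration of the guard above, a hedged sketch of feeding `.update_from_ledger()` either input flavor; the `refresh()` helper and its wiring are hypothetical, only the types and the `symcache=` kwarg come from this patch:

```python
from piker.accounting._ledger import (
    Transaction,
    TransactionLedger,
)
from piker.accounting._pos import Account
from piker.data._symcache import SymbologyCache


def refresh(
    acnt: Account,
    ledger: TransactionLedger | dict[str, Transaction],
    symcache: SymbologyCache | None = None,
) -> None:
    # a `TransactionLedger` carries its own `._symcache` so it can be
    # passed bare; a plain txn-dict MUST come with an explicit
    # symcache or the call raises `RuntimeError` per the guard above.
    updated = acnt.update_from_ledger(
        ledger,
        symcache=symcache,
    )
    # only positions actually modified by new clears are returned
    for bs_mktid, pos in updated.items():
        print(f'{bs_mktid} -> {pos.mkt.fqme}')
```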
+ # NOTE: likely you'll see repeats of the same + # ``Transaction`` passed in here if/when you are restarting + # a ``brokerd.ib`` where the API will re-report trades from + # the current session, so we need to make sure we don't + # "double count" these in pp calculations; + # `Position.add_clear()` stores txs in a `dict[tid, + # tx]` which should always ensure this is true B) + pos.add_clear(txn) + updated[txn.bs_mktid] = pos # NOTE: deliver only the position entries that were # actually updated (modified the state) from the input @@ -614,7 +576,7 @@ class Account(Struct): return open_pp_objs, closed_pp_objs - def to_toml( + def prep_toml( self, active: dict[str, Position] | None = None, @@ -629,12 +591,12 @@ class Account(Struct): pos: Position for bs_mktid, pos in active.items(): - # NOTE: we only store the minimal amount of clears that make up this - # position since the last net-zero state. - # pos.minimize_clears() # pos.ensure_state() # serialize to pre-toml form + # NOTE: we only store the minimal amount of clears that + # make up this position since the last net-zero state, + # see `Position.to_pretoml()` for details fqme, asdict = pos.to_pretoml() # clears: list[dict] = asdict['clears'] @@ -650,7 +612,8 @@ class Account(Struct): def write_config(self) -> None: ''' - Write the current position table to the user's ``pps.toml``. + Write the current account state to the user's account TOML file, normally + something like ``pps.toml``. ''' # TODO: show diff output? @@ -658,7 +621,7 @@ class Account(Struct): # active, closed_pp_objs = table.dump_active() active, closed = self.dump_active() - pp_entries = self.to_toml(active=active) + pp_entries = self.prep_toml(active=active) if pp_entries: log.info( f'Updating positions in ``{self.conf_path}``:\n' @@ -705,24 +668,12 @@ class Account(Struct): # super weird --1 thing going on for cumsize!?1! # NOTE: the fix was to always float() the size value loaded # in open_pps() below! - - # confclears = self.conf["tsla.nasdaq.ib"]['clears'] - # firstcum = confclears[0]['cumsize'] - # if firstcum: - # breakpoint() - config.write( config=self.conf, path=self.conf_path, fail_empty=False, ) - # breakpoint() - - -# TODO: move over all broker backend usage to new name.. -PpTable = Account - def load_account( brokername: str, @@ -784,12 +735,12 @@ def load_account( @cm -def open_pps( +def open_account( brokername: str, acctid: str, write_on_exit: bool = False, -) -> Generator[PpTable, None, None]: +) -> Generator[Account, None, None]: ''' Read out broker-specific position entries from incremental update file: ``pps.toml``. @@ -820,10 +771,12 @@ def open_pps( # engine proc if we decide to always spawn it?), # - do diffs against updates from the ledger writer # actor and the in-mem state here? + from ..brokers import get_brokermod + mod: ModuleType = get_brokermod(brokername) - pp_objs = {} - table = PpTable( - brokername, + pp_objs: dict[str, Position] = {} + table = Account( + mod, acctid, pp_objs, conf_path, @@ -831,12 +784,10 @@ def open_pps( ) # unmarshal/load ``pps.toml`` config entries into object form - # and update `PpTable` obj entries. + # and update `Account` obj entries. 
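For context, a minimal usage sketch of this `open_account()` entry point as a whole; the broker/account names are placeholders and `write_on_exit=True` is the flag defined above:

```python
from piker.accounting._pos import open_account

# acquire the `Account` for a given backend + account name; the
# broker mod is resolved internally via `get_brokermod()`.
with open_account(
    'ib',
    'algopaper',
    write_on_exit=True,
) as acnt:
    # split current positions into open vs. closed
    open_pps, closed_pps = acnt.dump_active()

    for bs_mktid, pos in open_pps.items():
        print(f'{pos.mkt.fqme}: ppu={pos.calc_ppu()}')

    # or explicitly flush the account TOML file (`pps.toml` style)
    # now instead of waiting for ctx-manager teardown.
    acnt.write_config()
```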
for fqme, entry in conf.items(): - # atype = entry.get('asset_type', '') - - # unique broker market id + # unique broker-backend-system market id bs_mktid = str( entry.get('bsuid') or entry.get('bs_mktid') @@ -860,7 +811,7 @@ def open_pps( fqme, price_tick=price_tick, size_tick=size_tick, - bs_mktid=bs_mktid + bs_mktid=bs_mktid, ) # TODO: RE: general "events" instead of just "clears": @@ -875,6 +826,7 @@ def open_pps( # for toml re-presentation) back into a master table. toml_clears_list: list[dict[str, Any]] = entry['clears'] trans: list[Transaction] = [] + for clears_table in toml_clears_list: tid = clears_table['tid'] dt: tomlkit.items.DateTime | str = clears_table['dt'] @@ -887,23 +839,18 @@ def open_pps( clears_table['dt'] = dt trans.append(Transaction( fqme=bs_mktid, - sym=mkt, + # sym=mkt, bs_mktid=bs_mktid, tid=tid, + # XXX: not sure why sometimes these are loaded as + # `tomlkit.Integer` and are eventually written with + # an extra `-` in front like `--1`? size=float(clears_table['size']), price=float(clears_table['price']), cost=clears_table['cost'], dt=dt, )) - # size = entry['size'] - - # # TODO: remove but, handle old field name for now - # ppu = entry.get( - # 'ppu', - # entry.get('be_price', 0), - # ) - split_ratio = entry.get('split_ratio') # if a string-ified expiry field is loaded we try to parse @@ -929,9 +876,6 @@ def open_pps( for t in trans: pp.add_clear(t) - # audit entries loaded from toml - # pp.ensure_state() - try: yield table finally: @@ -939,7 +883,21 @@ def open_pps( table.write_config() -def load_pps_from_ledger( +# TODO: drop the old name and THIS! +@cm +def open_pps( + *args, + **kwargs, +) -> Generator[Account, None, None]: + log.warning( + '`open_pps()` is now deprecated!\n' + 'Please use `with open_account() as cnt:`' + ) + with open_account(*args, **kwargs) as acnt: + yield acnt + + +def load_account_from_ledger( brokername: str, acctname: str, @@ -947,10 +905,9 @@ def load_pps_from_ledger( # post normalization filter on ledger entries to be processed filter_by_ids: dict[str, list[str]] | None = None, -) -> tuple[ - pl.DataFrame, - PpTable, -]: + ledger: TransactionLedger | None = None, + +) -> Account: ''' Open a ledger file by broker name and account and read in and process any trade records into our normalized ``Transaction`` form @@ -958,67 +915,12 @@ def load_pps_from_ledger( bs_mktid-mapped dict-sets of the transactions and pps. ''' - ledger: TransactionLedger - table: PpTable - with ( - open_trade_ledger(brokername, acctname) as ledger, - open_pps(brokername, acctname) as table, - ): - if not ledger: - # null case, no ledger file with content - return {} + acnt: Account + with open_pps( + brokername, + acctname, + ) as acnt: + if ledger is not None: + acnt.update_from_ledger(ledger) - from ..brokers import get_brokermod - mod = get_brokermod(brokername) - src_records: dict[str, Transaction] = mod.norm_trade_records( - ledger - ) - table.update_from_trans(src_records) - - fdf = df = pl.DataFrame( - list(rec.to_dict() for rec in src_records.values()), - # schema=[ - # ('tid', str), - # ('fqme', str), - # ('dt', str), - # ('size', pl.Float64), - # ('price', pl.Float64), - # ('cost', pl.Float64), - # ('expiry', str), - # ('bs_mktid', str), - # ], - ).sort('dt').select([ - pl.col('fqme'), - pl.col('dt').str.to_datetime(), - # pl.col('expiry').dt.datetime(), - pl.col('bs_mktid'), - pl.col('size'), - pl.col('price'), - ]) - # ppt = df.groupby('fqme').agg([ - # # TODO: ppu and bep !! 
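The `polars` frame building and `partition_by()` grouping being stripped out below is, per the commit message, headed for `.calc`; a standalone sketch of the same idea over toy txn records (trimmed columns, not the actual `.calc` code) looks roughly like:

```python
import polars as pl

# toy normalized txn records, roughly the shape a backend's
# `norm_trade_records()` output would have
records = [
    {'fqme': 'mnq.cme.ib', 'dt': '2023-07-07T09:30:00',
     'size': 1.0, 'price': 4100.25, 'bs_mktid': '1'},
    {'fqme': 'mnq.cme.ib', 'dt': '2023-07-07T10:15:00',
     'size': -1.0, 'price': 4105.0, 'bs_mktid': '1'},
    {'fqme': 'xbtusdt.kraken', 'dt': '2023-07-06T22:00:00',
     'size': 0.1, 'price': 30100.0, 'bs_mktid': 'XBT/USDT'},
]

df = (
    pl.DataFrame(records)
    .sort('dt')
    .with_columns(pl.col('dt').str.to_datetime())
)

# one sub-frame per market, handy for per-fqme ppu/bep calcs
# (newer polars spells the cumulative sum `cum_sum()`)
per_mkt: dict = df.partition_by('fqme', as_dict=True)
for fqme, sub in per_mkt.items():
    print(
        fqme,
        sub.select(pl.col('size').cumsum().alias('cumsize')),
    )
```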
- # pl.cumsum('size').alias('cumsum'), - # ]) - acts = df.partition_by('fqme', as_dict=True) - # ppt: dict[str, pl.DataFrame] = {} - # for fqme, ppt in act.items(): - # ppt.with_columuns - # # TODO: ppu and bep !! - # pl.cumsum('size').alias('cumsum'), - # ]) - - # filter out to the columns matching values filter passed - # as input. - if filter_by_ids: - for col, vals in filter_by_ids.items(): - str_vals = set(map(str, vals)) - pred: pl.Expr = pl.col(col).eq(str_vals.pop()) - for val in str_vals: - pred |= pl.col(col).eq(val) - - fdf = df.filter(pred) - - bs_mktid: str = fdf[0]['bs_mktid'] - # pos: Position = table.pps[bs_mktid] - - return fdf, acts, table + return acnt
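Finally, a rough end-to-end sketch of the renamed `load_account_from_ledger()` plus the deprecated `open_pps()` alias; the broker/account names and ledger source are placeholders, not a prescribed flow:

```python
from piker.accounting._ledger import open_trade_ledger
from piker.accounting._pos import (
    load_account_from_ledger,
    open_pps,  # deprecated alias, logs a warning
)

with open_trade_ledger('ib', 'algopaper') as ledger:
    # build/refresh an `Account` purely from the ledger's txns
    acnt = load_account_from_ledger(
        'ib',
        'algopaper',
        ledger=ledger,
    )
    print(acnt.brokername, list(acnt.pps))

# legacy callers still work but are nudged toward `open_account()`
with open_pps('ib', 'algopaper') as acnt:
    assert acnt.brokername == 'ib'
```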