`Account` API update and refinement

Rename `open_pps()` -> `open_account()` for obvious reasons, and expect somewhat tighter integration with `SymbologyCache` and consequently `TransactionLedger` in order to drop the `Transaction.sym: MktPair` dependence when compiling / allocating new `Position`s from a ledger. Also we drop a bunch of prior attrs and do some cleaning: - `Position.first_clear_dt`: we no longer sort during insert. - `._clears` is now replaced by the `._events` table. - drop the now masked `.ensure_state()` method (eventually moved to the `.calc` submod for maybe-later-use). - drop `.sym=` from all remaining txn init calls. - clean out the `Position.add_clear()` method and only add the provided txn directly to the `._events` table. Improve some `Account` docs and interface: - fill out the main type description. - add the backend broker module as `Account.mod`, allowing us to drop `.brokername` as an input and instead wrap it as a `@property`. - make `.update_from_trans()` now a new `.update_from_ledger()` and expect either a `TransactionLedger` (user-dict) or a dict of txns; in the latter case, if we have not also been passed a symcache as input, raise a runtime error since the symcache is necessary to allocate positions. - also, delegate to `TransactionLedger.iter_txns()` instead of a manual datetime-sorted iter-loop. - drop all the clears datetime don't-insert-if-earlier-than-first logic. - rename `.to_toml()` -> `.prep_toml()`. - drop the old `PpTable` alias. - rename `load_pps_from_ledger()` -> `load_account_from_ledger()` and make it only deliver the account instance, and also move out all the `polars.DataFrame` related stuff (to `.calc`). And tweak some account clears table formatting: - store datetimes as TOML native equivalents. - drop `be_price` fixing. - obviously drop the `.ensure_state()` call to pps.
2023-07-07 22:22:06 -04:00 · 2023-07-07 22:22:06 -04:00 · f5d4f58610
parent 0e94e89373
commit f5d4f58610
1 changed files with 148 additions and 246 deletions
--- a/piker/accounting/_pos.py
+++ b/piker/accounting/_pos.py
@ -22,18 +22,17 @@ that doesn't try to cuk most humans who prefer to not lose their moneys..

 '''
 from __future__ import annotations
-# from bisect import insort
 from contextlib import contextmanager as cm
 from decimal import Decimal
 from pprint import pformat
 from pathlib import Path
+from types import ModuleType
 from typing import (
    Any,
    Iterator,
    Generator
 )

-import polars as pl
 import pendulum
 from pendulum import (
    datetime,
@ -43,7 +42,6 @@ import tomlkit

 from ._ledger import (
    Transaction,
-    open_trade_ledger,
    TransactionLedger,
 )
 from ._mktinfo import (
@ -60,6 +58,7 @@ from ..clearing._messages import (
    BrokerdPosition,
 )
 from ..data.types import Struct
+from ..data._symcache import SymbologyCache
 from ..log import get_logger

 log = get_logger(__name__)
@ -105,19 +104,12 @@ class Position(Struct):

    split_ratio: int | None = None

-    # ordered record of known constituent trade messages
-    _clears: list[
-        dict[str, Any],  # transaction history summaries
-    ] = []
-
-    # _events: pl.DataFrame | None = None
+    # TODO: use a `pl.DataFrame` intead?
    _events: dict[str, Transaction | dict] = {}

-    # first_clear_dt: datetime | None = None
-
    @property
    def expiry(self) -> datetime | None:
-        exp: str = self.mkt.expiry
+        exp: str = self.mkt.expiry.lower()
        match exp:
            # empty str, 'perp' (contract) or simply a null
            # signifies instrument with NO expiry.
@ -188,7 +180,7 @@ class Position(Struct):

        '''
        # scan for the last "net zero" position by iterating
-        # transactions until the next net-zero accum_size, rinse,
+        # transactions until the next net-zero cumsize, rinse,
        # repeat.
        cumsize: float = 0
        clears_since_zero: list[dict] = []
@ -223,6 +215,7 @@ class Position(Struct):
        '''
        mkt: MktPair = self.mkt
        assert isinstance(mkt, MktPair)
+
        # TODO: we need to figure out how to have one top level
        # listing venue here even when the backend isn't providing
        # it via the trades ledger..
@ -239,16 +232,19 @@ class Position(Struct):

        asdict: dict[str, Any] = {
            'bs_mktid': self.bs_mktid,
-            'expiry': self.expiry or '',
+            # 'expiry': self.expiry or '',
            'asset_type': asset_type,
            'price_tick': mkt.price_tick,
            'size_tick': mkt.size_tick,
        }
-
        if exp := self.expiry:
            asdict['expiry'] = exp

        clears_since_zero: list[dict] = self.minimized_clears()
+
+        # setup a "multi-line array of inline tables" which we call
+        # the "clears table", contained by each position entry in
+        # an "account file".
        clears_table: tomlkit.Array = tomlkit.array()
        clears_table.multiline(
            multiline=True,
@ -267,69 +263,21 @@ class Position(Struct):
            for k in ['price', 'size', 'cost']:
                inline_table[k] = entry[k]

-            # serialize datetime to parsable `str`
-            inline_table['dt'] = entry['dt']#.isoformat('T')
-            # assert 'Datetime' not in inline_table['dt']
+            # NOTE: we don't actually need to serialize datetime to parsable `str`
+            # since `tomlkit` supports a native `DateTime` but
+            # seems like we're not doing it entirely in clearing
+            # tables yet?
+            inline_table['dt'] = entry['dt']  # .isoformat('T')

            tid: str = entry['tid']
            inline_table['tid'] = tid
            clears_table.append(inline_table)
-            # if val < 0:
-            #     breakpoint()

        # assert not events
        asdict['clears'] = clears_table

        return fqme, asdict

-    # def ensure_state(self) -> None:
-    #     '''
-    #     Audit either the `.cumsize` and `.ppu` local instance vars against
-    #     the clears table calculations and return the calc-ed values if
-    #     they differ and log warnings to console.
-
-    #     '''
-    #     # clears: list[dict] = self._clears
-
-    #     # self.first_clear_dt = min(clears, key=lambda e: e['dt'])['dt']
-    #     last_clear: dict = clears[-1]
-    #     csize: float = self.calc_size()
-    #     accum: float = last_clear['accum_size']
-
-    #     if not self.expired():
-    #         if (
-    #             csize != accum
-    #             and csize != round(accum * (self.split_ratio or 1))
-    #         ):
-    #             raise ValueError(f'Size mismatch: {csize}')
-    #     else:
-    #         assert csize == 0, 'Contract is expired but non-zero size?'
-
-    #     if self.cumsize != csize:
-    #         log.warning(
-    #             'Position state mismatch:\n'
-    #             f'{self.cumsize} => {csize}'
-    #         )
-    #         self.cumsize = csize
-
-    #     cppu: float = self.calc_ppu()
-    #     ppu: float = last_clear['ppu']
-    #     if (
-    #         cppu != ppu
-    #         and self.split_ratio is not None
-
-    #         # handle any split info entered (for now) manually by user
-    #         and cppu != (ppu / self.split_ratio)
-    #     ):
-    #         raise ValueError(f'PPU mismatch: {cppu}')
-
-    #     if self.ppu != cppu:
-    #         log.warning(
-    #             'Position state mismatch:\n'
-    #             f'{self.ppu} => {cppu}'
-    #         )
-    #         self.ppu = cppu
-
    def update_from_msg(
        self,
        msg: BrokerdPosition,
@ -337,12 +285,13 @@ class Position(Struct):
    ) -> None:

        mkt: MktPair = self.mkt
-        # we summarize the pos with a single summary transaction
-        # (for now) until we either pass THIS type as msg directly
-        # from emsd or come up with a better way?
+
+        # NOTE WARNING XXX: we summarize the pos with a single
+        # summary transaction (for now) until we either pass THIS
+        # type as msg directly from emsd or come up with a better
+        # way?
        t = Transaction(
-            fqme=mkt.bs_mktid,
-            sym=mkt,
+            fqme=mkt.fqme,
            bs_mktid=mkt.bs_mktid,
            tid='unknown',
            size=msg['size'],
@ -357,15 +306,16 @@ class Position(Struct):
    @property
    def dsize(self) -> float:
        '''
-        The "dollar" size of the pp, normally in trading (fiat) unit
-        terms.
+        The "dollar" size of the pp, normally in source asset
+        (fiat) units.

        '''
        return self.ppu * self.size

    def expired(self) -> bool:
        '''
-        Predicate which checks if the contract/instrument is past its expiry.
+        Predicate which checks if the contract/instrument is past
+        its expiry.

        '''
        return bool(self.expiry) and self.expiry < now()
@ -388,36 +338,23 @@ class Position(Struct):
            log.warning(f'{t} is already added?!')
            return added

-        # clear: dict[str, float | str | int] = {
-        #     'tid': t.tid,
-        #     'cost': t.cost,
-        #     'price': t.price,
-        #     'size': t.size,
-        #     'dt': t.dt
-        # }
-        self._events[tid] = t
-        return True
+        # TODO: apparently this IS possible with a dict but not
+        # common and probably not that beneficial unless we're also
+        # going to do cum-calcs on each insert?
+        # https://stackoverflow.com/questions/38079171/python-insert-new-element-into-sorted-list-of-dictionaries
+        # from bisect import insort
        # insort(
        #     self._clears,
        #     clear,
        #     key=lambda entry: entry['dt']
        # )
+        self._events[tid] = t
+        return True

    # TODO: compute these incrementally instead
    # of re-looping through each time resulting in O(n**2)
-        # behaviour..?
-
-        # NOTE: we compute these **after** adding the entry in order to
-        # make the recurrence relation math work inside
-        # ``.calc_size()``.
-        # self.size = clear['accum_size'] = self.calc_size()
-        # self.ppu = clear['ppu'] = self.calc_ppu()
-        # self.size: float = self.calc_size()
-        # self.ppu: float = self.calc_ppu()
-
-        # assert len(self._events) == len(self._clears)
-        # return clear
-
+    # behaviour..? Can we have some kinda clears len to cached
+    # output subsys?
    def calc_ppu(self) -> float:
        return ppu(self.iter_by_type('clear'))

@ -487,20 +424,50 @@ class Position(Struct):


 class Account(Struct):
+    '''
+    The real-time (double-entry accounting) state of
+    a given **asset ownership tracking system**, normally offered
+    or measured from some brokerage, CEX or (implied virtual)
+    summary crypto$ "wallets" aggregated and tracked over some set
+    of DEX-es.

-    brokername: str
+    Both market-mapped and ledger-system-native (aka inter-account
+    "transfers") transactions are accounted and they pertain to
+    (implied) PnL relatve to any other accountable asset.
+
+    More specifically in piker terms, an account tracks all of:
+
+    - the *balances* of all assets currently available for use either
+      in (future) market or (inter-account/wallet) transfer
+      transactions.
+    - a transaction *ledger* from a given brokerd backend whic
+      is a recording of all (know) such transactions from the past.
+    - a set of financial *positions* as measured from the current
+      ledger state.
+
+    See the semantic origins from double-bookeeping:
+    https://en.wikipedia.org/wiki/Double-entry_bookkeeping
+
+    '''
+    mod: ModuleType
    acctid: str
    pps: dict[str, Position]
+
    conf_path: Path
    conf: dict | None = {}

    # TODO: track a table of asset balances as `.balances:
    # dict[Asset, float]`?

-    def update_from_trans(
+    @property
+    def brokername(self) -> str:
+        return self.mod.name
+
+    def update_from_ledger(
        self,
-        trans: dict[str, Transaction],
+        ledger: TransactionLedger,
        cost_scalar: float = 2,
+        symcache: SymbologyCache | None = None,

    ) -> dict[str, Position]:
        '''
@ -509,24 +476,36 @@ class Account(Struct):
        accumulative size for each entry.

        '''
+        if (
+            not isinstance(ledger, TransactionLedger)
+            and symcache is None
+        ):
+            raise RuntimeError(
+                'No ledger provided!\n'
+                'We can not determine the `MktPair`s without a symcache..\n'
+                'Please provide `symcache: SymbologyCache` when '
+                'processing NEW positions!'
+            )
+
        pps = self.pps
        updated: dict[str, Position] = {}

        # lifo update all pps from records, ensuring
        # we compute the PPU and size sorted in time!
-        for t in sorted(
-            trans.values(),
-            key=lambda t: t.dt,
-            # reverse=True,
-        ):
-            fqme: str = t.fqme
-            bs_mktid: str = t.bs_mktid
+        for tid, txn in ledger.iter_txns():
+        # for t in sorted(
+        #     trans.values(),
+        #     key=lambda t: t.dt,
+        # ):
+            fqme: str = txn.fqme
+            bs_mktid: str = txn.bs_mktid

            # template the mkt-info presuming a legacy market ticks
            # if no info exists in the transactions..
-            mkt: MktPair = t.sys
+            mkt: MktPair = ledger._symcache.mktmaps[fqme]

            if not (pos := pps.get(bs_mktid)):
+
                # if no existing pos, allocate fresh one.
                pos = pps[bs_mktid] = Position(
                    mkt=mkt,
@ -541,33 +520,16 @@ class Account(Struct):
                if len(pos.mkt.fqme) < len(fqme):
                    pos.mkt = mkt

-            # clears: list[dict] = pos._clears
-            # if clears:
-            #     # first_clear_dt = pos.first_clear_dt
-
-            #     # don't do updates for ledger records we already have
-            #     # included in the current pps state.
-            #     if (
-            #         t.tid in clears
-            #         # or (
-            #         #     first_clear_dt
-            #         #     and t.dt < first_clear_dt
-            #         # )
-            #     ):
-            #         # NOTE: likely you'll see repeats of the same
-            #         # ``Transaction`` passed in here if/when you are restarting
-            #         # a ``brokerd.ib`` where the API will re-report trades from
-            #         # the current session, so we need to make sure we don't
-            #         # "double count" these in pp calculations.
-            #         continue
-
-            # update clearing table
-            pos.add_clear(t)
-            updated[t.bs_mktid] = pos
-
-        # re-calc ppu and accumulative sizing.
-        # for bs_mktid, pos in updated.items():
-        #     pos.ensure_state()
+            # update clearing table!
+            # NOTE: likely you'll see repeats of the same
+            # ``Transaction`` passed in here if/when you are restarting
+            # a ``brokerd.ib`` where the API will re-report trades from
+            # the current session, so we need to make sure we don't
+            # "double count" these in pp calculations;
+            # `Position.add_clear()` stores txs in a `dict[tid,
+            # tx]` which should always ensure this is true B)
+            pos.add_clear(txn)
+            updated[txn.bs_mktid] = pos

        # NOTE: deliver only the position entries that were
        # actually updated (modified the state) from the input
@ -614,7 +576,7 @@ class Account(Struct):

        return open_pp_objs, closed_pp_objs

-    def to_toml(
+    def prep_toml(
        self,
        active: dict[str, Position] | None = None,

@ -629,12 +591,12 @@ class Account(Struct):

        pos: Position
        for bs_mktid, pos in active.items():
-            # NOTE: we only store the minimal amount of clears that make up this
-            # position since the last net-zero state.
-            # pos.minimize_clears()
            # pos.ensure_state()

            # serialize to pre-toml form
+            # NOTE: we only store the minimal amount of clears that
+            # make up this position since the last net-zero state,
+            # see `Position.to_pretoml()` for details
            fqme, asdict = pos.to_pretoml()

            # clears: list[dict] = asdict['clears']
@ -650,7 +612,8 @@ class Account(Struct):

    def write_config(self) -> None:
        '''
-        Write the current position table to the user's ``pps.toml``.
+        Write the current account state to the user's account TOML file, normally
+        something like ``pps.toml``.

        '''
        # TODO: show diff output?
@ -658,7 +621,7 @@ class Account(Struct):
        # active, closed_pp_objs = table.dump_active()

        active, closed = self.dump_active()
-        pp_entries = self.to_toml(active=active)
+        pp_entries = self.prep_toml(active=active)
        if pp_entries:
            log.info(
                f'Updating positions in ``{self.conf_path}``:\n'
@ -705,24 +668,12 @@ class Account(Struct):
        # super weird --1 thing going on for cumsize!?1!
        # NOTE: the fix was to always float() the size value loaded
        # in open_pps() below!
-
-        # confclears = self.conf["tsla.nasdaq.ib"]['clears']
-        # firstcum = confclears[0]['cumsize']
-        # if firstcum:
-        #     breakpoint()
-
        config.write(
            config=self.conf,
            path=self.conf_path,
            fail_empty=False,
        )

-        # breakpoint()
-
-
-# TODO: move over all broker backend usage to new name..
-PpTable = Account
-

 def load_account(
    brokername: str,
@ -784,12 +735,12 @@ def load_account(


@cm
-def open_pps(
+def open_account(
    brokername: str,
    acctid: str,
    write_on_exit: bool = False,

-) -> Generator[PpTable, None, None]:
+) -> Generator[Account, None, None]:
    '''
    Read out broker-specific position entries from
    incremental update file: ``pps.toml``.
@ -820,10 +771,12 @@ def open_pps(
    #   engine proc if we decide to always spawn it?),
    # - do diffs against updates from the ledger writer
    #   actor and the in-mem state here?
+    from ..brokers import get_brokermod
+    mod: ModuleType = get_brokermod(brokername)

-    pp_objs = {}
-    table = PpTable(
-        brokername,
+    pp_objs: dict[str, Position] = {}
+    table = Account(
+        mod,
        acctid,
        pp_objs,
        conf_path,
@ -831,12 +784,10 @@ def open_pps(
    )

    # unmarshal/load ``pps.toml`` config entries into object form
-    # and update `PpTable` obj entries.
+    # and update `Account` obj entries.
    for fqme, entry in conf.items():

-        # atype = entry.get('asset_type', '<unknown>')
-
-        # unique broker market id
+        # unique broker-backend-system market id
        bs_mktid = str(
            entry.get('bsuid')
            or entry.get('bs_mktid')
@ -860,7 +811,7 @@ def open_pps(
            fqme,
            price_tick=price_tick,
            size_tick=size_tick,
-            bs_mktid=bs_mktid
+            bs_mktid=bs_mktid,
        )

        # TODO: RE: general "events" instead of just "clears":
@ -875,6 +826,7 @@ def open_pps(
        # for toml re-presentation) back into a master table.
        toml_clears_list: list[dict[str, Any]] = entry['clears']
        trans: list[Transaction] = []
+
        for clears_table in toml_clears_list:
            tid = clears_table['tid']
            dt: tomlkit.items.DateTime | str = clears_table['dt']
@ -887,23 +839,18 @@ def open_pps(
            clears_table['dt'] = dt
            trans.append(Transaction(
                fqme=bs_mktid,
-                sym=mkt,
+                # sym=mkt,
                bs_mktid=bs_mktid,
                tid=tid,
+                # XXX: not sure why sometimes these are loaded as
+                # `tomlkit.Integer` and are eventually written with
+                # an extra `-` in front like `--1`?
                size=float(clears_table['size']),
                price=float(clears_table['price']),
                cost=clears_table['cost'],
                dt=dt,
            ))

-        # size = entry['size']
-
-        # # TODO: remove but, handle old field name for now
-        # ppu = entry.get(
-        #     'ppu',
-        #     entry.get('be_price', 0),
-        # )
-
        split_ratio = entry.get('split_ratio')

        # if a string-ified expiry field is loaded we try to parse
@ -929,9 +876,6 @@ def open_pps(
        for t in trans:
            pp.add_clear(t)

-        # audit entries loaded from toml
-        # pp.ensure_state()
-
    try:
        yield table
    finally:
@ -939,7 +883,21 @@ def open_pps(
            table.write_config()


-def load_pps_from_ledger(
+# TODO: drop the old name and THIS!
+@cm
+def open_pps(
+    *args,
+    **kwargs,
+) -> Generator[Account, None, None]:
+    log.warning(
+        '`open_pps()` is now deprecated!\n'
+        'Please use `with open_account() as cnt:`'
+    )
+    with open_account(*args, **kwargs) as acnt:
+        yield acnt
+
+
+def load_account_from_ledger(

    brokername: str,
    acctname: str,
@ -947,10 +905,9 @@ def load_pps_from_ledger(
    # post normalization filter on ledger entries to be processed
    filter_by_ids: dict[str, list[str]] | None = None,

-) -> tuple[
-    pl.DataFrame,
-    PpTable,
-]:
+    ledger: TransactionLedger | None = None,
+
+) -> Account:
    '''
    Open a ledger file by broker name and account and read in and
    process any trade records into our normalized ``Transaction`` form
@ -958,67 +915,12 @@ def load_pps_from_ledger(
    bs_mktid-mapped dict-sets of the transactions and pps.

    '''
-    ledger: TransactionLedger
-    table: PpTable
-    with (
-        open_trade_ledger(brokername, acctname) as ledger,
-        open_pps(brokername, acctname) as table,
-    ):
-        if not ledger:
-            # null case, no ledger file with content
-            return {}
+    acnt: Account
+    with open_pps(
+        brokername,
+        acctname,
+    ) as acnt:
+        if ledger is not None:
+            acnt.update_from_ledger(ledger)

-        from ..brokers import get_brokermod
-        mod = get_brokermod(brokername)
-        src_records: dict[str, Transaction] = mod.norm_trade_records(
-            ledger
-        )
-        table.update_from_trans(src_records)
-
-        fdf = df = pl.DataFrame(
-            list(rec.to_dict() for rec in src_records.values()),
-            # schema=[
-            #     ('tid', str),
-            #     ('fqme', str),
-            #     ('dt', str),
-            #     ('size', pl.Float64),
-            #     ('price', pl.Float64),
-            #     ('cost', pl.Float64),
-            #     ('expiry', str),
-            #     ('bs_mktid', str),
-            # ],
-        ).sort('dt').select([
-            pl.col('fqme'),
-            pl.col('dt').str.to_datetime(),
-            # pl.col('expiry').dt.datetime(),
-            pl.col('bs_mktid'),
-            pl.col('size'),
-            pl.col('price'),
-        ])
-        # ppt = df.groupby('fqme').agg([
-        #     # TODO: ppu and bep !!
-        #     pl.cumsum('size').alias('cumsum'),
-        # ])
-        acts = df.partition_by('fqme', as_dict=True)
-        # ppt: dict[str, pl.DataFrame] = {}
-        # for fqme, ppt in act.items():
-        #     ppt.with_columuns
-        #     # TODO: ppu and bep !!
-        #     pl.cumsum('size').alias('cumsum'),
-        # ])
-
-        # filter out to the columns matching values filter passed
-        # as input.
-        if filter_by_ids:
-            for col, vals in filter_by_ids.items():
-                str_vals = set(map(str, vals))
-                pred: pl.Expr = pl.col(col).eq(str_vals.pop())
-                for val in str_vals:
-                    pred |= pl.col(col).eq(val)
-
-            fdf = df.filter(pred)
-
-            bs_mktid: str = fdf[0]['bs_mktid']
-            # pos: Position = table.pps[bs_mktid]
-
-    return fdf, acts, table
+    return acnt