Move cum-calcs to `open_ledger_dfs()`, always parse `str`->`DateTime`

Previously the cum-size calc(s) lived in the `disect` CLI, but they are better placed in the backing df converter. Also, ensure that whenever a `dt` field is type-detected as a `str`, we parse it to a `DateTime`.
2023-07-15 15:43:09 -04:00 · 2023-07-15 15:43:09 -04:00 · 3d20490ee5
parent 69314e9fca
commit 3d20490ee5
2 changed files with 117 additions and 97 deletions
--- a/piker/accounting/calc.py
+++ b/piker/accounting/calc.py
@ -20,6 +20,7 @@ you know when you're losing money (if possible) XD
 '''
 from __future__ import annotations
 from collections.abc import ValuesView
 from contextlib import contextmanager as cm
 from math import copysign
 from typing import (
@ -39,6 +40,7 @@ from pendulum import (
 if TYPE_CHECKING:
    from ._ledger import (
        Transaction,
        TransactionLedger,
    )
 def ppu(
@ -72,18 +74,23 @@ def ppu(
    where `cost_basis` for the current step is simply the price
    * size of the most recent clearing transaction.
    -----
    TODO: get the BEP computed and working similarly!
    -----
    the equivalent "break even price" or bep at each new clear
    event step conversely only changes when an "position exiting
    clear" which **decreases** the cumulative dst asset size:
    bep[-1] = ppu[-1] - (cum_pnl[-1] / cumsize[-1])
    '''
    asize_h: list[float] = []  # historical accumulative size
    ppu_h: list[float] = []  # historical price-per-unit
    ledger: dict[str, dict] = {}
    # entry: dict[str, Any] | Transaction
    t: Transaction
    for t in clears:
        # tid: str = entry['tid']
        # clear_size = entry['size']
        clear_size: float = t.size
        # clear_price: str | float = entry['price']
        clear_price: str | float = t.price
        is_clear: bool = not isinstance(clear_price, str)
@ -152,7 +159,6 @@ def ppu(
                clear_price * abs(clear_size)
                +
                # transaction cost
                # accum_sign * cost_scalar * entry['cost']
                accum_sign * cost_scalar * t.cost
            )
@ -187,13 +193,13 @@ def ppu(
        asize_h.append(accum_size)
        # ledger[t.tid] = {
-            # 'tx': t,
+            # 'txn': t,
        ledger[t.tid] = t.to_dict() | {
            'ppu': ppu,
            'cumsize': accum_size,
            'sign_change': sign_change,
-            # TODO: cumpnl, bep
+            # TODO: cum_pnl, bep
        }
    final_ppu = ppu_h[-1] if ppu_h else 0
@ -212,6 +218,7 @@ def ppu(
 def iter_by_dt(
    records: (
        dict[str, dict[str, Any]]
        | ValuesView[dict]  # eg. `Position._events.values()`
        | list[dict]
        | list[Transaction]  # XXX preferred!
    ),
@ -220,7 +227,7 @@ def iter_by_dt(
    # so if you know that the record stats show some field
    # is more common then others, stick it at the top B)
    parsers: dict[str, Callable | None] = {
-        'dt': None,  # parity case
+        'dt': parse,  # parity case
        'datetime': parse,  # datetime-str
        'time': from_timestamp,  # float epoch
    },
@ -259,8 +266,13 @@ def iter_by_dt(
                # the `parsers` table above (when NOT using
                # `.get()`), otherwise pass through the value and
                # sort on it directly
-                parser: Callable | None = parsers[k]
+                if (
-                return parser(v) if (parser is not None) else v
+                    not isinstance(v, DateTime)
                    and (parser := parsers.get(k))
                ):
                    return parser(v)
                else:
                    return v
        else:
            # XXX: should never get here..
@ -271,6 +283,7 @@ def iter_by_dt(
        records,
        key=key or dyn_parse_to_dt,
    ):
        # NOTE the type sig above; either pairs or txns B)
        yield entry
@ -331,7 +344,14 @@ def open_ledger_dfs(
    brokername: str,
    acctname: str,
-) -> dict[str, pl.DataFrame]:
+    ledger: TransactionLedger | None = None,
    **kwargs,
 ) -> tuple[
    dict[str, pl.DataFrame],
    TransactionLedger,
 ]:
    '''
    Open a ledger of trade records (presumably from some broker
    backend), normalize the records into `Transactions` via the
@ -341,25 +361,32 @@ def open_ledger_dfs(
    '''
    from ._ledger import (
        open_trade_ledger,
        # Transaction,
        TransactionLedger,
    )
-    ledger: TransactionLedger
+    if not ledger:
        import time
        from tractor._debug import open_crash_handler
        now = time.time()
        with (
            open_crash_handler(),
            open_trade_ledger(
                brokername,
                acctname,
                rewrite=True,
                allow_from_sync_code=True,
                # proxied through from caller
                **kwargs,
            ) as ledger,
        ):
            if not ledger:
                raise ValueError(f'No ledger for {acctname}@{brokername} exists?')
            print(f'LEDGER LOAD TIME: {time.time() - now}')
            # process raw TOML ledger into txns using the
            # appropriate backend normalizer.
            # cache: AssetsInfo = get_symcache(
@ -367,12 +394,7 @@ def open_ledger_dfs(
            #     allow_reload=True,
            # )
-        txns: dict[str, Transaction]
+    txns: dict[str, Transaction] = ledger.to_txns()
        if acctname != 'paper':
            txns = ledger.mod.norm_trade_records(ledger)
        else:
            txns = ledger.to_txns()
    ldf = pl.DataFrame(
        list(txn.to_dict() for txn in txns.values()),
        # schema=[
@ -385,13 +407,14 @@ def open_ledger_dfs(
        #     ('expiry', str),
        #     ('bs_mktid', str),
        # ],
-        ).sort('dt').select([
+    # ).sort('dt').select([
-            pl.col('fqme'),
+    ).sort('dt').with_columns([
        # pl.col('fqme'),
        pl.col('dt').str.to_datetime(),
        # pl.col('expiry').dt.datetime(),
-            pl.col('bs_mktid'),
+        # pl.col('bs_mktid'),
-            pl.col('size'),
+        # pl.col('size'),
-            pl.col('price'),
+        # pl.col('price'),
    ])
    # filter out to the columns matching values filter passed
@ -408,19 +431,19 @@ def open_ledger_dfs(
        # bs_mktid: str = fdf[0]['bs_mktid']
        # pos: Position = acnt.pps[bs_mktid]
    # TODO: not sure if this is even possible but..
    # ppt = df.groupby('fqme').agg([
    #     # TODO: ppu and bep !!
    #     pl.cumsum('size').alias('cumsum'),
    # ])
    dfs: dict[str, pl.DataFrame] = ldf.partition_by(
        'fqme',
        as_dict=True,
    )
    for key in dfs:
        df = dfs[key]
        dfs[key] = df.with_columns([
            pl.cumsum('size').alias('cumsize'),
        ])
-        # for fqme, ppt in act.items():
+    yield dfs, ledger
        #     ppt.with_columns
        #     # TODO: ppu and bep !!
        #     pl.cumsum('size').alias('cumsum'),
        # ])
        yield dfs
--- a/piker/accounting/cli.py
+++ b/piker/accounting/cli.py
@ -37,8 +37,8 @@ from ..calc import humanize
 from ..brokers._daemon import broker_init
 from ._ledger import (
    load_ledger,
    TransactionLedger,
    # open_trade_ledger,
    # TransactionLedger,
 )
 from .calc import (
    open_ledger_dfs,
@ -263,20 +263,17 @@ def disect(
    # ledger dfs groupby-partitioned by fqme
    dfs: dict[str, pl.DataFrame]
    # actual ledger ref filled in with all txns
    ldgr: TransactionLedger
    with open_ledger_dfs(
        brokername,
        account,
-    ) as dfs:
+    ) as (dfs, ldgr):
-        for key in dfs:
+        # look up specific frame for fqme-selected asset
-            df = dfs[key]
+        df = dfs[fqme]
            dfs[key] = df.with_columns([
                pl.cumsum('size').alias('cumsum'),
            ])
        ppt = dfs[fqme]
        assert not df.is_empty()
        assert not ppt.is_empty()
        # TODO: we REALLY need a better console REPL for this
        # kinda thing..