Move cum-calcs to `open_ledger_dfs()`, always parse `str` -> `DateTime`

Previously the cum-size calc(s) were in the `disect` CLI but they're
better stuffed into the backing df converter. Also, ensure that whenever
a `dt` field is type-detected as a `str` we parse it to a `DateTime`.
account_tests
Tyler Goodlet 2023-07-15 15:43:09 -04:00
parent 69314e9fca
commit 3d20490ee5
2 changed files with 117 additions and 97 deletions
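
In `polars` terms the two behaviors read roughly as below; a minimal
standalone sketch with made-up records, not the actual `piker` helper code:

    import polars as pl

    # made-up txn records as they might come out of a TOML ledger
    ldf = pl.DataFrame({
        'fqme': ['xbtusd.kraken', 'xbtusd.kraken', 'xbtusd.kraken'],
        'dt': [
            '2023-07-01T00:00:00',
            '2023-07-02T00:00:00',
            '2023-07-03T00:00:00',
        ],
        'size': [1.0, 2.0, -1.5],
    }).sort('dt').with_columns([
        # always parse a str-typed `dt` field to a datetime
        pl.col('dt').str.to_datetime(),
    ])

    # partition by market key then compute the cum-size per frame,
    # now done in the df converter instead of the `disect` CLI
    dfs: dict = ldf.partition_by('fqme', as_dict=True)
    for key, df in dfs.items():
        dfs[key] = df.with_columns([
            pl.cumsum('size').alias('cumsize'),
        ])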

View File

@@ -20,6 +20,7 @@ you know when you're losing money (if possible) XD
 '''
 from __future__ import annotations
+from collections.abc import ValuesView
 from contextlib import contextmanager as cm
 from math import copysign
 from typing import (
@@ -39,6 +40,7 @@ from pendulum import (
 if TYPE_CHECKING:
     from ._ledger import (
         Transaction,
+        TransactionLedger,
     )

 def ppu(
@@ -72,18 +74,23 @@ def ppu(
     where `cost_basis` for the current step is simply the price
     * size of the most recent clearing transaction.

+    -----
+    TODO: get the BEP computed and working similarly!
+    -----
+    the equivalent "break even price" (bep) at each new clear
+    event step conversely only changes on a "position exiting
+    clear" which **decreases** the cumulative dst asset size:
+
+    bep[-1] = ppu[-1] - (cum_pnl[-1] / cumsize[-1])
+
     '''
     asize_h: list[float] = []  # historical accumulative size
     ppu_h: list[float] = []  # historical price-per-unit
     ledger: dict[str, dict] = {}

-    # entry: dict[str, Any] | Transaction
     t: Transaction
     for t in clears:
-        # tid: str = entry['tid']
-        # clear_size = entry['size']
         clear_size: float = t.size
-        # clear_price: str | float = entry['price']
         clear_price: str | float = t.price
         is_clear: bool = not isinstance(clear_price, str)
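
A quick worked example of the bep relation sketched in that docstring;
all numbers here are hypothetical:

    # long 2 units at an avg ppu of 100.0 with 10.0 of pnl already
    # banked from prior partial exits -> position breaks even at 95.0
    ppu: float = 100.0
    cum_pnl: float = 10.0
    cumsize: float = 2.0
    bep: float = ppu - (cum_pnl / cumsize)
    assert bep == 95.0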
@@ -152,7 +159,6 @@ def ppu(
             clear_price * abs(clear_size)
             +
             # transaction cost
-            # accum_sign * cost_scalar * entry['cost']
             accum_sign * cost_scalar * t.cost
         )
@@ -187,13 +193,13 @@ def ppu(
         asize_h.append(accum_size)

         # ledger[t.tid] = {
-        #     'tx': t,
+        #     'txn': t,
         ledger[t.tid] = t.to_dict() | {
             'ppu': ppu,
             'cumsize': accum_size,
             'sign_change': sign_change,
-            # TODO: cumpnl, bep
+            # TODO: cum_pnl, bep
         }

     final_ppu = ppu_h[-1] if ppu_h else 0
@@ -212,6 +218,7 @@ def ppu(
 def iter_by_dt(
     records: (
         dict[str, dict[str, Any]]
+        | ValuesView[dict]  # eg. `Position._events.values()`
         | list[dict]
         | list[Transaction]  # XXX preferred!
     ),
@@ -220,7 +227,7 @@ def iter_by_dt(
     # so if you know that the record stats show some field
     # is more common then others, stick it at the top B)
     parsers: dict[str, Callable | None] = {
-        'dt': None,  # parity case
+        'dt': parse,  # parity case
         'datetime': parse,  # datetime-str
         'time': from_timestamp,  # float epoch
     },
@@ -259,8 +266,13 @@ def iter_by_dt(
                 # the `parsers` table above (when NOT using
                 # `.get()`), otherwise pass through the value and
                 # sort on it directly
-                parser: Callable | None = parsers[k]
-                return parser(v) if (parser is not None) else v
+                if (
+                    not isinstance(v, DateTime)
+                    and (parser := parsers.get(k))
+                ):
+                    return parser(v)
+                else:
+                    return v

         else:
             # XXX: should never get here..
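
For reference, the sort-key behavior that new walrus-guard implements, as a
standalone sketch (hypothetical record shape; `pendulum`'s `parse`/`DateTime`
as per this module's imports): values already parsed to `DateTime` pass
straight through while `str` values get parsed:

    from pendulum import DateTime, parse

    parsers: dict = {
        'dt': parse,  # now parsed too (was the `None` "parity" case)
        'datetime': parse,
    }

    def to_dt(record: dict):
        # mirror of the guard above: only call a parser when the
        # value is not already a `DateTime`
        for k, parser in parsers.items():
            if (v := record.get(k)) is not None:
                if (
                    not isinstance(v, DateTime)
                    and parser
                ):
                    return parser(v)
                return v

    # str input gets parsed, `DateTime` passes through un-touched
    assert isinstance(to_dt({'dt': '2023-07-15T15:43:09-04:00'}), DateTime)
    already = parse('2023-07-15T15:43:09-04:00')
    assert to_dt({'dt': already}) is already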
@@ -271,6 +283,7 @@ def iter_by_dt(
         records,
         key=key or dyn_parse_to_dt,
     ):
+        # NOTE the type sig above; either pairs or txns B)
         yield entry
@@ -331,7 +344,14 @@ def open_ledger_dfs(
     brokername: str,
     acctname: str,

-) -> dict[str, pl.DataFrame]:
+    ledger: TransactionLedger | None = None,
+
+    **kwargs,
+
+) -> tuple[
+    dict[str, pl.DataFrame],
+    TransactionLedger,
+]:
     '''
     Open a ledger of trade records (presumably from some broker
     backend), normalize the records into `Transactions` via the
@@ -341,86 +361,89 @@ def open_ledger_dfs(
     '''
     from ._ledger import (
         open_trade_ledger,
-        # Transaction,
         TransactionLedger,
     )

-    ledger: TransactionLedger
-    import time
-    now = time.time()
-    with (
-        open_trade_ledger(
-            brokername,
-            acctname,
-            rewrite=True,
-            allow_from_sync_code=True,
-        ) as ledger,
-    ):
-        if not ledger:
-            raise ValueError(f'No ledger for {acctname}@{brokername} exists?')
-
-        print(f'LEDGER LOAD TIME: {time.time() - now}')
-        # process raw TOML ledger into txns using the
-        # appropriate backend normalizer.
-        # cache: AssetsInfo = get_symcache(
-        #     brokername,
-        #     allow_reload=True,
-        # )
-
-        txns: dict[str, Transaction]
-        if acctname != 'paper':
-            txns = ledger.mod.norm_trade_records(ledger)
-        else:
-            txns = ledger.to_txns()
-
-        ldf = pl.DataFrame(
-            list(txn.to_dict() for txn in txns.values()),
-            # schema=[
-            #     ('tid', str),
-            #     ('fqme', str),
-            #     ('dt', str),
-            #     ('size', pl.Float64),
-            #     ('price', pl.Float64),
-            #     ('cost', pl.Float64),
-            #     ('expiry', str),
-            #     ('bs_mktid', str),
-            # ],
-        ).sort('dt').select([
-            pl.col('fqme'),
-            pl.col('dt').str.to_datetime(),
-            # pl.col('expiry').dt.datetime(),
-            pl.col('bs_mktid'),
-            pl.col('size'),
-            pl.col('price'),
-        ])
-
-        # filter out to the columns matching values filter passed
-        # as input.
-        # if filter_by_ids:
-        #     for col, vals in filter_by_ids.items():
-        #         str_vals = set(map(str, vals))
-        #         pred: pl.Expr = pl.col(col).eq(str_vals.pop())
-        #         for val in str_vals:
-        #             pred |= pl.col(col).eq(val)
-
-        # fdf = df.filter(pred)
-        # bs_mktid: str = fdf[0]['bs_mktid']
-        # pos: Position = acnt.pps[bs_mktid]
-
-        # ppt = df.groupby('fqme').agg([
-        #     # TODO: ppu and bep !!
-        #     pl.cumsum('size').alias('cumsum'),
-        # ])
-        dfs: dict[str, pl.DataFrame] = ldf.partition_by(
-            'fqme',
-            as_dict=True,
-        )
-
-        # for fqme, ppt in act.items():
-        #     ppt.with_columns
-        #     # TODO: ppu and bep !!
-        #     pl.cumsum('size').alias('cumsum'),
-        #     ])
-        yield dfs
+    if not ledger:
+        import time
+        from tractor._debug import open_crash_handler
+        now = time.time()
+
+        with (
+            open_crash_handler(),
+            open_trade_ledger(
+                brokername,
+                acctname,
+                rewrite=True,
+                allow_from_sync_code=True,
+
+                # proxied through from caller
+                **kwargs,
+
+            ) as ledger,
+        ):
+            if not ledger:
+                raise ValueError(f'No ledger for {acctname}@{brokername} exists?')
+
+            print(f'LEDGER LOAD TIME: {time.time() - now}')
+
+    # process raw TOML ledger into txns using the
+    # appropriate backend normalizer.
+    # cache: AssetsInfo = get_symcache(
+    #     brokername,
+    #     allow_reload=True,
+    # )
+
+    txns: dict[str, Transaction] = ledger.to_txns()
+    ldf = pl.DataFrame(
+        list(txn.to_dict() for txn in txns.values()),
+        # schema=[
+        #     ('tid', str),
+        #     ('fqme', str),
+        #     ('dt', str),
+        #     ('size', pl.Float64),
+        #     ('price', pl.Float64),
+        #     ('cost', pl.Float64),
+        #     ('expiry', str),
+        #     ('bs_mktid', str),
+        # ],
+    # ).sort('dt').select([
+    ).sort('dt').with_columns([
+        # pl.col('fqme'),
+        pl.col('dt').str.to_datetime(),
+        # pl.col('expiry').dt.datetime(),
+        # pl.col('bs_mktid'),
+        # pl.col('size'),
+        # pl.col('price'),
+    ])
+
+    # filter out to the columns matching values filter passed
+    # as input.
+    # if filter_by_ids:
+    #     for col, vals in filter_by_ids.items():
+    #         str_vals = set(map(str, vals))
+    #         pred: pl.Expr = pl.col(col).eq(str_vals.pop())
+    #         for val in str_vals:
+    #             pred |= pl.col(col).eq(val)
+
+    # fdf = df.filter(pred)
+    # bs_mktid: str = fdf[0]['bs_mktid']
+    # pos: Position = acnt.pps[bs_mktid]
+
+    # TODO: not sure if this is even possible but..
+    # ppt = df.groupby('fqme').agg([
+    #     # TODO: ppu and bep !!
+    #     pl.cumsum('size').alias('cumsum'),
+    # ])
+    dfs: dict[str, pl.DataFrame] = ldf.partition_by(
+        'fqme',
+        as_dict=True,
+    )
+    for key in dfs:
+        df = dfs[key]
+        dfs[key] = df.with_columns([
+            pl.cumsum('size').alias('cumsize'),
+        ])
+
+    yield dfs, ledger
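
Taken together, a minimal caller sketch of the new `open_ledger_dfs()`
signature; the broker/account names are hypothetical and the import path
assumes the `piker.accounting` package layout implied by the relative
imports above:

    from piker.accounting.calc import open_ledger_dfs

    with open_ledger_dfs(
        'binance',  # brokername (hypothetical)
        'paper',    # acctname (hypothetical)
    ) as (dfs, ledger):
        # one frame per fqme, each already carrying the `cumsize` col
        for fqme, df in dfs.items():
            print(fqme, df['cumsize'][-1])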

View File

@@ -37,8 +37,8 @@ from ..calc import humanize
 from ..brokers._daemon import broker_init
 from ._ledger import (
     load_ledger,
+    TransactionLedger,
     # open_trade_ledger,
-    # TransactionLedger,
 )
 from .calc import (
     open_ledger_dfs,
@@ -263,20 +263,17 @@ def disect(
     # ledger dfs groupby-partitioned by fqme
     dfs: dict[str, pl.DataFrame]
+    # actual ledger ref filled in with all txns
+    ldgr: TransactionLedger

     with open_ledger_dfs(
         brokername,
         account,
-    ) as dfs:
-        for key in dfs:
-            df = dfs[key]
-            dfs[key] = df.with_columns([
-                pl.cumsum('size').alias('cumsum'),
-            ])
+    ) as (dfs, ldgr):

-        ppt = dfs[fqme]
+        # look up specific frame for fqme-selected asset
+        df = dfs[fqme]
         assert not df.is_empty()
-        assert not ppt.is_empty()

     # TODO: we REALLY need a better console REPL for this
     # kinda thing..
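
Net effect on the CLI side, as a short sketch (names per the diff above):
`disect` no longer computes cumulative sums itself and additionally gets
the filled-in ledger ref back, e.g. for cross-checking raw txns against
the frames:

    with open_ledger_dfs(
        brokername,
        account,
    ) as (dfs, ldgr):
        df = dfs[fqme]                   # frame for the selected asset
        assert 'cumsize' in df.columns   # computed upstream now
        txns = ldgr.to_txns()            # raw `Transaction` records
        assert len(txns) >= df.height    # frame rows come from txns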