piker/piker/accounting/calc.py

# piker: trading gear for hackers
# Copyright (C) Tyler Goodlet (in stewardship for pikers)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

'''
Calculation routines for balance and position tracking such that
you know when you're losing money (if possible) XD

'''
from __future__ import annotations
from collections.abc import ValuesView
from contextlib import contextmanager as cm
from math import copysign
from typing import (
    Any,
    Callable,
    Iterator,
    TYPE_CHECKING,
)

import polars as pl
from pendulum import (
    DateTime,
    from_timestamp,
    parse,
)

if TYPE_CHECKING:
    from ._ledger import (
        Transaction,
        TransactionLedger,
    )


def ppu(
    clears: Iterator[Transaction],

    # include transaction cost in breakeven price
    # and presume the worst case of the same cost
    # to exit this transaction (even though in reality
    # it will be dynamic based on exit stratetgy).
    cost_scalar: float = 2,

    # return the ledger of clears as a (now dt sorted) dict with
    # new position fields inserted alongside each entry.
    as_ledger: bool = False,

) -> float | list[(str, dict)]:
    '''
    Compute the "price-per-unit" price for the given non-zero sized
    rolling position.

    The recurrence relation which computes this (exponential) mean
    per new clear which **increases** the accumulative postiion size
    is:

    ppu[-1] = (
        ppu[-2] * accum_size[-2]
        +
        ppu[-1] * size
    ) / accum_size[-1]

    where `cost_basis` for the current step is simply the price
    * size of the most recent clearing transaction.

    -----
    TODO: get the BEP computed and working similarly!
    -----
    the equivalent "break even price" or bep at each new clear
    event step conversely only changes when an "position exiting
    clear" which **decreases** the cumulative dst asset size:

    bep[-1] = ppu[-1] - (cum_pnl[-1] / cumsize[-1])

    '''
    asize_h: list[float] = []  # historical accumulative size
    ppu_h: list[float] = []  # historical price-per-unit
    # ledger: dict[str, dict] = {}
    ledger: list[dict] = []

    t: Transaction
    for t in clears:
        clear_size: float = t.size
        clear_price: str | float = t.price
        is_clear: bool = not isinstance(clear_price, str)

        last_accum_size = asize_h[-1] if asize_h else 0
        accum_size: float = last_accum_size + clear_size
        accum_sign = copysign(1, accum_size)
        sign_change: bool = False

        # on transfers we normally write some non-valid
        # price since withdrawal to another account/wallet
        # has nothing to do with inter-asset-market prices.
        # TODO: this should be better handled via a `type: 'tx'`
        # field as per existing issue surrounding all this:
        # https://github.com/pikers/piker/issues/510
        if isinstance(clear_price, str):
            # TODO: we can't necessarily have this commit to
            # the overall pos size since we also need to
            # include other positions contributions to this
            # balance or we might end up with a -ve balance for
            # the position..
            continue

        # test if the pp somehow went "passed" a net zero size state
        # resulting in a change of the "sign" of the size (+ve for
        # long, -ve for short).
        sign_change = (
            copysign(1, last_accum_size) + accum_sign == 0
            and last_accum_size != 0
        )

        # since we passed the net-zero-size state the new size
        # after sum should be the remaining size the new
        # "direction" (aka, long vs. short) for this clear.
        if sign_change:
            clear_size: float = accum_size
            abs_diff: float = abs(accum_size)
            asize_h.append(0)
            ppu_h.append(0)

        else:
            # old size minus the new size gives us size diff with
            # +ve -> increase in pp size
            # -ve -> decrease in pp size
            abs_diff = abs(accum_size) - abs(last_accum_size)

        # XXX: LIFO breakeven price update. only an increaze in size
        # of the position contributes the breakeven price,
        # a decrease does not (i.e. the position is being made
        # smaller).
        # abs_clear_size = abs(clear_size)
        abs_new_size: float | int = abs(accum_size)

        if (
            abs_diff > 0
            and is_clear
        ):
            cost_basis = (
                # cost basis for this clear
                clear_price * abs(clear_size)
                +
                # transaction cost
                accum_sign * cost_scalar * t.cost
            )

            if asize_h:
                size_last: float = abs(asize_h[-1])
                cb_last: float = ppu_h[-1] * size_last
                ppu: float = (cost_basis + cb_last) / abs_new_size

            else:
                ppu: float = cost_basis / abs_new_size

        else:
            # TODO: for PPU we should probably handle txs out
            # (aka withdrawals) similarly by simply not having
            # them contrib to the running PPU calc and only
            # when the next entry clear comes in (which will
            # then have a higher weighting on the PPU).

            # on "exit" clears from a given direction,
            # only the size changes not the price-per-unit
            # need to be updated since the ppu remains constant
            # and gets weighted by the new size.
            ppu: float = ppu_h[-1] if ppu_h else 0  # set to previous value

        # extend with new rolling metric for this step
        ppu_h.append(ppu)
        asize_h.append(accum_size)

        # ledger[t.tid] = {
            # 'txn': t,
        # ledger[t.tid] = t.to_dict() | {
        ledger.append((
            t.tid,
            t.to_dict() | {
                'ppu': ppu,
                'cumsize': accum_size,
                'sign_change': sign_change,

                # TODO: cum_pnl, bep
            }
        ))

    final_ppu = ppu_h[-1] if ppu_h else 0
    # TODO: once we have etypes in all ledger entries..
    # handle any split info entered (for now) manually by user
    # if self.split_ratio is not None:
    #     final_ppu /= self.split_ratio

    if as_ledger:
        return ledger

    else:
        return final_ppu


def iter_by_dt(
    records: (
        dict[str, dict[str, Any]]
        | ValuesView[dict]  # eg. `Position._events.values()`
        | list[dict]
        | list[Transaction]  # XXX preferred!
    ),

    # NOTE: parsers are looked up in the insert order
    # so if you know that the record stats show some field
    # is more common then others, stick it at the top B)
    parsers: dict[str, Callable | None] = {
        'dt': parse,  # parity case
        'datetime': parse,  # datetime-str
        'time': from_timestamp,  # float epoch
    },
    key: Callable | None = None,

) -> Iterator[tuple[str, dict]]:
    '''
    Iterate entries of a transaction table sorted by entry recorded
    datetime presumably set at the ``'dt'`` field in each entry.

    '''
    if isinstance(records, dict):
        records: list[tuple[str, dict]] = list(records.items())

    def dyn_parse_to_dt(
        tx: tuple[str, dict[str, Any]] | Transaction,
    ) -> DateTime:

        # handle `.items()` inputs
        if isinstance(tx, tuple):
            tx = tx[1]

        # dict or tx object?
        isdict: bool = isinstance(tx, dict)

        # get best parser for this record..
        for k in parsers:
            if (
                isdict and k in tx
                 or getattr(tx, k, None)
            ):
                v = tx[k] if isdict else tx.dt
                assert v is not None, f'No valid value for `{k}`!?'

                # only call parser on the value if not None from
                # the `parsers` table above (when NOT using
                # `.get()`), otherwise pass through the value and
                # sort on it directly
                if (
                    not isinstance(v, DateTime)
                    and (parser := parsers.get(k))
                ):
                    return parser(v)
                else:
                    return v

        else:
            # XXX: should never get here..
            breakpoint()

    entry: tuple[str, dict] | Transaction
    for entry in sorted(
        records,
        key=key or dyn_parse_to_dt,
    ):
        # NOTE the type sig above; either pairs or txns B)
        yield entry


# TODO: probably just move this into the test suite or
# keep it here for use from as such?
# def ensure_state(self) -> None:
#     '''
#     Audit either the `.cumsize` and `.ppu` local instance vars against
#     the clears table calculations and return the calc-ed values if
#     they differ and log warnings to console.

#     '''
#     # clears: list[dict] = self._clears

#     # self.first_clear_dt = min(clears, key=lambda e: e['dt'])['dt']
#     last_clear: dict = clears[-1]
#     csize: float = self.calc_size()
#     accum: float = last_clear['accum_size']

#     if not self.expired():
#         if (
#             csize != accum
#             and csize != round(accum * (self.split_ratio or 1))
#         ):
#             raise ValueError(f'Size mismatch: {csize}')
#     else:
#         assert csize == 0, 'Contract is expired but non-zero size?'

#     if self.cumsize != csize:
#         log.warning(
#             'Position state mismatch:\n'
#             f'{self.cumsize} => {csize}'
#         )
#         self.cumsize = csize

#     cppu: float = self.calc_ppu()
#     ppu: float = last_clear['ppu']
#     if (
#         cppu != ppu
#         and self.split_ratio is not None

#         # handle any split info entered (for now) manually by user
#         and cppu != (ppu / self.split_ratio)
#     ):
#         raise ValueError(f'PPU mismatch: {cppu}')

#     if self.ppu != cppu:
#         log.warning(
#             'Position state mismatch:\n'
#             f'{self.ppu} => {cppu}'
#         )
#         self.ppu = cppu


@cm
def open_ledger_dfs(

    brokername: str,
    acctname: str,

    ledger: TransactionLedger | None = None,

    **kwargs,

) -> tuple[
    dict[str, pl.DataFrame],
    TransactionLedger,
]:
    '''
    Open a ledger of trade records (presumably from some broker
    backend), normalize the records into `Transactions` via the
    backend's declared endpoint, cast to a `polars.DataFrame` which
    can update the ledger on exit.

    '''

    if not ledger:
        import time
        from tractor._debug import open_crash_handler
        from ._ledger import open_trade_ledger

        now = time.time()
        with (
            open_crash_handler(),

            open_trade_ledger(
                brokername,
                acctname,
                rewrite=True,
                allow_from_sync_code=True,

                # proxied through from caller
                **kwargs,

            ) as ledger,
        ):
            if not ledger:
                raise ValueError(f'No ledger for {acctname}@{brokername} exists?')

            print(f'LEDGER LOAD TIME: {time.time() - now}')

            # process raw TOML ledger into txns using the
            # appropriate backend normalizer.
            # cache: AssetsInfo = get_symcache(
            #     brokername,
            #     allow_reload=True,
            # )

        yield ledger_to_dfs(ledger), ledger


def ledger_to_dfs(
    ledger: TransactionLedger,

    # include transaction cost in breakeven price
    # and presume the worst case of the same cost
    # to exit this transaction (even though in reality
    # it will be dynamic based on exit stratetgy).
    cost_scalar: float = 1,

) -> dict[str, pl.DataFrame]:

    txns: dict[str, Transaction] = ledger.to_txns()

    # ldf = pl.DataFrame(
    #     list(txn.to_dict() for txn in txns.values()),
    ldf = pl.from_dicts(
        list(txn.to_dict() for txn in txns.values()),

        # only for ordering the cols
        schema=[
            ('fqme', str),
            ('tid', str),
            ('bs_mktid', str),
            ('expiry', str),
            ('etype', str),
            ('dt', str),
            ('size', pl.Float64),
            ('price', pl.Float64),
            ('cost', pl.Float64),
        ],
    ).sort(  # chronological order
        'dt'
    ).with_columns([
        pl.col('dt').str.to_datetime(),
        # pl.col('expiry').str.to_datetime(),
        # pl.col('expiry').dt.date(),
    ])

    # filter out to the columns matching values filter passed
    # as input.
    # if filter_by_ids:
    #     for col, vals in filter_by_ids.items():
    #         str_vals = set(map(str, vals))
    #         pred: pl.Expr = pl.col(col).eq(str_vals.pop())
    #         for val in str_vals:
    #             pred |= pl.col(col).eq(val)

    #     fdf = df.filter(pred)

    # break up into a frame per mkt / fqme
    dfs: dict[str, pl.DataFrame] = ldf.partition_by(
        'bs_mktid',
        as_dict=True,
    )

    # TODO: not sure if this is even possible but..
    # - it'd be more ideal to use `ppt = df.groupby('fqme').agg([`
    # - ppu and bep calcs!
    for key in dfs:

        # covert to lazy form (since apparently we might need it
        # eventually ...)
        df = dfs[key]

        ldf = df.lazy()
        # TODO: pass back the current `Position` object loaded from
        # the account as well? Would provide incentive to do all
        # this ledger loading inside a new async open_account().
        # bs_mktid: str = df[0]['bs_mktid']
        # pos: Position = acnt.pps[bs_mktid]

        df = dfs[key] = ldf.with_columns([

            pl.cumsum('size').alias('cumsize'),

            # amount of source asset "sent" (via buy txns in
            # the market) to acquire the dst asset, PER txn.
            # when this value is -ve (i.e. a sell operation) then
            # the amount sent is actually "returned".
            (
                (pl.col('price') * pl.col('size'))
                +
                pl.col('cost')
            ).alias('dst_bot'),

        ]).with_columns([

            # rolling balance in src asset units
            (pl.col('dst_bot').cumsum() * -1).alias('src_balance'),

            # "position operation type" in terms of increasing the
            # amount in the dst asset (entering) or decreasing the
            # amount in the dst asset (exiting).
            pl.when(
                pl.col('size').sign() == pl.col('cumsize').sign()

            ).then(
                pl.lit('enter')  # see above, but is just price * size per txn

            ).otherwise(
                pl.when(pl.col('cumsize') == 0)
                .then(pl.lit('exit_to_zero'))
                .otherwise(pl.lit('exit'))
            ).alias('descr'),

            (pl.col('cumsize').sign() == pl.col('size').sign())
            .alias('is_enter'),

        ]).with_columns([

            pl.lit(0, dtype=pl.Float64).alias('pos_ppu'),
            pl.lit(0, dtype=pl.Float64).alias('per_exit_pnl'),
            pl.lit(0, dtype=pl.Float64).alias('cum_pos_pnl'),
            pl.lit(0, dtype=pl.Float64).alias('pos_bep'),
            pl.lit(0, dtype=pl.Float64).alias('cum_ledger_pnl'),
            pl.lit(None, dtype=pl.Float64).alias('ledger_bep'),

            # TODO: instead of the iterative loop below i guess we
            # could try using embedded lists to track which txns
            # are part of which ppu / bep calcs? Not sure this will
            # look any better nor be any more performant though xD
            # pl.lit([[0]], dtype=pl.List).alias('list'),

        # choose fields to emit for accounting puposes
        ]).select([
            pl.exclude([
                'tid',
                # 'dt',
                'expiry',
                'bs_mktid',
                'etype',
            ]),
        ]).collect()

        # compute recurrence relations for ppu and bep
        last_ppu: float = 0
        last_cumsize: float = 0
        last_ledger_pnl: float = 0
        last_pos_pnl: float = 0
        # last_is_enter: bool = False  # TODO: drop right?

        # imperatively compute the PPU (price per unit) and BEP
        # (break even price) iteratively over the ledger, oriented
        # around each position state: a state of split balances in
        # > 1 asset.
        for i, row in enumerate(df.iter_rows(named=True)):

            cumsize: float = row['cumsize']
            is_enter: bool = row['is_enter']
            price: float = row['price']
            size: float = row['size']

            # ALWAYS reset per-position cum PnL
            if last_cumsize == 0:
                last_pos_pnl: float = 0

            # the profit is ALWAYS decreased, aka made a "loss"
            # by the constant fee charged by the txn provider!
            # TODO: support exit txn virtual cost which we
            # resolve on exit txns incrementally?
            pnl: float = -1 * row['cost']

            # a "position size INCREASING" or ENTER transaction
            # which "makes larger", in src asset unit terms, the
            # trade's side-size of the destination asset:
            # - "buying" (more) units of the dst asset
            # - "selling" (more short) units of the dst asset
            if is_enter:

                # a cumulative mean of the price-per-unit acquired
                # in the destination asset:
                # https://en.wikipedia.org/wiki/Moving_average#Cumulative_average
                # You could also think of this measure more
                # generally as an exponential mean with `alpha
                # = 1/N` where `N` is the current number of txns
                # included in the "position" defining set:
                # https://en.wikipedia.org/wiki/Exponential_smoothing
                ppu: float = (
                    (
                        (last_ppu * last_cumsize)
                        +
                        (price * size)
                    ) /
                    cumsize
                )

            # a "position size DECREASING" or EXIT transaction
            # which "makes smaller" the trade's side-size of the
            # destination asset:
            # - selling previously bought units of the dst asset
            #   (aka 'closing' a long position).
            # - buying previously borrowed and sold (short) units
            #   of the dst asset (aka 'covering'/'closing' a short
            #   position).
            else:
                # only changes on position size increasing txns
                ppu: float = last_ppu

                # include the per-txn profit or loss given we are
                # "closing" the position with this txn.
                pnl += (last_ppu - price) * size

            # cumulative PnLs per txn
            last_ledger_pnl = (
                last_ledger_pnl + pnl
            )
            last_pos_pnl = df[i, 'cum_pos_pnl'] = (
                last_pos_pnl + pnl
            )

            if cumsize == 0:
                last_ppu = ppu = 0

            # compute the BEP: "break even price", a value that
            # determines at what price the remaining cumsize can be
            # liquidated such that the net-PnL on the current
            # position will result in ZERO gain or loss from open
            # to close including all txn costs B)
            if (
                abs(cumsize) > 0  # non-exit-to-zero position txn
            ):
                ledger_bep: float = (
                    (
                        (ppu * cumsize)
                        -
                        (last_ledger_pnl * copysign(1, cumsize))
                    ) / cumsize
                )

                # NOTE: when we "enter more" dst asset units (aka
                # increase position state) AFTER having exited some
                # units (aka decreasing the pos size some) the bep
                # needs to be RECOMPUTED based on new ppu such that
                # liquidation of the cumsize at the bep price
                # results in a zero-pnl for the existing position
                # (since the last one).
                # for position lifetime BEP we never can have
                # a valid value once the position is "closed"
                # / full exitted Bo
                pos_bep: float = (
                    (
                        (ppu * cumsize)
                        -
                        (last_pos_pnl * copysign(1, cumsize))
                    ) / cumsize
                )

            # inject DF row with all values
            df[i, 'pos_ppu'] = ppu
            df[i, 'per_exit_pnl'] = pnl
            df[i, 'cum_pos_pnl'] = last_pos_pnl
            df[i, 'pos_bep'] = pos_bep
            df[i, 'cum_ledger_pnl'] = last_ledger_pnl
            df[i, 'ledger_bep'] = ledger_bep

            # keep backrefs to suffice reccurence relation
            last_ppu: float = ppu
            last_cumsize: float = cumsize

    return dfs