652 lines
21 KiB
Python
652 lines
21 KiB
Python
# piker: trading gear for hackers
|
|
# Copyright (C) Tyler Goodlet (in stewardship for pikers)
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
'''
|
|
Calculation routines for balance and position tracking such that
|
|
you know when you're losing money (if possible) XD
|
|
|
|
'''
|
|
from __future__ import annotations
|
|
from collections.abc import ValuesView
|
|
from contextlib import contextmanager as cm
|
|
from math import copysign
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Iterator,
|
|
TYPE_CHECKING,
|
|
)
|
|
|
|
import polars as pl
|
|
from pendulum import (
|
|
DateTime,
|
|
from_timestamp,
|
|
parse,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from ._ledger import (
|
|
Transaction,
|
|
TransactionLedger,
|
|
)
|
|
|
|
|
|
def ppu(
    clears: Iterator[Transaction],

    # include transaction cost in breakeven price
    # and presume the worst case of the same cost
    # to exit this transaction (even though in reality
    # it will be dynamic based on exit strategy).
    cost_scalar: float = 2,

    # return the per-clear ledger as a list of `(tid, entry)` pairs
    # with new position fields inserted alongside each entry.
    as_ledger: bool = False,

) -> float | list[tuple[str, dict]]:
    '''
    Compute the "price-per-unit" price for the given non-zero sized
    rolling position.

    The recurrence relation which computes this (size weighted) mean
    per new clear which **increases** the accumulative position size
    is:

    ppu[-1] = (
        ppu[-2] * accum_size[-2]
        +
        cost_basis
    ) / accum_size[-1]

    where `cost_basis` for the current step is the price * size of
    the most recent clearing transaction plus the (signed and
    `cost_scalar` weighted) transaction cost.

    Clears which **decrease** the position size leave the ppu
    unchanged. Entries whose `.price` is a `str` (how transfers are
    normally recorded) are skipped entirely: they neither change the
    accumulated size nor appear in the returned ledger.

    Returns the final ppu (`0` when no clear was processed) or, when
    `as_ledger=True`, a list of `(t.tid, t.to_dict() | {...})` pairs
    in input order with `'ppu'`, `'cumsize'` and `'sign_change'`
    fields added.

    -----
    TODO: get the BEP computed and working similarly!
    -----
    the equivalent "break even price" or bep at each new clear
    event step conversely only changes when a "position exiting
    clear" which **decreases** the cumulative dst asset size:

    bep[-1] = ppu[-1] - (cum_pnl[-1] / cumsize[-1])

    '''
    asize_h: list[float] = []  # historical accumulative size
    ppu_h: list[float] = []  # historical price-per-unit
    ledger: list[tuple[str, dict]] = []

    t: Transaction
    for t in clears:
        clear_size: float = t.size
        clear_price: str | float = t.price

        # on transfers we normally write some non-valid
        # price since withdrawal to another account/wallet
        # has nothing to do with inter-asset-market prices.
        # TODO: this should be better handled via a `type: 'tx'`
        # field as per existing issue surrounding all this:
        # https://github.com/pikers/piker/issues/510
        if isinstance(clear_price, str):
            # TODO: we can't necessarily have this commit to
            # the overall pos size since we also need to
            # include other positions contributions to this
            # balance or we might end up with a -ve balance for
            # the position..
            continue

        last_accum_size: float = asize_h[-1] if asize_h else 0
        accum_size: float = last_accum_size + clear_size
        accum_sign: float = copysign(1, accum_size)

        # test if the pp somehow went "past" a net zero size state
        # resulting in a change of the "sign" of the size (+ve for
        # long, -ve for short).
        # NOTE: `copysign(1, 0)` is `+1`, so a position landing
        # exactly on zero must be excluded explicitly or closing
        # a short would be reported as a flip to long.
        sign_change: bool = (
            last_accum_size != 0
            and accum_size != 0
            and copysign(1, last_accum_size) != accum_sign
        )

        if sign_change:
            # since we passed the net-zero-size state the new size
            # after sum should be the remaining size in the new
            # "direction" (aka, long vs. short) for this clear.
            clear_size = accum_size
            abs_diff: float = abs(accum_size)

            # record the implicit flat state between both sides so
            # the prior side's cost basis is not blended in below.
            asize_h.append(0)
            ppu_h.append(0)

        else:
            # new size minus the old size gives us size diff with
            # +ve -> increase in pp size
            # -ve -> decrease in pp size
            abs_diff = abs(accum_size) - abs(last_accum_size)

        abs_new_size: float = abs(accum_size)

        # XXX: only an increase in size of the position contributes
        # to the price-per-unit, a decrease does not (i.e. the
        # position is being made smaller).
        if abs_diff > 0:
            cost_basis: float = (
                # cost basis for this clear
                clear_price * abs(clear_size)
                +
                # transaction cost
                accum_sign * cost_scalar * t.cost
            )

            # both histories are appended in lockstep so a previous
            # ppu exists exactly when a previous size does.
            cb_last: float = (
                ppu_h[-1] * abs(asize_h[-1])
                if asize_h
                else 0
            )
            new_ppu: float = (cost_basis + cb_last) / abs_new_size

        else:
            # TODO: for PPU we should probably handle txs out
            # (aka withdrawals) similarly by simply not having
            # them contrib to the running PPU calc and only
            # when the next entry clear comes in (which will
            # then have a higher weighting on the PPU).

            # on "exit" clears from a given direction,
            # only the size changes not the price-per-unit
            # need to be updated since the ppu remains constant
            # and gets weighted by the new size.
            new_ppu = ppu_h[-1] if ppu_h else 0

        # extend with new rolling metric for this step
        ppu_h.append(new_ppu)
        asize_h.append(accum_size)

        ledger.append((
            t.tid,
            t.to_dict() | {
                'ppu': new_ppu,
                'cumsize': accum_size,
                'sign_change': sign_change,

                # TODO: cum_pnl, bep
            },
        ))

    # TODO: once we have etypes in all ledger entries..
    # handle any split info entered (for now) manually by user
    # if self.split_ratio is not None:
    #     final_ppu /= self.split_ratio

    if as_ledger:
        return ledger

    return ppu_h[-1] if ppu_h else 0
|
|
|
|
|
|
def iter_by_dt(
|
|
records: (
|
|
dict[str, dict[str, Any]]
|
|
| ValuesView[dict] # eg. `Position._events.values()`
|
|
| list[dict]
|
|
| list[Transaction] # XXX preferred!
|
|
),
|
|
|
|
# NOTE: parsers are looked up in the insert order
|
|
# so if you know that the record stats show some field
|
|
# is more common then others, stick it at the top B)
|
|
parsers: dict[str, Callable | None] = {
|
|
'dt': parse, # parity case
|
|
'datetime': parse, # datetime-str
|
|
'time': from_timestamp, # float epoch
|
|
},
|
|
key: Callable | None = None,
|
|
|
|
) -> Iterator[tuple[str, dict]]:
|
|
'''
|
|
Iterate entries of a transaction table sorted by entry recorded
|
|
datetime presumably set at the ``'dt'`` field in each entry.
|
|
|
|
'''
|
|
if isinstance(records, dict):
|
|
records: list[tuple[str, dict]] = list(records.items())
|
|
|
|
def dyn_parse_to_dt(
|
|
tx: tuple[str, dict[str, Any]] | Transaction,
|
|
) -> DateTime:
|
|
|
|
# handle `.items()` inputs
|
|
if isinstance(tx, tuple):
|
|
tx = tx[1]
|
|
|
|
# dict or tx object?
|
|
isdict: bool = isinstance(tx, dict)
|
|
|
|
# get best parser for this record..
|
|
for k in parsers:
|
|
if (
|
|
isdict and k in tx
|
|
or getattr(tx, k, None)
|
|
):
|
|
v = tx[k] if isdict else tx.dt
|
|
assert v is not None, f'No valid value for `{k}`!?'
|
|
|
|
# only call parser on the value if not None from
|
|
# the `parsers` table above (when NOT using
|
|
# `.get()`), otherwise pass through the value and
|
|
# sort on it directly
|
|
if (
|
|
not isinstance(v, DateTime)
|
|
and (parser := parsers.get(k))
|
|
):
|
|
return parser(v)
|
|
else:
|
|
return v
|
|
|
|
else:
|
|
# XXX: should never get here..
|
|
breakpoint()
|
|
|
|
entry: tuple[str, dict] | Transaction
|
|
for entry in sorted(
|
|
records,
|
|
key=key or dyn_parse_to_dt,
|
|
):
|
|
# NOTE the type sig above; either pairs or txns B)
|
|
yield entry
|
|
|
|
|
|
# TODO: probably just move this into the test suite or
# keep it here for use as such?
|
|
# def ensure_state(self) -> None:
|
|
# '''
|
|
# Audit either the `.cumsize` and `.ppu` local instance vars against
|
|
# the clears table calculations and return the calc-ed values if
|
|
# they differ and log warnings to console.
|
|
|
|
# '''
|
|
# # clears: list[dict] = self._clears
|
|
|
|
# # self.first_clear_dt = min(clears, key=lambda e: e['dt'])['dt']
|
|
# last_clear: dict = clears[-1]
|
|
# csize: float = self.calc_size()
|
|
# accum: float = last_clear['accum_size']
|
|
|
|
# if not self.expired():
|
|
# if (
|
|
# csize != accum
|
|
# and csize != round(accum * (self.split_ratio or 1))
|
|
# ):
|
|
# raise ValueError(f'Size mismatch: {csize}')
|
|
# else:
|
|
# assert csize == 0, 'Contract is expired but non-zero size?'
|
|
|
|
# if self.cumsize != csize:
|
|
# log.warning(
|
|
# 'Position state mismatch:\n'
|
|
# f'{self.cumsize} => {csize}'
|
|
# )
|
|
# self.cumsize = csize
|
|
|
|
# cppu: float = self.calc_ppu()
|
|
# ppu: float = last_clear['ppu']
|
|
# if (
|
|
# cppu != ppu
|
|
# and self.split_ratio is not None
|
|
|
|
# # handle any split info entered (for now) manually by user
|
|
# and cppu != (ppu / self.split_ratio)
|
|
# ):
|
|
# raise ValueError(f'PPU mismatch: {cppu}')
|
|
|
|
# if self.ppu != cppu:
|
|
# log.warning(
|
|
# 'Position state mismatch:\n'
|
|
# f'{self.ppu} => {cppu}'
|
|
# )
|
|
# self.ppu = cppu
|
|
|
|
|
|
@cm
def open_ledger_dfs(

    brokername: str,
    acctname: str,

    ledger: TransactionLedger | None = None,

    **kwargs,

) -> Iterator[
    tuple[
        dict[str, pl.DataFrame],
        TransactionLedger,
    ]
]:
    '''
    Open a ledger of trade records (presumably from some broker
    backend), normalize the records into `Transactions` via the
    backend's declared endpoint, cast to a `polars.DataFrame` which
    can update the ledger on exit.

    Yields a pair of the per-market frames produced by
    `ledger_to_dfs()` and the ledger they were built from.

    When a (non-empty) `ledger` is passed in it is used as is and no
    file is opened. Otherwise the ledger for `acctname@brokername`
    is opened via `open_trade_ledger()` (with `**kwargs` proxied
    through) and that context is kept active until the caller's
    `with` block exits.

    Raises `ValueError` if the opened ledger is empty.

    '''
    # caller provided ledger: nothing to open or close here.
    if ledger:
        yield ledger_to_dfs(ledger), ledger
        return

    import time
    from tractor._debug import open_crash_handler
    from ._ledger import open_trade_ledger

    now = time.time()
    with (
        open_crash_handler(),

        open_trade_ledger(
            brokername,
            acctname,
            rewrite=True,
            allow_from_sync_code=True,

            # proxied through from caller
            **kwargs,

        ) as ledger,
    ):
        if not ledger:
            raise ValueError(f'No ledger for {acctname}@{brokername} exists?')

        print(f'LEDGER LOAD TIME: {time.time() - now}')

        # process raw TOML ledger into txns using the
        # appropriate backend normalizer.
        # cache: AssetsInfo = get_symcache(
        #     brokername,
        #     allow_reload=True,
        # )

        # NOTE: yield INSIDE the ledger context so it is still open
        # while the caller works with the frames / ledger.
        yield ledger_to_dfs(ledger), ledger
|
|
|
|
|
|
def _roll_position_metrics(
    rows: Iterator[dict[str, Any]],
) -> dict[str, list[float | None]]:
    '''
    Imperatively compute the PPU (price per unit) and BEP (break
    even price) recurrence relations over the chronologically
    ordered txn `rows` of a single market.

    Each row must provide the `'cumsize'`, `'is_enter'`, `'price'`,
    `'size'` and `'cost'` fields. The result maps each output column
    name to a list holding one value per input row.

    '''
    cols: dict[str, list[float | None]] = {
        'pos_ppu': [],
        'per_exit_pnl': [],
        'cum_pos_pnl': [],
        'pos_bep': [],
        'cum_ledger_pnl': [],
        'ledger_bep': [],
    }

    # back-references which suffice the recurrence relations
    last_ppu: float = 0.0
    last_cumsize: float = 0.0
    last_ledger_pnl: float = 0.0
    last_pos_pnl: float = 0.0

    # BEPs are only recomputed while a position is open; on rows
    # which leave the position flat the last computed value is
    # carried forward. Start from the column defaults so a leading
    # flat row (or a previous market) can never leak a value in.
    pos_bep: float = 0.0
    ledger_bep: float | None = None

    for row in rows:
        cumsize: float = row['cumsize']
        is_enter: bool = row['is_enter']
        price: float = row['price']
        size: float = row['size']

        # ALWAYS reset per-position cum PnL
        if last_cumsize == 0:
            last_pos_pnl = 0.0

        # the profit is ALWAYS decreased, aka made a "loss"
        # by the constant fee charged by the txn provider!
        # TODO: support exit txn virtual cost which we
        # resolve on exit txns incrementally?
        pnl: float = -1 * row['cost']

        # a "position size INCREASING" or ENTER transaction
        # which "makes larger", in src asset unit terms, the
        # trade's side-size of the destination asset:
        # - "buying" (more) units of the dst asset
        # - "selling" (more short) units of the dst asset
        #
        # XXX: a zero sized txn on a flat position also compares as
        # an "enter" (both signs are 0) and must be kept out of here
        # to avoid dividing by the zero `cumsize`.
        #
        # NOTE(review): a single txn which flips the position
        # through zero is also classified as an enter (size and
        # cumsize share a sign) so the ppu below blends both sides;
        # confirm whether that is intended.
        if is_enter and cumsize != 0:

            # a cumulative mean of the price-per-unit acquired
            # in the destination asset:
            # https://en.wikipedia.org/wiki/Moving_average#Cumulative_average
            # You could also think of this measure more
            # generally as an exponential mean with `alpha
            # = 1/N` where `N` is the current number of txns
            # included in the "position" defining set:
            # https://en.wikipedia.org/wiki/Exponential_smoothing
            ppu: float = (
                (
                    (last_ppu * last_cumsize)
                    +
                    (price * size)
                ) /
                cumsize
            )

        # a "position size DECREASING" or EXIT transaction
        # which "makes smaller" the trade's side-size of the
        # destination asset:
        # - selling previously bought units of the dst asset
        #   (aka 'closing' a long position).
        # - buying previously borrowed and sold (short) units
        #   of the dst asset (aka 'covering'/'closing' a short
        #   position).
        else:
            # only changes on position size increasing txns
            ppu = last_ppu

            # include the per-txn profit or loss given we are
            # "closing" the position with this txn.
            pnl += (last_ppu - price) * size

        # cumulative PnLs per txn
        last_ledger_pnl += pnl
        last_pos_pnl += pnl

        if cumsize == 0:
            ppu = 0.0

        # compute the BEP: "break even price", a value that
        # determines at what price the remaining cumsize can be
        # liquidated such that the net-PnL on the current
        # position will result in ZERO gain or loss from open
        # to close including all txn costs B)
        else:
            side: float = copysign(1, cumsize)
            ledger_bep = (
                (
                    (ppu * cumsize)
                    -
                    (last_ledger_pnl * side)
                ) / cumsize
            )

            # NOTE: when we "enter more" dst asset units (aka
            # increase position state) AFTER having exited some
            # units (aka decreasing the pos size some) the bep
            # needs to be RECOMPUTED based on new ppu such that
            # liquidation of the cumsize at the bep price
            # results in a zero-pnl for the existing position
            # (since the last one).
            # for position lifetime BEP we never can have
            # a valid value once the position is "closed"
            # / fully exited Bo
            pos_bep = (
                (
                    (ppu * cumsize)
                    -
                    (last_pos_pnl * side)
                ) / cumsize
            )

        # record this row's values
        cols['pos_ppu'].append(ppu)
        cols['per_exit_pnl'].append(pnl)
        cols['cum_pos_pnl'].append(last_pos_pnl)
        cols['pos_bep'].append(pos_bep)
        cols['cum_ledger_pnl'].append(last_ledger_pnl)
        cols['ledger_bep'].append(ledger_bep)

        # keep backrefs to suffice recurrence relation
        last_ppu = ppu
        last_cumsize = cumsize

    return cols


def ledger_to_dfs(
    ledger: TransactionLedger,

    # include transaction cost in breakeven price
    # and presume the worst case of the same cost
    # to exit this transaction (even though in reality
    # it will be dynamic based on exit strategy).
    # NOTE: currently NOT applied anywhere in the calcs below.
    cost_scalar: float = 1,

) -> dict[str, pl.DataFrame]:
    '''
    Cast the txns of `ledger` into one `polars.DataFrame` per
    `bs_mktid`, each in `dt` order and extended with rolling
    accounting columns:

    - `cumsize`: running sum of `size`
    - `dst_bot`: `price * size + cost` per txn
    - `src_balance`: negated running sum of `dst_bot`
    - `descr` / `is_enter`: whether the txn's size has the same
      sign as the resulting `cumsize` ('enter') or not ('exit',
      'exit_to_zero')
    - `pos_ppu`, `per_exit_pnl`, `cum_pos_pnl`, `pos_bep`,
      `cum_ledger_pnl`, `ledger_bep`: per-txn price-per-unit, PnL
      and break-even-price metrics.

    The `tid`, `expiry`, `bs_mktid` and `etype` columns are dropped
    from the returned frames.

    '''
    txns: dict[str, Transaction] = ledger.to_txns()

    all_txns_df = pl.from_dicts(
        [txn.to_dict() for txn in txns.values()],

        # only for ordering the cols
        schema=[
            ('fqme', str),
            ('tid', str),
            ('bs_mktid', str),
            ('expiry', str),
            ('etype', str),
            ('dt', str),
            ('size', pl.Float64),
            ('price', pl.Float64),
            ('cost', pl.Float64),
        ],
    ).sort(  # chronological order
        'dt'
    ).with_columns([
        pl.col('dt').str.to_datetime(),
        # pl.col('expiry').str.to_datetime(),
        # pl.col('expiry').dt.date(),
    ])

    # filter out to the columns matching values filter passed
    # as input.
    # if filter_by_ids:
    #     for col, vals in filter_by_ids.items():
    #         str_vals = set(map(str, vals))
    #         pred: pl.Expr = pl.col(col).eq(str_vals.pop())
    #         for val in str_vals:
    #             pred |= pl.col(col).eq(val)
    #
    #     fdf = df.filter(pred)

    # break up into a frame per mkt / fqme
    dfs: dict[str, pl.DataFrame] = all_txns_df.partition_by(
        'bs_mktid',
        as_dict=True,
    )

    # TODO: not sure if this is even possible but..
    # - it'd be more ideal to use `ppt = df.groupby('fqme').agg([`
    # - ppu and bep calcs!
    for key in dfs:

        # TODO: pass back the current `Position` object loaded from
        # the account as well? Would provide incentive to do all
        # this ledger loading inside a new async open_account().
        # bs_mktid: str = df[0]['bs_mktid']
        # pos: Position = acnt.pps[bs_mktid]

        # convert to lazy form (since apparently we might need it
        # eventually ...)
        df: pl.DataFrame = dfs[key].lazy().with_columns([

            pl.cumsum('size').alias('cumsize'),

            # amount of source asset "sent" (via buy txns in
            # the market) to acquire the dst asset, PER txn.
            # when this value is -ve (i.e. a sell operation) then
            # the amount sent is actually "returned".
            (
                (pl.col('price') * pl.col('size'))
                +
                pl.col('cost')
            ).alias('dst_bot'),

        ]).with_columns([

            # rolling balance in src asset units
            (pl.col('dst_bot').cumsum() * -1).alias('src_balance'),

            # "position operation type" in terms of increasing the
            # amount in the dst asset (entering) or decreasing the
            # amount in the dst asset (exiting).
            pl.when(
                pl.col('size').sign() == pl.col('cumsize').sign()

            ).then(
                pl.lit('enter')

            ).otherwise(
                pl.when(pl.col('cumsize') == 0)
                .then(pl.lit('exit_to_zero'))
                .otherwise(pl.lit('exit'))
            ).alias('descr'),

            (pl.col('cumsize').sign() == pl.col('size').sign())
            .alias('is_enter'),

        ]).with_columns([

            # placeholders (fixing the column order) which get
            # filled in from the iterative pass below.
            pl.lit(0, dtype=pl.Float64).alias('pos_ppu'),
            pl.lit(0, dtype=pl.Float64).alias('per_exit_pnl'),
            pl.lit(0, dtype=pl.Float64).alias('cum_pos_pnl'),
            pl.lit(0, dtype=pl.Float64).alias('pos_bep'),
            pl.lit(0, dtype=pl.Float64).alias('cum_ledger_pnl'),
            pl.lit(None, dtype=pl.Float64).alias('ledger_bep'),

            # TODO: instead of the iterative loop below i guess we
            # could try using embedded lists to track which txns
            # are part of which ppu / bep calcs? Not sure this will
            # look any better nor be any more performant though xD
            # pl.lit([[0]], dtype=pl.List).alias('list'),

        # choose fields to emit for accounting purposes
        ]).select([
            pl.exclude([
                'tid',
                # 'dt',
                'expiry',
                'bs_mktid',
                'etype',
            ]),
        ]).collect()

        # compute recurrence relations for ppu and bep, oriented
        # around each position state: a state of split balances in
        # > 1 asset. Every metric column is then replaced in one
        # go instead of writing the frame cell-by-cell.
        metrics: dict[str, list[float | None]] = _roll_position_metrics(
            df.iter_rows(named=True)
        )
        dfs[key] = df.with_columns([
            pl.Series(name, values, dtype=pl.Float64)
            for name, values in metrics.items()
        ])

    return dfs
|