Drop `pandas` to `numpy` converter
parent
7df795435e
commit
1837e467be
|
@ -22,8 +22,7 @@ from typing import Any
|
||||||
import decimal
|
import decimal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
from pydantic import BaseModel
|
||||||
from pydantic import BaseModel, validate_arguments
|
|
||||||
# from numba import from_dtype
|
# from numba import from_dtype
|
||||||
|
|
||||||
|
|
||||||
|
@ -254,61 +253,6 @@ class Symbol(BaseModel):
|
||||||
return keys
|
return keys
|
||||||
|
|
||||||
|
|
||||||
def from_df(
|
|
||||||
|
|
||||||
df: pd.DataFrame,
|
|
||||||
source=None,
|
|
||||||
default_tf=None
|
|
||||||
|
|
||||||
) -> np.recarray:
|
|
||||||
"""Convert OHLC formatted ``pandas.DataFrame`` to ``numpy.recarray``.
|
|
||||||
|
|
||||||
"""
|
|
||||||
df.reset_index(inplace=True)
|
|
||||||
|
|
||||||
# hackery to convert field names
|
|
||||||
date = 'Date'
|
|
||||||
if 'date' in df.columns:
|
|
||||||
date = 'date'
|
|
||||||
|
|
||||||
# convert to POSIX time
|
|
||||||
df[date] = [d.timestamp() for d in df[date]]
|
|
||||||
|
|
||||||
# try to rename from some camel case
|
|
||||||
columns = {
|
|
||||||
'Date': 'time',
|
|
||||||
'date': 'time',
|
|
||||||
'Open': 'open',
|
|
||||||
'High': 'high',
|
|
||||||
'Low': 'low',
|
|
||||||
'Close': 'close',
|
|
||||||
'Volume': 'volume',
|
|
||||||
|
|
||||||
# most feeds are providing this over session anchored
|
|
||||||
'vwap': 'bar_wap',
|
|
||||||
|
|
||||||
# XXX: ib_insync calls this the "wap of the bar"
|
|
||||||
# but no clue what it actually is...
|
|
||||||
# https://github.com/pikers/piker/issues/119#issuecomment-729120988
|
|
||||||
'average': 'bar_wap',
|
|
||||||
}
|
|
||||||
|
|
||||||
df = df.rename(columns=columns)
|
|
||||||
|
|
||||||
for name in df.columns:
|
|
||||||
# if name not in base_ohlc_dtype.names[1:]:
|
|
||||||
if name not in base_ohlc_dtype.names:
|
|
||||||
del df[name]
|
|
||||||
|
|
||||||
# TODO: it turns out column access on recarrays is actually slower:
|
|
||||||
# https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist
|
|
||||||
# it might make sense to make these structured arrays?
|
|
||||||
array = df.to_records(index=False)
|
|
||||||
_nan_to_closest_num(array)
|
|
||||||
|
|
||||||
return array
|
|
||||||
|
|
||||||
|
|
||||||
def _nan_to_closest_num(array: np.ndarray):
|
def _nan_to_closest_num(array: np.ndarray):
|
||||||
"""Return interpolated values instead of NaN.
|
"""Return interpolated values instead of NaN.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue