Drop `pandas` to `numpy` converter

2022-03-29 13:15:23 -04:00 · 2022-03-29 13:15:23 -04:00 · 950cb03e07
parent 907b7dd5c6
commit 950cb03e07
1 changed files with 1 additions and 57 deletions
--- a/piker/data/_source.py
+++ b/piker/data/_source.py
@ -22,8 +22,7 @@ from typing import Any
 import decimal
 import numpy as np
-import pandas as pd
+from pydantic import BaseModel
 from pydantic import BaseModel, validate_arguments
 # from numba import from_dtype
@ -254,61 +253,6 @@ class Symbol(BaseModel):
        return keys
 def from_df(
    df: pd.DataFrame,
    source=None,
    default_tf=None
 ) -> np.recarray:
    """Convert OHLC formatted ``pandas.DataFrame`` to ``numpy.recarray``.

    The index is moved into a regular column, the date column is
    converted to POSIX timestamps, known column names are mapped onto
    the field names used by ``base_ohlc_dtype`` and every column which
    is not a field of that dtype is dropped.

    All work is done on a copy: the frame passed in by the caller is
    not modified.

    Parameters
    ----------
    df:
        Frame holding a ``'Date'`` or ``'date'`` column (or an index
        carrying that name) whose values expose a ``.timestamp()``
        method.
    source:
        Unused; kept for call-site compatibility.
    default_tf:
        Unused; kept for call-site compatibility.

    Returns
    -------
    np.recarray
        The records produced by ``DataFrame.to_records(index=False)``
        after being handed to ``_nan_to_closest_num()``.

    Raises
    ------
    KeyError
        If neither a ``'date'`` nor a ``'Date'`` column is present
        once the index has been reset.

    """
    # NOTE: deliberately NOT ``inplace=True``; resetting in place (and
    # the column assignment below) would silently rewrite the caller's
    # frame.
    df = df.reset_index()

    # hackery to convert field names
    date = 'date' if 'date' in df.columns else 'Date'

    # convert to POSIX time
    df[date] = [d.timestamp() for d in df[date]]

    # try to rename from some camel case
    columns = {
        'Date': 'time',
        'date': 'time',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',

        # most feeds are providing this over session anchored
        'vwap': 'bar_wap',

        # XXX: ib_insync calls this the "wap of the bar"
        # but no clue what it actually is...
        # https://github.com/pikers/piker/issues/119#issuecomment-729120988
        'average': 'bar_wap',
    }
    df = df.rename(columns=columns)

    # drop, in one shot, every column which is not a field of the
    # target dtype instead of deleting columns one by one while looping
    # over them.
    unwanted = [
        name for name in df.columns
        if name not in base_ohlc_dtype.names
    ]
    df = df.drop(columns=unwanted)

    # TODO: it turns out column access on recarrays is actually slower:
    # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist
    # it might make sense to make these structured arrays?
    array = df.to_records(index=False)
    _nan_to_closest_num(array)

    return array
 def _nan_to_closest_num(array: np.ndarray):
    """Return interpolated values instead of NaN.