Drop `pandas` to `numpy` converter
							parent
							
								
									72de184c08
								
							
						
					
					
						commit
						6bb1f06813
					
				| 
						 | 
					@ -22,8 +22,7 @@ from typing import Any
 | 
				
			||||||
import decimal
 | 
					import decimal
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
import pandas as pd
 | 
					from pydantic import BaseModel
 | 
				
			||||||
from pydantic import BaseModel, validate_arguments
 | 
					 | 
				
			||||||
# from numba import from_dtype
 | 
					# from numba import from_dtype
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -237,61 +236,6 @@ class Symbol(BaseModel):
 | 
				
			||||||
        return keys
 | 
					        return keys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def from_df(
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    df: pd.DataFrame,
 | 
					 | 
				
			||||||
    source=None,
 | 
					 | 
				
			||||||
    default_tf=None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
) -> np.recarray:
 | 
					 | 
				
			||||||
    """Convert OHLC formatted ``pandas.DataFrame`` to ``numpy.recarray``.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    df.reset_index(inplace=True)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # hackery to convert field names
 | 
					 | 
				
			||||||
    date = 'Date'
 | 
					 | 
				
			||||||
    if 'date' in df.columns:
 | 
					 | 
				
			||||||
        date = 'date'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # convert to POSIX time
 | 
					 | 
				
			||||||
    df[date] = [d.timestamp() for d in df[date]]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # try to rename from some camel case
 | 
					 | 
				
			||||||
    columns = {
 | 
					 | 
				
			||||||
        'Date': 'time',
 | 
					 | 
				
			||||||
        'date': 'time',
 | 
					 | 
				
			||||||
        'Open': 'open',
 | 
					 | 
				
			||||||
        'High': 'high',
 | 
					 | 
				
			||||||
        'Low': 'low',
 | 
					 | 
				
			||||||
        'Close': 'close',
 | 
					 | 
				
			||||||
        'Volume': 'volume',
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # most feeds are providing this over session anchored
 | 
					 | 
				
			||||||
        'vwap': 'bar_wap',
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # XXX: ib_insync calls this the "wap of the bar"
 | 
					 | 
				
			||||||
        # but no clue what it actually is...
 | 
					 | 
				
			||||||
        # https://github.com/pikers/piker/issues/119#issuecomment-729120988
 | 
					 | 
				
			||||||
        'average': 'bar_wap',
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    df = df.rename(columns=columns)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for name in df.columns:
 | 
					 | 
				
			||||||
        # if name not in base_ohlc_dtype.names[1:]:
 | 
					 | 
				
			||||||
        if name not in base_ohlc_dtype.names:
 | 
					 | 
				
			||||||
            del df[name]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # TODO: it turns out column access on recarrays is actually slower:
 | 
					 | 
				
			||||||
    # https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html#RecordArrays:-Structured-Arrays-with-a-Twist
 | 
					 | 
				
			||||||
    # it might make sense to make these structured arrays?
 | 
					 | 
				
			||||||
    array = df.to_records(index=False)
 | 
					 | 
				
			||||||
    _nan_to_closest_num(array)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return array
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def _nan_to_closest_num(array: np.ndarray):
 | 
					def _nan_to_closest_num(array: np.ndarray):
 | 
				
			||||||
    """Return interpolated values instead of NaN.
 | 
					    """Return interpolated values instead of NaN.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue