# NumPy Structured Array Patterns Detailed patterns for working with NumPy structured arrays in piker's financial data processing. ## Piker's OHLCV Array Dtype ```python # typical piker array dtype dtype = [ ('index', 'i8'), # absolute sequence index ('time', 'f8'), # unix epoch timestamp ('open', 'f8'), ('high', 'f8'), ('low', 'f8'), ('close', 'f8'), ('volume', 'f8'), ] arr = np.array( [(0, 1234.0, 100, 101, 99, 100.5, 1000)], dtype=dtype, ) # field access times = arr['time'] # returns view, not copy closes = arr['close'] ``` ## Structured Array Performance Gotchas ### 1. Field access in loops is slow ```python # BAD: repeated struct field access per iteration for i, row in enumerate(arr): x = row['index'] # struct access! y = row['close'] process(x, y) # GOOD: extract fields once, iterate plain arrays indices = arr['index'] # extract once closes = arr['close'] for i in range(len(arr)): x = indices[i] # plain array indexing y = closes[i] process(x, y) ``` ### 2. Dict comprehensions with struct arrays ```python # SLOW: field access per row in Python loop time_to_row = { float(row['time']): { 'index': float(row['index']), 'close': float(row['close']), } for row in matched_rows # struct access! } # FAST: extract to plain arrays first times = matched_rows['time'].astype(float) indices = matched_rows['index'].astype(float) closes = matched_rows['close'].astype(float) time_to_row = { t: {'index': idx, 'close': cls} for t, idx, cls in zip( times, indices, closes, ) } ``` ## Vectorized Boolean Operations ### Basic Filtering ```python # single condition recent = array[array['time'] > cutoff_time] # multiple conditions with &, | filtered = array[ (array['time'] > start_time) & (array['time'] < end_time) & (array['volume'] > min_volume) ] # IMPORTANT: parentheses required around each comparison!
# (operator precedence: & binds tighter than >) ``` ### Fancy Indexing ```python # boolean mask mask = array['close'] > array['open'] # up bars up_bars = array[mask] # integer indices indices = np.array([0, 5, 10, 15]) selected = array[indices] # combine boolean + fancy indexing mask = array['volume'] > threshold high_vol_indices = np.where(mask)[0] subset = array[high_vol_indices[::2]] # every other ``` ## Common Financial Patterns ### Gap Detection ```python # assume sorted by time time_diffs = np.diff(array['time']) expected_step = 60.0 # 1-minute bars # find gaps larger than expected gap_mask = time_diffs > (expected_step * 1.5) gap_indices = np.where(gap_mask)[0] # get gap start/end times gap_starts = array['time'][gap_indices] gap_ends = array['time'][gap_indices + 1] ``` ### Rolling Window Operations ```python # simple moving average (close) window = 20 sma = np.convolve( array['close'], np.ones(window) / window, mode='valid', ) # stride tricks for efficiency from numpy.lib.stride_tricks import ( sliding_window_view, ) windows = sliding_window_view( array['close'], window, ) sma = windows.mean(axis=1) ``` ### OHLC Resampling (NumPy) ```python # resample 1m bars to 5m bars def resample_ohlc(arr, old_step, new_step): n_bars = len(arr) factor = int(new_step / old_step) # truncate to multiple of factor n_complete = (n_bars // factor) * factor arr = arr[:n_complete] # reshape into chunks reshaped = arr.reshape(-1, factor) # aggregate OHLC opens = reshaped[:, 0]['open'] highs = reshaped['high'].max(axis=1) lows = reshaped['low'].min(axis=1) closes = reshaped[:, -1]['close'] volumes = reshaped['volume'].sum(axis=1) return np.rec.fromarrays( [opens, highs, lows, closes, volumes], names=[ 'open', 'high', 'low', 'close', 'volume', ], ) ``` ## Memory Considerations ### Views vs Copies ```python # VIEW: shares memory (fast, no copy) times = array['time'] # field access subset = array[10:20] # slicing reshaped = array.reshape(-1, 2) # COPY: new memory allocation filtered 
= array[array['time'] > cutoff] sorted_arr = np.sort(array) casted = array['close'].astype(np.float32) # force copy when needed explicit_copy = array.copy() ``` ### In-Place Operations ```python # modify in-place (no new allocation) array['close'] *= 1.01 # scale prices array['volume'][mask] = 0 # zero volume on masked rows # careful: non-augmented assignment builds a temporary result array['close'] = array['close'] * 1.01 # temp! array['close'] *= 1.01 # true in-place ```