Return input y-range min/max values from m4

mxmn_from_m4
Tyler Goodlet 2022-06-25 18:44:24 -04:00
parent 3977f1cc7e
commit d92ff9c7a0
1 changed file with 40 additions and 16 deletions

View File

@@ -223,14 +223,20 @@ def ds_m4(
assert frames >= (xrange / uppx) assert frames >= (xrange / uppx)
# call into ``numba`` # call into ``numba``
nb, i_win, y_out = _m4( (
nb,
x_out,
y_out,
ymn,
ymx,
) = _m4(
x, x,
y, y,
frames, frames,
# TODO: see func below.. # TODO: see func below..
# i_win, # x_out,
# y_out, # y_out,
# first index in x data to start at # first index in x data to start at
@@ -243,10 +249,11 @@ def ds_m4(
# filter out any overshoot in the input allocation arrays by # filter out any overshoot in the input allocation arrays by
# removing zero-ed tail entries which should start at a certain # removing zero-ed tail entries which should start at a certain
# index. # index.
i_win = i_win[i_win != 0] x_out = x_out[x_out != 0]
y_out = y_out[:i_win.size] y_out = y_out[:x_out.size]
return nb, i_win, y_out # print(f'M4 output ymn, ymx: {ymn},{ymx}')
return nb, x_out, y_out, ymn, ymx
@jit( @jit(
@@ -260,8 +267,8 @@ def _m4(
frames: int, frames: int,
# TODO: using this approach by having the ``.zeros()`` alloc lines # TODO: using this approach, having the ``.zeros()`` alloc lines
# below, in put python was causing segs faults and alloc crashes.. # below in pure python, there were segs faults and alloc crashes..
# we might need to see how it behaves with shm arrays and consider # we might need to see how it behaves with shm arrays and consider
# allocating them once at startup? # allocating them once at startup?
@@ -274,14 +281,22 @@ def _m4(
x_start: int, x_start: int,
step: float, step: float,
) -> int: ) -> tuple[
# nbins = len(i_win) int,
# count = len(xs) np.ndarray,
np.ndarray,
float,
float,
]:
'''
Implementation of the m4 algorithm in ``numba``:
http://www.vldb.org/pvldb/vol7/p797-jugel.pdf
'''
# these are pre-allocated and mutated by ``numba`` # these are pre-allocated and mutated by ``numba``
# code in-place. # code in-place.
y_out = np.zeros((frames, 4), ys.dtype) y_out = np.zeros((frames, 4), ys.dtype)
i_win = np.zeros(frames, xs.dtype) x_out = np.zeros(frames, xs.dtype)
bincount = 0 bincount = 0
x_left = x_start x_left = x_start
@@ -295,24 +310,33 @@ def _m4(
# set all bins in the left-most entry to the starting left-most x value # set all bins in the left-most entry to the starting left-most x value
# (aka a row broadcast). # (aka a row broadcast).
i_win[bincount] = x_left x_out[bincount] = x_left
# set all y-values to the first value passed in. # set all y-values to the first value passed in.
y_out[bincount] = ys[0] y_out[bincount] = ys[0]
mx: float = 0
mn: float = np.inf
# compute OHLC style max / min values per window sized x-frame.
for i in range(len(xs)): for i in range(len(xs)):
x = xs[i] x = xs[i]
y = ys[i] y = ys[i]
if x < x_left + step: # the current window "step" is [bin, bin+1) if x < x_left + step: # the current window "step" is [bin, bin+1)
y_out[bincount, 1] = min(y, y_out[bincount, 1]) ymn = y_out[bincount, 1] = min(y, y_out[bincount, 1])
y_out[bincount, 2] = max(y, y_out[bincount, 2]) ymx = y_out[bincount, 2] = max(y, y_out[bincount, 2])
y_out[bincount, 3] = y y_out[bincount, 3] = y
mx = max(mx, ymx)
mn = min(mn, ymn)
else: else:
# Find the next bin # Find the next bin
while x >= x_left + step: while x >= x_left + step:
x_left += step x_left += step
bincount += 1 bincount += 1
i_win[bincount] = x_left x_out[bincount] = x_left
y_out[bincount] = y y_out[bincount] = y
return bincount, i_win, y_out return bincount, x_out, y_out, mn, mx