Add (ostensibly) working first attempt at M4 algo
All the refs are in the comments and original sample code from infinite has been reworked to expect the input x/y arrays to already be sliced (though we can later support passing in the start-end indexes if desired). The new routines are `ds_m4()` the python top level API and `_m4()` the fast `numba` implementation.big_data_lines
parent
e7481b1634
commit
39b7c9340d
|
@ -19,13 +19,13 @@ Graphics related downsampling routines for compressing to pixel
|
|||
limits on the display device.
|
||||
|
||||
'''
|
||||
# from typing import Optional
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
# from numpy.lib.recfunctions import structured_to_unstructured
|
||||
from numba import (
|
||||
jit,
|
||||
float64, optional, int64,
|
||||
# float64, optional, int64,
|
||||
)
|
||||
|
||||
from ..log import get_logger
|
||||
|
@ -36,7 +36,7 @@ log = get_logger(__name__)
|
|||
|
||||
def hl2mxmn(
|
||||
ohlc: np.ndarray,
|
||||
downsample_by: int = 0,
|
||||
# downsample_by: int = 0,
|
||||
|
||||
) -> np.ndarray:
|
||||
'''
|
||||
|
@ -61,17 +61,19 @@ def hl2mxmn(
|
|||
trace_hl(hls, mxmn, x, index[0])
|
||||
x = x + index[0]
|
||||
|
||||
if downsample_by < 2:
|
||||
return mxmn, x
|
||||
return mxmn, x
|
||||
|
||||
dsx, dsy = downsample(
|
||||
y=mxmn,
|
||||
x=x,
|
||||
bins=downsample_by,
|
||||
)
|
||||
log.info(f'downsampling by {downsample_by}')
|
||||
print(f'downsampling by {downsample_by}')
|
||||
return dsy, dsx
|
||||
# if downsample_by < 2:
|
||||
# return mxmn, x
|
||||
|
||||
# dsx, dsy = downsample(
|
||||
# y=mxmn,
|
||||
# x=x,
|
||||
# bins=downsample_by,
|
||||
# )
|
||||
# log.info(f'downsampling by {downsample_by}')
|
||||
# print(f'downsampling by {downsample_by}')
|
||||
# return dsy, dsx
|
||||
|
||||
|
||||
@jit(
|
||||
|
@ -129,8 +131,11 @@ def downsample(
|
|||
x: np.ndarray,
|
||||
y: np.ndarray,
|
||||
bins: int = 2,
|
||||
|
||||
method: str = 'peak',
|
||||
|
||||
**kwargs,
|
||||
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
'''
|
||||
Downsample x/y data for lesser curve graphics gen.
|
||||
|
@ -142,7 +147,6 @@ def downsample(
|
|||
# py3.10 syntax
|
||||
match method:
|
||||
case 'peak':
|
||||
# breakpoint()
|
||||
if bins < 2:
|
||||
log.warning('No downsampling taking place?')
|
||||
|
||||
|
@ -164,87 +168,142 @@ def downsample(
|
|||
|
||||
return x, y
|
||||
|
||||
# TODO: this algo from infinite, see
|
||||
# https://github.com/pikers/piker/issues/109
|
||||
case 'infinite_4px':
|
||||
|
||||
# Ex. from infinite on downsampling viewable graphics.
|
||||
# "one thing i remembered about the binning - if you are
|
||||
# picking a range within your timeseries the start and end bin
|
||||
# should be one more bin size outside the visual range, then
|
||||
# you get better visual fidelity at the edges of the graph"
|
||||
# "i didn't show it in the sample code, but it's accounted for
|
||||
# in the start and end indices and number of bins"
|
||||
|
||||
def build_subchart(
|
||||
self,
|
||||
subchart,
|
||||
width, # width of screen in pxs?
|
||||
chart_type,
|
||||
lower, # x start?
|
||||
upper, # x end?
|
||||
xvals,
|
||||
yvals
|
||||
):
|
||||
pts_per_pixel = len(xvals) / width
|
||||
if pts_per_pixel > 1:
|
||||
|
||||
# this is mutated in-place
|
||||
data = np.zeros((width, 4), yvals.dtype)
|
||||
bins = np.zeros(width, xvals.dtype)
|
||||
|
||||
nb = subset_by_x(
|
||||
xvals,
|
||||
yvals,
|
||||
bins,
|
||||
data,
|
||||
lower,
|
||||
# this is scaling the x-range by
|
||||
# the width of the screen?
|
||||
(upper-lower)/float(width),
|
||||
)
|
||||
print(f'downsampled to {nb} bins')
|
||||
|
||||
return x, y
|
||||
case 'm4':
|
||||
return ds_m4(x, y, kwargs['px_width'])
|
||||
|
||||
|
||||
@jit(nopython=True)
|
||||
def subset_by_x(
|
||||
def ds_m4(
|
||||
x: np.ndarray,
|
||||
y: np.ndarray,
|
||||
|
||||
# this is the width of the data in view
|
||||
# in display-device-local pixel units.
|
||||
px_width: int,
|
||||
|
||||
factor: Optional[int] = None,
|
||||
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
'''
|
||||
Downsample using the M4 algorithm.
|
||||
|
||||
'''
|
||||
|
||||
# NOTE: this method is a so called "visualization driven data
|
||||
# aggregation" approach. It gives error-free line chart
|
||||
# downsampling, see
|
||||
# further scientific paper resources:
|
||||
# - http://www.vldb.org/pvldb/vol7/p797-jugel.pdf
|
||||
# - http://www.vldb.org/2014/program/papers/demo/p997-jugel.pdf
|
||||
|
||||
# Details on implementation of this algo are based in,
|
||||
# https://github.com/pikers/piker/issues/109
|
||||
|
||||
# XXX: from infinite on downsampling viewable graphics:
|
||||
# "one thing i remembered about the binning - if you are
|
||||
# picking a range within your timeseries the start and end bin
|
||||
# should be one more bin size outside the visual range, then
|
||||
# you get better visual fidelity at the edges of the graph"
|
||||
# "i didn't show it in the sample code, but it's accounted for
|
||||
# in the start and end indices and number of bins"
|
||||
|
||||
assert px_width > 1 # width of screen in pxs?
|
||||
|
||||
# NOTE: if we didn't pre-slice the data to downsample
|
||||
# you could in theory pass these as the slicing params,
|
||||
# do we care though since we can always just pre-slice the
|
||||
# input?
|
||||
x_start = 0 # x index start
|
||||
x_end = len(x) # x index end
|
||||
|
||||
# uppx: units-per-pixel
|
||||
pts_per_pixel = len(x) / px_width
|
||||
print(f'UPPX: {pts_per_pixel}')
|
||||
|
||||
# ratio of indexed x-value to width of raster in pixels.
|
||||
if factor is None:
|
||||
w = (x_end-x_start) / float(px_width)
|
||||
print(f' pts/pxs = {w}')
|
||||
else:
|
||||
w = factor
|
||||
|
||||
# these are pre-allocated and mutated by ``numba``
|
||||
# code in-place.
|
||||
ds = np.zeros((px_width, 4), y.dtype)
|
||||
i_win = np.zeros(px_width, x.dtype)
|
||||
|
||||
# call into ``numba``
|
||||
nb = _m4(
|
||||
x,
|
||||
y,
|
||||
|
||||
i_win,
|
||||
ds,
|
||||
|
||||
# first index in x data to start at
|
||||
x_start,
|
||||
# window size for each "frame" of data to downsample (normally
|
||||
# scaled by the ratio of pixels on screen to data in x-range).
|
||||
w,
|
||||
)
|
||||
print(f'downsampled to {nb} bins')
|
||||
|
||||
return i_win, ds.flatten()
|
||||
|
||||
|
||||
@jit(
|
||||
nopython=True,
|
||||
)
|
||||
def _m4(
|
||||
|
||||
xs: np.ndarray,
|
||||
ys: np.ndarray,
|
||||
bins: np.ndarray,
|
||||
data: np.ndarray,
|
||||
|
||||
# pre-alloc array of x indices mapping to the start
|
||||
# of each window used for downsampling in y.
|
||||
i_win: np.ndarray,
|
||||
|
||||
# pre-alloc array of output downsampled y values
|
||||
ds: np.ndarray,
|
||||
|
||||
x_start: int,
|
||||
step: float,
|
||||
|
||||
) -> int:
|
||||
# nbins = len(bins)
|
||||
count = len(xs)
|
||||
# nbins = len(i_win)
|
||||
# count = len(xs)
|
||||
|
||||
bincount = 0
|
||||
x_left = x_start
|
||||
|
||||
# Find the first bin
|
||||
# Find the first window's starting index which *includes* the
|
||||
# first value in the x-domain array.
|
||||
# (this allows passing in an array which is indexed (and thus smaller then)
|
||||
# the ``x_start`` value normally passed in - say if you normally
|
||||
# want to start 0-indexed.
|
||||
first = xs[0]
|
||||
while first >= x_left + step:
|
||||
x_left += step
|
||||
|
||||
bins[bincount] = x_left
|
||||
data[bincount] = ys[0]
|
||||
# set all bins in the left-most entry to the starting left-most x value
|
||||
# (aka a row broadcast).
|
||||
i_win[bincount] = x_left
|
||||
# set all y-values to the first value passed in.
|
||||
ds[bincount] = ys[0]
|
||||
|
||||
for i in range(count):
|
||||
for i in range(len(xs)):
|
||||
x = xs[i]
|
||||
y = ys[i]
|
||||
if x < x_left + step: # Interval is [bin, bin+1)
|
||||
data[bincount, 1] = min(y, data[bincount, 1])
|
||||
data[bincount, 2] = max(y, data[bincount, 2])
|
||||
data[bincount, 3] = y
|
||||
if x < x_left + step: # the current window "step" is [bin, bin+1)
|
||||
ds[bincount, 1] = min(y, ds[bincount, 1])
|
||||
ds[bincount, 2] = max(y, ds[bincount, 2])
|
||||
ds[bincount, 3] = y
|
||||
else:
|
||||
# Find the next bin
|
||||
while x >= x_left + step:
|
||||
x_left += step
|
||||
|
||||
bincount += 1
|
||||
bins[bincount] = x_left
|
||||
data[bincount] = y
|
||||
i_win[bincount] = x_left
|
||||
ds[bincount] = y
|
||||
|
||||
return bincount
|
||||
|
|
Loading…
Reference in New Issue