Fix x-range -> # of frames calculation

Obviously determining the x-range from indices was wrong and was the
reason for the incorrect (downsampled) output size XD. Instead, correctly
determine the x-range and start value from the *values of* the input
x-array. Pretty sure this makes the implementation nearly
production-ready.

Relates to #109
big_data_lines
Tyler Goodlet 2022-03-15 14:03:44 -04:00
parent 4d4f745918
commit d02b1a17ad
1 changed files with 36 additions and 28 deletions

View File

@ -19,7 +19,7 @@ Graphics related downsampling routines for compressing to pixel
limits on the display device. limits on the display device.
''' '''
from typing import Optional import math
import numpy as np import numpy as np
# from numpy.lib.recfunctions import structured_to_unstructured # from numpy.lib.recfunctions import structured_to_unstructured
@ -141,7 +141,9 @@ def downsample(
Downsample x/y data for lesser curve graphics gen. Downsample x/y data for lesser curve graphics gen.
The "peak" method is originally copied verbatim from The "peak" method is originally copied verbatim from
``pyqtgraph.PlotDataItem.getDisplayDataset()``. ``pyqtgraph.PlotDataItem.getDisplayDataset()`` which gets
all credit, though we will likely drop this in favor of the M4
algo below.
''' '''
# py3.10 syntax # py3.10 syntax
@ -180,14 +182,13 @@ def ds_m4(
# in display-device-local pixel units. # in display-device-local pixel units.
px_width: int, px_width: int,
factor: Optional[int] = None, ) -> tuple[int, np.ndarray, np.ndarray]:
) -> tuple[np.ndarray, np.ndarray]:
''' '''
Downsample using the M4 algorithm. Downsample using the M4 algorithm.
''' This is more or less an OHLC style sampling of a line-style series.
'''
# NOTE: this method is a so called "visualization driven data # NOTE: this method is a so called "visualization driven data
# aggregation" approach. It gives error-free line chart # aggregation" approach. It gives error-free line chart
# downsampling, see # downsampling, see
@ -212,24 +213,34 @@ def ds_m4(
# you could in theory pass these as the slicing params, # you could in theory pass these as the slicing params,
# do we care though since we can always just pre-slice the # do we care though since we can always just pre-slice the
# input? # input?
x_start = 0 # x index start x_start = x[0] # x value start/lowest in domain
x_end = len(x) # x index end x_end = x[-1] # x end value/highest in domain
# uppx: units-per-pixel # XXX: always round up on the input pixels
pts_per_pixel = len(x) / px_width px_width = math.ceil(px_width)
print(f'UPPX: {pts_per_pixel}')
x_range = x_end - x_start
# ratio of indexed x-value to width of raster in pixels. # ratio of indexed x-value to width of raster in pixels.
if factor is None: # this is more or less, uppx: units-per-pixel.
w = (x_end-x_start) / float(px_width) w = x_range / float(px_width)
print(f' pts/pxs = {w}')
else: # ensure we make more than enough
w = factor # frames (windows) for the output pixel
frames = px_width
# if we have more than an exact integer's
# (uniform quotient output) worth of datum-domain-points
# per window-frame, add one more window to ensure
# we have room for all output down-samples.
pts_per_pixel, r = divmod(len(x), px_width)
if r:
frames += 1
# these are pre-allocated and mutated by ``numba`` # these are pre-allocated and mutated by ``numba``
# code in-place. # code in-place.
ds = np.zeros((px_width, 4), y.dtype) y_out = np.zeros((frames, 4), y.dtype)
i_win = np.zeros(px_width, x.dtype) i_win = np.zeros(frames, x.dtype)
# call into ``numba`` # call into ``numba``
nb = _m4( nb = _m4(
@ -237,7 +248,7 @@ def ds_m4(
y, y,
i_win, i_win,
ds, y_out,
# first index in x data to start at # first index in x data to start at
x_start, x_start,
@ -245,9 +256,8 @@ def ds_m4(
# scaled by the ratio of pixels on screen to data in x-range). # scaled by the ratio of pixels on screen to data in x-range).
w, w,
) )
print(f'downsampled to {nb} bins')
return i_win, ds.flatten() return nb, i_win, y_out
@jit( @jit(
@ -275,13 +285,11 @@ def _m4(
bincount = 0 bincount = 0
x_left = x_start x_left = x_start
# Find the first window's starting index which *includes* the # Find the first window's starting value which *includes* the
# first value in the x-domain array. # first value in the x-domain array, i.e. the first
# (this allows passing in an array which is indexed (and thus smaller then) # "left-side-of-window" **plus** the downsampling step,
# the ``x_start`` value normally passed in - say if you normally # creates a window which includes the first x **value**.
# want to start 0-indexed. while xs[0] >= x_left + step:
first = xs[0]
while first >= x_left + step:
x_left += step x_left += step
# set all bins in the left-most entry to the starting left-most x value # set all bins in the left-most entry to the starting left-most x value