Add (ostensibly) working first attempt at M4 algo
All the refs are in the comments and original sample code from infinite has been reworked to expect the input x/y arrays to already be sliced (though we can later support passing in the start-end indexes if desired). The new routines are `ds_m4()` the python top level API and `_m4()` the fast `numba` implementation.mkts_backup
							parent
							
								
									803c65bc88
								
							
						
					
					
						commit
						11bda4f9b4
					
				| 
						 | 
					@ -19,13 +19,13 @@ Graphics related downsampling routines for compressing to pixel
 | 
				
			||||||
limits on the display device.
 | 
					limits on the display device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
# from typing import Optional
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
# from numpy.lib.recfunctions import structured_to_unstructured
 | 
					# from numpy.lib.recfunctions import structured_to_unstructured
 | 
				
			||||||
from numba import (
 | 
					from numba import (
 | 
				
			||||||
    jit,
 | 
					    jit,
 | 
				
			||||||
    float64, optional, int64,
 | 
					    # float64, optional, int64,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..log import get_logger
 | 
					from ..log import get_logger
 | 
				
			||||||
| 
						 | 
					@ -36,7 +36,7 @@ log = get_logger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def hl2mxmn(
 | 
					def hl2mxmn(
 | 
				
			||||||
    ohlc: np.ndarray,
 | 
					    ohlc: np.ndarray,
 | 
				
			||||||
    downsample_by: int = 0,
 | 
					    # downsample_by: int = 0,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
) -> np.ndarray:
 | 
					) -> np.ndarray:
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
| 
						 | 
					@ -61,17 +61,19 @@ def hl2mxmn(
 | 
				
			||||||
    trace_hl(hls, mxmn, x, index[0])
 | 
					    trace_hl(hls, mxmn, x, index[0])
 | 
				
			||||||
    x = x + index[0]
 | 
					    x = x + index[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if downsample_by < 2:
 | 
					 | 
				
			||||||
    return mxmn, x
 | 
					    return mxmn, x
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    dsx, dsy = downsample(
 | 
					    # if downsample_by < 2:
 | 
				
			||||||
        y=mxmn,
 | 
					    #     return mxmn, x
 | 
				
			||||||
        x=x,
 | 
					
 | 
				
			||||||
        bins=downsample_by,
 | 
					    # dsx, dsy = downsample(
 | 
				
			||||||
    )
 | 
					    #     y=mxmn,
 | 
				
			||||||
    log.info(f'downsampling by {downsample_by}')
 | 
					    #     x=x,
 | 
				
			||||||
    print(f'downsampling by {downsample_by}')
 | 
					    #     bins=downsample_by,
 | 
				
			||||||
    return dsy, dsx
 | 
					    # )
 | 
				
			||||||
 | 
					    # log.info(f'downsampling by {downsample_by}')
 | 
				
			||||||
 | 
					    # print(f'downsampling by {downsample_by}')
 | 
				
			||||||
 | 
					    # return dsy, dsx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@jit(
 | 
					@jit(
 | 
				
			||||||
| 
						 | 
					@ -129,8 +131,11 @@ def downsample(
 | 
				
			||||||
    x: np.ndarray,
 | 
					    x: np.ndarray,
 | 
				
			||||||
    y: np.ndarray,
 | 
					    y: np.ndarray,
 | 
				
			||||||
    bins: int = 2,
 | 
					    bins: int = 2,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    method: str = 'peak',
 | 
					    method: str = 'peak',
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    **kwargs,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
) -> tuple[np.ndarray, np.ndarray]:
 | 
					) -> tuple[np.ndarray, np.ndarray]:
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    Downsample x/y data for lesser curve graphics gen.
 | 
					    Downsample x/y data for lesser curve graphics gen.
 | 
				
			||||||
| 
						 | 
					@ -142,7 +147,6 @@ def downsample(
 | 
				
			||||||
    # py3.10 syntax
 | 
					    # py3.10 syntax
 | 
				
			||||||
    match method:
 | 
					    match method:
 | 
				
			||||||
        case 'peak':
 | 
					        case 'peak':
 | 
				
			||||||
            # breakpoint()
 | 
					 | 
				
			||||||
            if bins < 2:
 | 
					            if bins < 2:
 | 
				
			||||||
                log.warning('No downsampling taking place?')
 | 
					                log.warning('No downsampling taking place?')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -164,11 +168,37 @@ def downsample(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            return x, y
 | 
					            return x, y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # TODO: this algo from infinite, see
 | 
					        case 'm4':
 | 
				
			||||||
        # https://github.com/pikers/piker/issues/109
 | 
					            return ds_m4(x, y, kwargs['px_width'])
 | 
				
			||||||
        case 'infinite_4px':
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # Ex. from infinite on downsampling viewable graphics.
 | 
					
 | 
				
			||||||
 | 
					def ds_m4(
 | 
				
			||||||
 | 
					    x: np.ndarray,
 | 
				
			||||||
 | 
					    y: np.ndarray,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # this is the width of the data in view
 | 
				
			||||||
 | 
					    # in display-device-local pixel units.
 | 
				
			||||||
 | 
					    px_width: int,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    factor: Optional[int] = None,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					) -> tuple[np.ndarray, np.ndarray]:
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    Downsample using the M4 algorithm.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # NOTE: this method is a so called "visualization driven data
 | 
				
			||||||
 | 
					    # aggregation" approach. It gives error-free line chart
 | 
				
			||||||
 | 
					    # downsampling, see
 | 
				
			||||||
 | 
					    # further scientific paper resources:
 | 
				
			||||||
 | 
					    # - http://www.vldb.org/pvldb/vol7/p797-jugel.pdf
 | 
				
			||||||
 | 
					    # - http://www.vldb.org/2014/program/papers/demo/p997-jugel.pdf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Details on implementation of this algo are based in,
 | 
				
			||||||
 | 
					    # https://github.com/pikers/piker/issues/109
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # XXX: from infinite on downsampling viewable graphics:
 | 
				
			||||||
    # "one thing i remembered about the binning - if you are
 | 
					    # "one thing i remembered about the binning - if you are
 | 
				
			||||||
    # picking a range within your timeseries the start and end bin
 | 
					    # picking a range within your timeseries the start and end bin
 | 
				
			||||||
    # should be one more bin size outside the visual range, then
 | 
					    # should be one more bin size outside the visual range, then
 | 
				
			||||||
| 
						 | 
					@ -176,75 +206,104 @@ def downsample(
 | 
				
			||||||
    # "i didn't show it in the sample code, but it's accounted for
 | 
					    # "i didn't show it in the sample code, but it's accounted for
 | 
				
			||||||
    # in the start and end indices and number of bins"
 | 
					    # in the start and end indices and number of bins"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            def build_subchart(
 | 
					    assert px_width > 1  # width of screen in pxs?
 | 
				
			||||||
                self,
 | 
					 | 
				
			||||||
                subchart,
 | 
					 | 
				
			||||||
                width,  # width of screen in pxs?
 | 
					 | 
				
			||||||
                chart_type,
 | 
					 | 
				
			||||||
                lower,  # x start?
 | 
					 | 
				
			||||||
                upper,  # x end?
 | 
					 | 
				
			||||||
                xvals,
 | 
					 | 
				
			||||||
                yvals
 | 
					 | 
				
			||||||
            ):
 | 
					 | 
				
			||||||
                pts_per_pixel = len(xvals) / width
 | 
					 | 
				
			||||||
                if pts_per_pixel > 1:
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    # this is mutated in-place
 | 
					    # NOTE: if we didn't pre-slice the data to downsample
 | 
				
			||||||
                    data = np.zeros((width, 4), yvals.dtype)
 | 
					    # you could in theory pass these as the slicing params,
 | 
				
			||||||
                    bins = np.zeros(width, xvals.dtype)
 | 
					    # do we care though since we can always just pre-slice the
 | 
				
			||||||
 | 
					    # input?
 | 
				
			||||||
 | 
					    x_start = 0  # x index start
 | 
				
			||||||
 | 
					    x_end = len(x)  # x index end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    nb = subset_by_x(
 | 
					    # uppx: units-per-pixel
 | 
				
			||||||
                        xvals,
 | 
					    pts_per_pixel = len(x) / px_width
 | 
				
			||||||
                        yvals,
 | 
					    print(f'UPPX: {pts_per_pixel}')
 | 
				
			||||||
                        bins,
 | 
					
 | 
				
			||||||
                        data,
 | 
					    # ratio of indexed x-value to width of raster in pixels.
 | 
				
			||||||
                        lower,
 | 
					    if factor is None:
 | 
				
			||||||
                        # this is scaling the x-range by
 | 
					        w = (x_end-x_start) / float(px_width)
 | 
				
			||||||
                        # the width of the screen?
 | 
					        print(f' pts/pxs = {w}')
 | 
				
			||||||
                        (upper-lower)/float(width),
 | 
					    else:
 | 
				
			||||||
 | 
					        w = factor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # these are pre-allocated and mutated by ``numba``
 | 
				
			||||||
 | 
					    # code in-place.
 | 
				
			||||||
 | 
					    ds = np.zeros((px_width, 4), y.dtype)
 | 
				
			||||||
 | 
					    i_win = np.zeros(px_width, x.dtype)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # call into ``numba``
 | 
				
			||||||
 | 
					    nb = _m4(
 | 
				
			||||||
 | 
					        x,
 | 
				
			||||||
 | 
					        y,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        i_win,
 | 
				
			||||||
 | 
					        ds,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # first index in x data to start at
 | 
				
			||||||
 | 
					        x_start,
 | 
				
			||||||
 | 
					        # window size for each "frame" of data to downsample (normally
 | 
				
			||||||
 | 
					        # scaled by the ratio of pixels on screen to data in x-range).
 | 
				
			||||||
 | 
					        w,
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    print(f'downsampled to {nb} bins')
 | 
					    print(f'downsampled to {nb} bins')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            return x, y
 | 
					    return i_win, ds.flatten()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@jit(nopython=True)
 | 
					@jit(
 | 
				
			||||||
def subset_by_x(
 | 
					    nopython=True,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					def _m4(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    xs: np.ndarray,
 | 
					    xs: np.ndarray,
 | 
				
			||||||
    ys: np.ndarray,
 | 
					    ys: np.ndarray,
 | 
				
			||||||
    bins: np.ndarray,
 | 
					
 | 
				
			||||||
    data: np.ndarray,
 | 
					    # pre-alloc array of x indices mapping to the start
 | 
				
			||||||
 | 
					    # of each window used for downsampling in y.
 | 
				
			||||||
 | 
					    i_win: np.ndarray,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # pre-alloc array of output downsampled y values
 | 
				
			||||||
 | 
					    ds: np.ndarray,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    x_start: int,
 | 
					    x_start: int,
 | 
				
			||||||
    step: float,
 | 
					    step: float,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
) -> int:
 | 
					) -> int:
 | 
				
			||||||
    # nbins = len(bins)
 | 
					    # nbins = len(i_win)
 | 
				
			||||||
    count = len(xs)
 | 
					    # count = len(xs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bincount = 0
 | 
					    bincount = 0
 | 
				
			||||||
    x_left = x_start
 | 
					    x_left = x_start
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Find the first bin
 | 
					    # Find the first window's starting index which *includes* the
 | 
				
			||||||
 | 
					    # first value in the x-domain array.
 | 
				
			||||||
 | 
					    # (this allows passing in an array which is indexed (and thus smaller then)
 | 
				
			||||||
 | 
					    # the ``x_start`` value normally passed in - say if you normally
 | 
				
			||||||
 | 
					    # want to start 0-indexed.
 | 
				
			||||||
    first = xs[0]
 | 
					    first = xs[0]
 | 
				
			||||||
    while first >= x_left + step:
 | 
					    while first >= x_left + step:
 | 
				
			||||||
        x_left += step
 | 
					        x_left += step
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bins[bincount] = x_left
 | 
					    # set all bins in the left-most entry to the starting left-most x value
 | 
				
			||||||
    data[bincount] = ys[0]
 | 
					    # (aka a row broadcast).
 | 
				
			||||||
 | 
					    i_win[bincount] = x_left
 | 
				
			||||||
 | 
					    # set all y-values to the first value passed in.
 | 
				
			||||||
 | 
					    ds[bincount] = ys[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for i in range(count):
 | 
					    for i in range(len(xs)):
 | 
				
			||||||
        x = xs[i]
 | 
					        x = xs[i]
 | 
				
			||||||
        y = ys[i]
 | 
					        y = ys[i]
 | 
				
			||||||
        if x < x_left + step:   # Interval is [bin, bin+1)
 | 
					        if x < x_left + step:   # the current window "step" is [bin, bin+1)
 | 
				
			||||||
            data[bincount, 1] = min(y, data[bincount, 1])
 | 
					            ds[bincount, 1] = min(y, ds[bincount, 1])
 | 
				
			||||||
            data[bincount, 2] = max(y, data[bincount, 2])
 | 
					            ds[bincount, 2] = max(y, ds[bincount, 2])
 | 
				
			||||||
            data[bincount, 3] = y
 | 
					            ds[bincount, 3] = y
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            # Find the next bin
 | 
					            # Find the next bin
 | 
				
			||||||
            while x >= x_left + step:
 | 
					            while x >= x_left + step:
 | 
				
			||||||
                x_left += step
 | 
					                x_left += step
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            bincount += 1
 | 
					            bincount += 1
 | 
				
			||||||
            bins[bincount] = x_left
 | 
					            i_win[bincount] = x_left
 | 
				
			||||||
            data[bincount] = y
 | 
					            ds[bincount] = y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return bincount
 | 
					    return bincount
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue