Source code for dclab.kde.base

import warnings

import numpy as np
from scipy.interpolate import RegularGridInterpolator as RGI

from ..cached import umbrella_cache
from ..definitions import feat_const
from .binning import bin_width_percentile
from .contours import find_contours_level, get_quantile_levels
from .helpers import get_bad_vals
from .methods import methods


def kde_handler(kde):
    return [
        # uniquely identifies an RTDC file, hierarchy child, or online data
        kde.rtdc_ds.hash,
        # filters that are set define the plot data points
        kde.rtdc_ds.filter.all,
        # custom computation arguments influence e.g. Young's modulus
        kde.rtdc_ds.config.get("calculation", ""),
    ]


class ContourSpacingTooLarge(UserWarning):
    pass


[docs] class KernelDensityEstimator: def __init__(self, rtdc_ds): self.rtdc_ds = rtdc_ds
[docs] @staticmethod def apply_scale(a, scale, feat): """Helper function for transforming an array to log-scale Parameters ---------- a: np.ndarray Input array scale: str If set to "log", take the logarithm of `a`; if set to "linear" return `a` unchanged. feat: str Feature name (required for debugging) Returns ------- b: np.ndarray The scaled array Notes ----- If the scale is not "linear", then a new array is returned. All warnings are suppressed when computing `np.log(a)`, as `a` may have negative or nan values. """ if scale == "linear": b = a elif scale == "log": with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") b = np.log(a) if len(w): # Tell the user that the log-transformation issued # a warning. warnings.warn( f"Invalid values encounterd in np.log " f"while scaling feature '{feat}'!" ) else: raise ValueError( f"`scale` must be either 'linear' or 'log', got '{scale}'!" ) return b
[docs] @staticmethod def check_feat_kde_applicability(xax: str, yax: str) -> bool: """Return True when it makes sense to compute KDE data""" if xax == yax: # Same feature -> just a line return False elif (xax in feat_const.FEATURES_MONOTONOUS and yax in feat_const.FEATURES_MONOTONOUS): # Monotonous features -> just a line return False else: return True
[docs] @staticmethod def estimate_spacing( a, method, scale="linear", method_kw=None, feat="undefined", ret_scaled=False ): """Helper function for guessing the spacing/accuracy for KDE plots Parameters ---------- a: ndarray feature data scale: str how the data should be scaled ("log" or "linear") method: callable Binning method to use method_kw: dict keyword arguments to `method` feat: str feature name for debugging ret_scaled: bool whether to return the scaled array of `a` """ if method_kw is None: method_kw = {} # Apply scale (no change for linear scale) asc = KernelDensityEstimator.apply_scale(a, scale, feat) # Apply multiplicator acc = method(asc, **method_kw) if ret_scaled: return acc, asc else: return acc
[docs] @umbrella_cache( topic="kde-at", custom_handlers={"KernelDensityEstimator": kde_handler} ) def get_at( self, xax="area_um", yax="deform", positions=None, kde_type="histogram", kde_kwargs=None, xscale="linear", yscale="linear", xacc=None, yacc=None, ): """Fast evaluation of the KDE via linear interpolation The KDE is computed via linear interpolation from the output of :meth:`KernelDensityEstimator.get_raster`. Parameters ---------- xax, yax: str Identifier for X- and Y-axis (e.g. "area_um", "aspect", "deform") positions: list of two 1d ndarrays or ndarray of shape (2, N) The positions where the KDE will be computed. Note that the KDE estimate is computed from the points that are set in `self.rtdc_ds.filter.all`. kde_type: str The KDE method to use, see :const:`.kde_methods.methods` kde_kwargs: dict Additional keyword arguments to the KDE method xscale, yscale: str If set to "log", take the logarithm of the axes values before computing the KDE. This is useful when data are displayed on a log-scale. Defaults to "linear". xacc, yacc: float KDE grid spacing in x and y direction if set to None, will use :func:`.bin_width_percentile` Returns ------- density : 1d ndarray The kernel density evaluated for the filtered events. """ if kde_kwargs is None: kde_kwargs = {} xax = xax.lower() yax = yax.lower() kde_type = kde_type.lower() if kde_type not in methods: raise ValueError(f"Not a valid kde type: {kde_type}!") # Get data x = self.rtdc_ds[xax][self.rtdc_ds.filter.all] y = self.rtdc_ds[yax][self.rtdc_ds.filter.all] # Apply scale (no change for linear scale) if positions: xs = KernelDensityEstimator.apply_scale(positions[0], xscale, xax) ys = KernelDensityEstimator.apply_scale(positions[1], yscale, yax) else: xs = KernelDensityEstimator.apply_scale(x, xscale, xax) ys = KernelDensityEstimator.apply_scale(y, yscale, yax) if len(xs): xr, yr, density_grid = self.get_raster( xax=xax, yax=yax, kde_type=kde_type, kde_kwargs=kde_kwargs, xscale=xscale, yscale=yscale, xacc=xacc, yacc=yacc, ) if xr.size: # Apply scale (no change for linear scale) xrs = KernelDensityEstimator.apply_scale(xr, xscale, xax) yrs = KernelDensityEstimator.apply_scale(yr, yscale, yax) # 'scipy.interp2d' has been removed in SciPy 1.14.0 # https://scipy.github.io/devdocs/tutorial/interpolate/interp_transition_guide.html interp_func = RGI( (xrs[:, 0], yrs[0, :]), density_grid, method="linear", bounds_error=False, fill_value=np.nan, ) density = interp_func((xs, ys)) else: # We don't have a raster to interpolate on. This can happen # when a feature has all-equal values. density = np.full(len(xs), np.nan) else: # No input coordinates. density = np.array([]) return density
[docs] @umbrella_cache( topic="kde-contour-lines", custom_handlers={"KernelDensityEstimator": kde_handler}, ) def get_contour_lines( self, quantiles=None, xax="area_um", yax="deform", kde_type="histogram", kde_kwargs=None, xscale="linear", yscale="linear", xacc=None, yacc=None, ret_levels=False, ): """Compute contour lines for a given kernel density estimate. Parameters ---------- quantiles: list or array of floats KDE Quantiles for which contour levels are computed. The values must be between 0 and 1. If set to None, use [0.5, 0.95] as default. xax, yax: str Identifier for X- and Y-axis (e.g. "area_um", "aspect", "deform") kde_type: str The KDE method to use kde_kwargs: dict Additional keyword arguments to the KDE method xscale, yscale: str If set to "log", take the logarithm of the axis-values before computing the KDE. This is useful when data are displayed on a log-scale. Defaults to "linear". xacc, yacc: float KDE grid spacing in x and y direction; Affects contour shape. If set to None, will use :func:`.find_smooth_contour_spacing` ret_levels: bool If set to True, return the levels of the contours (default: False) Returns ------- contour_lines: list of lists (of lists) For every number in `quantiles`, this list contains a list of corresponding contour lines. Each contour line is a 2D array of shape (N, 2), where N is the number of points in the contour line. levels: list of floats The density levels corresponding to each number in `quantiles`. Only returned if `ret_levels` is set to True. """ if not quantiles: quantiles = [0.5, 0.95] if not KernelDensityEstimator.check_feat_kde_applicability(xax, yax): # There is no point in computing contours. contours = [[]] * len(quantiles) levels = [np.nan] * len(quantiles) if ret_levels: return contours, levels else: return contours if xacc is None or yacc is None: # workaround for circular import from .smooth_contour import find_smooth_contour_spacing sc_res = find_smooth_contour_spacing( [self.rtdc_ds], xax=xax, yax=yax, xrange=(np.min(self.rtdc_ds[xax]), np.max(self.rtdc_ds[xax])), yrange=(np.min(self.rtdc_ds[yax]), np.max(self.rtdc_ds[yax])), quantiles=quantiles, xscale=xscale, yscale=yscale, ) xacc = xacc or sc_res["spacing x"] yacc = yacc or sc_res["spacing y"] try: x, y, density = self.get_raster( xax=xax, yax=yax, xacc=xacc, yacc=yacc, xscale=xscale, yscale=yscale, kde_type=kde_type, kde_kwargs=kde_kwargs, ) except ValueError: # most-likely there is nothing to compute a contour for return [] if density.shape[0] < 3 or density.shape[1] < 3: warnings.warn( "Contour not possible; spacing may be too large!", ContourSpacingTooLarge, ) return [] levels = get_quantile_levels( density=density, x=x, y=y, xp=self.rtdc_ds[xax][self.rtdc_ds.filter.all], yp=self.rtdc_ds[yax][self.rtdc_ds.filter.all], q=np.array(quantiles), normalize=False, ) contours = [] # Normalize levels to [0, 1] nlevels = np.array(levels) / density.max() for nlev in nlevels: # make sure that the contour levels are not at the boundaries if not ( np.allclose(nlev, 0, atol=1e-12, rtol=0) or np.allclose(nlev, 1, atol=1e-12, rtol=0) ): cc = find_contours_level(density, x=x, y=y, level=nlev) contours.append(cc) else: contours.append([]) if ret_levels: return contours, levels else: return contours
[docs] @umbrella_cache( topic="kde-raster", custom_handlers={"KernelDensityEstimator": kde_handler} ) def get_raster( self, xax="area_um", yax="deform", kde_type="histogram", kde_kwargs=None, xscale="linear", yscale="linear", xacc=None, yacc=None, ): """Evaluate the kernel density estimate on a grid Parameters ---------- xax, yax: str Identifier for X- and Y-axis (e.g. "area_um", "aspect", "deform") kde_type: str The KDE method to use kde_kwargs: dict Additional keyword arguments to the KDE method xscale, yscale: str If set to "log", take the logarithm of the axes values before computing the KDE. This is useful when data are displayed on a log-scale. Defaults to "linear". xacc, yacc: float Grid spacing size for the axes; If set to None, will use :func:`bin_width_percentile` Returns ------- X, Y, Z : coordinates The kernel density Z evaluated on a rectangular grid (X,Y). """ if kde_kwargs is None: kde_kwargs = {} kde_type = kde_type.lower() if kde_type not in methods: raise ValueError(f"Not a valid kde type: {kde_type}!") # Get data x = self.rtdc_ds[xax][self.rtdc_ds.filter.all] y = self.rtdc_ds[yax][self.rtdc_ds.filter.all] xs = KernelDensityEstimator.apply_scale(x, scale=xscale, feat=xax) ys = KernelDensityEstimator.apply_scale(y, scale=yscale, feat=yax) if xacc is None or xacc == 0: xacc = bin_width_percentile(xs) if yacc is None or yacc == 0: yacc = bin_width_percentile(ys) # Ignore infs and nans bad = get_bad_vals(xs, ys) xc = xs[~bad] yc = ys[~bad] if xc.size: # Compute the mesh for rastering xnum = int(np.ceil((xc.max() - xc.min()) / xacc)) ynum = int(np.ceil((yc.max() - yc.min()) / yacc)) xlin = np.linspace(xc.min(), xc.max(), xnum, endpoint=True) ylin = np.linspace(yc.min(), yc.max(), ynum, endpoint=True) xmesh, ymesh = np.meshgrid(xlin, ylin, indexing="ij") # Compute the KDE for each point on the mesh kde_fct = methods[kde_type] density = kde_fct( events_x=xs, events_y=ys, xout=xmesh, yout=ymesh, **kde_kwargs ) else: xmesh, ymesh = np.meshgrid([0, 1], [0, 1], indexing="ij") density = np.array(np.nan * xmesh) # Convert mesh back to linear scale if applicable if xscale == "log": xmesh = np.exp(xmesh) if yscale == "log": ymesh = np.exp(ymesh) return xmesh, ymesh, density
[docs] @umbrella_cache( topic="kde-scatter", custom_handlers={"KernelDensityEstimator": kde_handler} ) def get_scatter( self, xax="area_um", yax="deform", positions=None, kde_type="histogram", kde_kwargs=None, xscale="linear", yscale="linear", ): """Evaluate the KDE at specific positions The KDE is evaluated with the `kde_type` function for every point, which is significantly slower than using interpolation via :meth:`.KernelDensityEstimator.get_at`. Parameters ---------- xax, yax: str Identifier for X- and Y-axis (e.g. "area_um", "aspect", "deform") positions: list of two 1d ndarrays or ndarray of shape (2, N) The positions where the KDE will be computed. Note that the KDE estimate is computed from the points that are set in `self.rtdc_ds.filter.all`. kde_type: str The KDE method to use, see :const:`.kde_methods.methods` kde_kwargs: dict Additional keyword arguments to the KDE method xscale, yscale: str If set to "log", take the logarithm of the axis-values before computing the KDE. This is useful when data are displayed on a log-scale. Defaults to "linear". Returns ------- density : 1d ndarray The kernel density evaluated for the filtered data points. """ if kde_kwargs is None: kde_kwargs = {} xax = xax.lower() yax = yax.lower() kde_type = kde_type.lower() if kde_type not in methods: raise ValueError(f"Not a valid kde type: {kde_type}!") # Get data x = self.rtdc_ds[xax][self.rtdc_ds.filter.all] y = self.rtdc_ds[yax][self.rtdc_ds.filter.all] # Apply scale (no change for linear scale) xs = KernelDensityEstimator.apply_scale(x, xscale, xax) ys = KernelDensityEstimator.apply_scale(y, yscale, yax) if positions is None: px = None py = None else: px = KernelDensityEstimator.apply_scale(positions[0], xscale, xax) py = KernelDensityEstimator.apply_scale(positions[1], yscale, yax) kde_fct = methods[kde_type] if len(x): density = kde_fct( events_x=xs, events_y=ys, xout=px, yout=py, **kde_kwargs ) else: density = np.array([]) return density