# Source code for dclab.kde_methods

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Kernel Density Estimation methods"""
from __future__ import division, print_function, unicode_literals

import numpy as np
from scipy.interpolate import RectBivariateSpline
from scipy.stats import gaussian_kde, skew

from .cached import Cache
from .external.statsmodels.nonparametric.kernel_density import KDEMultivariate

def bin_num_doane(a):
    """Compute number of bins based on Doane's formula

    Parameters
    ----------
    a: numpy array
        Input data; NaN and infinite entries are ignored when the
        data range is determined.

    Returns
    -------
    num: int
        The finite data range divided by the Doane bin width
        (see :func:`bin_width_doane`), rounded to the nearest integer.
    """
    # Only finite values may contribute to the data range.
    bad = np.isnan(a) | np.isinf(a)
    data = a[~bad]
    # `bin_width_doane` filters non-finite values itself, so the
    # unfiltered array is passed on.
    acc = bin_width_doane(a)
    # `np.int` was a plain alias of the builtin `int` and has been removed
    # from NumPy (deprecated in 1.20, removed in 1.24); use the builtin.
    num = int(np.round((data.max() - data.min()) / acc))
    return num


def bin_width_doane(a):
    """Compute accuracy (bin width) based on Doane's formula

    Parameters
    ----------
    a: numpy array
        Input data; NaN and infinite entries are ignored.

    Returns
    -------
    acc: float
        Range of the finite data divided by Doane's number of bins
        ``k = 1 + log2(n) + log2(1 + |g1| / sigma_g1)``, where `n` is
        the number of finite values and `g1` their skewness.

    Notes
    -----
    With fewer than three finite values `sigma_g1` is zero (``n == 2``)
    or NaN (``n == 1``), so the result is not meaningful in that case.

    References
    ----------
    - `<https://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width>`_
    - `<https://stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning>`_
    """
    # Discard everything that is not a finite number.
    bad = np.isnan(a) | np.isinf(a)
    data = a[~bad]
    n = data.size
    # Sample skewness and its standard error as used by Doane's formula.
    g1 = skew(data)
    sigma_g1 = np.sqrt(6 * (n - 2) / ((n + 1) * (n + 3)))
    # Doane's number of bins (not rounded here).
    k = 1 + np.log2(n) + np.log2(1 + np.abs(g1) / sigma_g1)
    acc = (data.max() - data.min()) / k
    return acc


def get_bad_vals(x, y):
    """Flag positions where `x` or `y` is NaN or infinite

    Parameters
    ----------
    x, y: numpy arrays or scalars
        Values to check; they are combined element-wise.

    Returns
    -------
    bad: boolean array (or numpy boolean for scalar input)
        True wherever at least one of `x` and `y` is NaN or +/-inf.
    """
    return np.isnan(x) | np.isinf(x) | np.isnan(y) | np.isinf(y)