Source code for deerlab.diststats

# diststats.py - Distance distribution descriptors
# -----------------------------------------------
# This file is a part of DeerLab. License is MIT (see LICENSE.md).
# Copyright(c) 2019-2023: Luis Fabregas, Stefan Stoll and other contributors.

import numpy as np
import warnings
import copy
from scipy.signal import find_peaks
from scipy.integrate import cumtrapz

[docs] def diststats(r, P, Puq=None, verbose=False, threshold=None): r""" Computes statistical quantities for the location, spread, and shape of a distance distribution, with or without their corresponding uncertainties. This function calculates various statistical quantities of a distance distribution, such as its mean, median, mode, etc. It can also calculate the uncertainty of these quantities using moment-based or bootstrapped uncertainty quantification, if provided. The function allows to specify whether to print a summary of the results and the peak detection threshold for the calculation of the mode of the distribution. Parameters ---------- r : array_like Distance axis, in nm. The distances in the distribution. P : array_like Distance distribution, does not have to be normalized. The probability or relative frequency of the distances in `r`. Puq : :ref:`UQResult`, optional Uncertainty quantification of the distance distribution. If Puq is not given, a single output is returned without any uncertainty estimation. If given, two outputs are returned containing the uncertainty estimation. verbose : boolean, optional Print a summary of all statistical quantities (and their uncertainties if calculated). threshold : float, optional Peak detection threshold for the calculation of modes of a distribution. The default is ``max(P)/10``. Returns ------- estimators : dict Dictionary of shape, location, and spread descriptors of the input distance distribution: General parameters * ``'rmin'`` - Minimum distance in the distribution range, in nm * ``'rmax'`` - Maximum distance in the distribution range, in nm * ``'int'`` - Integral of the distance distribution Location parameters * ``'mean'`` or ``'moment1'`` - Mean distance, in nm (see `more <https://en.wikipedia.org/wiki/Mean>`__) * ``'median'`` - Median distance, in nm (see `more <https://en.wikipedia.org/wiki/Median>`__) * ``'iqm'`` - Interquartile mean (IQM) distance, in nm (see `more <https://en.wikipedia.org/wiki/Interquartile_mean>`__) * ``'mode'`` - First modal distance, in nm (see `more <https://en.wikipedia.org/wiki/Mode_(statistics)>`__) * ``'modes'`` - All modal distances, in nm (see `more <https://en.wikipedia.org/wiki/Mode_(statistics)>`__) Spread parameters * ``'iqr'`` - Interquartile range, in nm (see `more <https://en.wikipedia.org/wiki/Interquartile_range>`__) * ``'mad'`` - Mean absolute deviation (MAD), in nm (see `more <https://en.wikipedia.org/wiki/Average_absolute_deviation>`__) * ``'std'`` - Standard deviation, in nm (see `more <https://en.wikipedia.org/wiki/Standard_deviation>`__) * ``'var'`` or ``'moment2'`` - Variance, in nm² (see `more <https://en.wikipedia.org/wiki/Variance>`__) * ``'entropy'`` - Shannon entropy, in nat (see `more <https://en.wikipedia.org/wiki/Entropy_(information_theory)>`__) Shape parameters * ``'modality'`` - Modality (number of modes) * ``'skewness'`` or ``'moment3'`` - Skewness (see `more <https://en.wikipedia.org/wiki/Skewness>`__) * ``'kurtosis'`` - Excess kurtosis (see `more <https://en.wikipedia.org/wiki/Kurtosis>`__) * ``'moment4'`` - 4th moment (kurtosis) (see `more <https://en.wikipedia.org/wiki/Kurtosis>`__) uq : dict of :ref:`UQResult` Dictionary of the parameters moment-based uncertainty quantification. See above for the dictionary keys. Only calculated if ``Puq`` is provided. Notes ----- For the ``'mode'``, ``'modes'`` and ``'modality'`` parameters, moment-based uncertainties are not available. Uncertainties can, however, be calculated via bootsrapping of these quantities, e.g. :: def analyze_rmode(V): fit = dl.fitmodel(V,t,r) rmode = dl.diststats(fit.P,r)[0]['mode'] return rmode # Bootstrap analysis of distance mode rmode_uq = dl.bootstrap_analysis(V,Vfit,analyze_rmode) """ P,r = np.atleast_1d(P,r) # Check to avoid non-sensical results with syntax diststats(P,r) if not np.all(np.diff(r)>0): raise KeyError('The distance axis (1st argument) must be a monotnously increasing vector.') if threshold is None: threshold = np.max(P)/10 # Auxiliary functions # ------------------- int = np.trapz(P,r) if not np.all(P==0) else 1 def normalize(P): return P/int # Percentile function def pctile(r,P,p): cdf = cumtrapz(normalize(P),r,initial=0) cdf, index = np.lib.arraysetops.unique(cdf,return_index=True) rpctile = np.interp(p/100,cdf,r[index]) return rpctile # Expectation operator function def E(x,P,r): return np.trapz(x*normalize(P),r) # Location estimators # ------------------- # 1st moment - Mean meanfcn = lambda P: E(r,P,r) # Median medianfcn = lambda P: pctile(r,P,50) # Interquartile mean def iqmfcn(P): IQrange = (r>pctile(r,P,25)) & (r<pctile(r,P,75)) return E(r[IQrange],P[IQrange]/np.trapz(normalize(P)[IQrange],r[IQrange]),r[IQrange]) # Mode modefcn = lambda P: r[np.argmax(P)] # Modes modesfcn = lambda P: r[find_peaks(P,height=threshold)[0]] # Spread estimators # ----------------- # Interquartile range iqrfcn = lambda P: pctile(r,P,75) - pctile(r,P,25) # Mean absolute deviation madfcn = lambda P: E(abs(r - meanfcn(P)),P,r) # 2nd moment - Variance variancefcn = lambda P: E((r - meanfcn(P))**2,P,r) # 2nd moment - Standard deviation stdfcn = lambda P: np.sqrt(variancefcn(P)) # Entropy (information theory) entropyfcn = lambda P: -E(np.log(np.maximum(np.finfo(float).eps,normalize(P))),P,r) # Shape estimators # ---------------- # Modality modalityfcn = lambda P: np.size(modesfcn(P)) # 3rd moment - Skewness skewnessfcn = lambda P: E(((r - meanfcn(P))/stdfcn(P))**3,P,r) # 4th moment - Kurtosis kurtosisfcn = lambda P: E(((r - meanfcn(P))/stdfcn(P))**4,P,r) # Excess kurtosis exkurtosisfcn = lambda P: 3 - E(((r - meanfcn(P))/stdfcn(P))**4,P,r) # 0th moment - Integral intfcn = lambda P: np.trapz(P,r) # Calculate distribution estimators estimators = { 'rmin': min(r), 'rmax': max(r), 'int': intfcn(P), 'mean': meanfcn(P), 'median': medianfcn(P), 'mode': modefcn(P), 'modes': modesfcn(P), 'iqm': iqmfcn(P), 'mad': madfcn(P), 'var': variancefcn(P), 'std': stdfcn(P), 'iqr': iqrfcn(P), 'entropy': entropyfcn(P), 'modality': modalityfcn(P), 'skewness': skewnessfcn(P), 'kurtosis': exkurtosisfcn(P), 'moment1': meanfcn(P), 'moment2': variancefcn(P), 'moment3': skewnessfcn(P), 'moment4': kurtosisfcn(P), } # Calculate distribution estimator uncertainties if uncertainty of P is given uq = None if Puq is not None: uq = { 'rmin': None, 'rmax': None, 'int': None, 'mean': _propagation(Puq,meanfcn), 'median': _propagation(Puq,medianfcn), 'mode': None, 'modes': None, 'iqm': _propagation(Puq,iqmfcn), 'mad': _propagation(Puq,madfcn), 'var': _propagation(Puq,variancefcn), 'std': _propagation(Puq,stdfcn), 'iqr': _propagation(Puq,iqrfcn), 'entropy': _propagation(Puq,entropyfcn), 'modality': None, 'skewness': _propagation(Puq,skewnessfcn), 'kurtosis': _propagation(Puq,exkurtosisfcn), 'moment1': _propagation(Puq,meanfcn), 'moment2': _propagation(Puq,variancefcn), 'moment3': _propagation(Puq,skewnessfcn), 'moment4': _propagation(Puq,kurtosisfcn), } # Print if requested if verbose: _print_estimators(r,estimators,uq) return estimators,uq
def _propagation(Puq,fcn): # Propagate the uncertainty to the function with warnings.catch_warnings(): # Ignore warnings when evaluating Jacobians outside of reasonable bounds warnings.simplefilter('ignore') uq_ = Puq.propagate(fcn) uq = copy.deepcopy(uq_) def ci(p): paramci = np.atleast_2d(uq_.ci(p)) return [paramci[:,0][0],paramci[:,1][0]] # Wrap the ci() method to simplify array uq.ci = ci return uq def _print_estimators(r,estimators,uq): # Print summary of estimators if requested if uq is None: print('-------------------------------------------------') print('Distribution Statistics') print('-------------------------------------------------') print(f'Range {min(r):.2f}-{max(r):.2f} nm') print(f'Integral {estimators["int"]:.2f}') print('-------------------------------------------------') print('Location') print('-------------------------------------------------') print(f'Mean {estimators["mean"]:.2f} nm') print(f'Median {estimators["median"]:.2f} nm') print(f'Interquartile mean {estimators["iqm"]:.2f} nm') print(f'Mode {estimators["mode"]:.2f} nm') print('-------------------------------------------------') print('Spread') print('-------------------------------------------------') print(f'Standard deviation {estimators["std"]:.2f} nm') print(f'Mean absolute deviation {estimators["mad"]:.2f} nm') print(f'Interquartile range {estimators["iqr"]:.2f} nm') print(f'Variance {estimators["var"]:.2f} nm²') print('-------------------------------------------------') print('Shape') print('-------------------------------------------------') print(f'Modality {estimators["modality"]} ') print(f'Skewness {estimators["skewness"]:.2f} ') print(f'Excess kurtosis {estimators["kurtosis"]:.2f} ') print('-------------------------------------------------') else: print('-------------------------------------------------') print('Distribution Statistics') print('-------------------------------------------------') print(f'Range {min(r):.2f}-{max(r):.2f} nm') print(f'Integral {estimators["int"]:.2f}') print('-------------------------------------------------') print('Location') print('-------------------------------------------------') print(f'Range {min(r):.2f}-{max(r):.2f} nm') print(f'Mean {estimators["mean"]:.2f} ({uq["mean"].ci(95)[0]:.2f},{uq["mean"].ci(95)[1]:.2f}) nm') print(f'Median {estimators["median"]:.2f} ({uq["median"].ci(95)[0]:.2f},{uq["median"].ci(95)[1]:.2f}) nm') print(f'Interquartile mean {estimators["iqm"]:.2f} ({uq["iqm"].ci(95)[0]:.2f},{uq["iqm"].ci(95)[1]:.2f}) nm') print(f'Mode {estimators["mode"]:.2f} nm') print('-------------------------------------------------') print('Spread') print('-------------------------------------------------') print(f'Standard deviation {estimators["std"]:.2f} ({uq["std"].ci(95)[0]:.2f},{uq["std"].ci(95)[1]:.2f}) nm') print(f'Mean absolute deviation {estimators["mad"]:.2f} ({uq["mad"].ci(95)[0]:.2f},{uq["mad"].ci(95)[1]:.2f}) nm') print(f'Interquartile range {estimators["iqr"]:.2f} ({uq["iqr"].ci(95)[0]:.2f},{uq["iqr"].ci(95)[1]:.2f}) nm') print(f'Variance {estimators["var"]:.2f} ({uq["var"].ci(95)[0]:.2f},{uq["var"].ci(95)[1]:.2f}) nm²') print('-------------------------------------------------') print('Shape') print('-------------------------------------------------') print(f'Modality {estimators["modality"]}') print(f'Skewness {estimators["skewness"]:.2f} ({uq["skewness"].ci(95)[0]:.2f},{uq["skewness"].ci(95)[1]:.2f}) ') print(f'Kurtosis {estimators["kurtosis"]:.2f} ({uq["kurtosis"].ci(95)[0]:.2f},{uq["kurtosis"].ci(95)[1]:.2f}) ') print('-------------------------------------------------')