# diststats.py - Distance distribution descriptors# -----------------------------------------------# This file is a part of DeerLab. License is MIT (see LICENSE.md).# Copyright(c) 2019-2023: Luis Fabregas, Stefan Stoll and other contributors.importnumpyasnpimportwarningsimportcopyfromscipy.signalimportfind_peaksfromscipy.integrateimportcumulative_trapezoidascumtrapz
[docs]defdiststats(r,P,Puq=None,verbose=False,threshold=None):r""" Computes statistical quantities for the location, spread, and shape of a distance distribution, with or without their corresponding uncertainties. This function calculates various statistical quantities of a distance distribution, such as its mean, median, mode, etc. It can also calculate the uncertainty of these quantities using moment-based or bootstrapped uncertainty quantification, if provided. The function allows to specify whether to print a summary of the results and the peak detection threshold for the calculation of the mode of the distribution. Parameters ---------- r : array_like Distance axis, in nm. The distances in the distribution. P : array_like Distance distribution, does not have to be normalized. The probability or relative frequency of the distances in `r`. Puq : :ref:`UQResult`, optional Uncertainty quantification of the distance distribution. If Puq is not given, a single output is returned without any uncertainty estimation. If given, two outputs are returned containing the uncertainty estimation. verbose : boolean, optional Print a summary of all statistical quantities (and their uncertainties if calculated). threshold : float, optional Peak detection threshold for the calculation of modes of a distribution. The default is ``max(P)/10``. Returns ------- estimators : dict Dictionary of shape, location, and spread descriptors of the input distance distribution: General parameters * ``'rmin'`` - Minimum distance in the distribution range, in nm * ``'rmax'`` - Maximum distance in the distribution range, in nm * ``'int'`` - Integral of the distance distribution Location parameters * ``'mean'`` or ``'moment1'`` - Mean distance, in nm (see `more <https://en.wikipedia.org/wiki/Mean>`__) * ``'median'`` - Median distance, in nm (see `more <https://en.wikipedia.org/wiki/Median>`__) * ``'iqm'`` - Interquartile mean (IQM) distance, in nm (see `more <https://en.wikipedia.org/wiki/Interquartile_mean>`__) * ``'mode'`` - First modal distance, in nm (see `more <https://en.wikipedia.org/wiki/Mode_(statistics)>`__) * ``'modes'`` - All modal distances, in nm (see `more <https://en.wikipedia.org/wiki/Mode_(statistics)>`__) Spread parameters * ``'iqr'`` - Interquartile range, in nm (see `more <https://en.wikipedia.org/wiki/Interquartile_range>`__) * ``'mad'`` - Mean absolute deviation (MAD), in nm (see `more <https://en.wikipedia.org/wiki/Average_absolute_deviation>`__) * ``'std'`` - Standard deviation, in nm (see `more <https://en.wikipedia.org/wiki/Standard_deviation>`__) * ``'var'`` or ``'moment2'`` - Variance, in nm² (see `more <https://en.wikipedia.org/wiki/Variance>`__) * ``'entropy'`` - Shannon entropy, in nat (see `more <https://en.wikipedia.org/wiki/Entropy_(information_theory)>`__) Shape parameters * ``'modality'`` - Modality (number of modes) * ``'skewness'`` or ``'moment3'`` - Skewness (see `more <https://en.wikipedia.org/wiki/Skewness>`__) * ``'kurtosis'`` - Excess kurtosis (see `more <https://en.wikipedia.org/wiki/Kurtosis>`__) * ``'moment4'`` - 4th moment (kurtosis) (see `more <https://en.wikipedia.org/wiki/Kurtosis>`__) uq : dict of :ref:`UQResult` Dictionary of the parameters moment-based uncertainty quantification. See above for the dictionary keys. Only calculated if ``Puq`` is provided. Notes ----- For the ``'mode'``, ``'modes'`` and ``'modality'`` parameters, moment-based uncertainties are not available. Uncertainties can, however, be calculated via bootsrapping of these quantities, e.g. :: def analyze_rmode(V): fit = dl.fitmodel(V,t,r) rmode = dl.diststats(fit.P,r)[0]['mode'] return rmode # Bootstrap analysis of distance mode rmode_uq = dl.bootstrap_analysis(V,Vfit,analyze_rmode) """P,r=np.atleast_1d(P,r)# Check to avoid non-sensical results with syntax diststats(P,r)ifnotnp.all(np.diff(r)>0):raiseKeyError('The distance axis (1st argument) must be a monotnously increasing vector.')ifthresholdisNone:threshold=np.max(P)/10# Auxiliary functions# -------------------int=np.trapz(P,r)ifnotnp.all(P==0)else1defnormalize(P):returnP/int# Percentile functiondefpctile(r,P,p):cdf=cumtrapz(normalize(P),r,initial=0)cdf,index=np.unique(cdf,return_index=True)rpctile=np.interp(p/100,cdf,r[index])returnrpctile# Expectation operator functiondefE(x,P,r):returnnp.trapz(x*normalize(P),r)# Location estimators# -------------------# 1st moment - Mean meanfcn=lambdaP:E(r,P,r)# Medianmedianfcn=lambdaP:pctile(r,P,50)# Interquartile meandefiqmfcn(P):IQrange=(r>pctile(r,P,25))&(r<pctile(r,P,75))returnE(r[IQrange],P[IQrange]/np.trapz(normalize(P)[IQrange],r[IQrange]),r[IQrange])# Modemodefcn=lambdaP:r[np.argmax(P)]# Modesmodesfcn=lambdaP:r[find_peaks(P,height=threshold)[0]]# Spread estimators# -----------------# Interquartile rangeiqrfcn=lambdaP:pctile(r,P,75)-pctile(r,P,25)# Mean absolute deviationmadfcn=lambdaP:E(abs(r-meanfcn(P)),P,r)# 2nd moment - Variancevariancefcn=lambdaP:E((r-meanfcn(P))**2,P,r)# 2nd moment - Standard deviationstdfcn=lambdaP:np.sqrt(variancefcn(P))# Entropy (information theory)entropyfcn=lambdaP:-E(np.log(np.maximum(np.finfo(float).eps,normalize(P))),P,r)# Shape estimators# ----------------# Modalitymodalityfcn=lambdaP:np.size(modesfcn(P))# 3rd moment - Skewnessskewnessfcn=lambdaP:E(((r-meanfcn(P))/stdfcn(P))**3,P,r)# 4th moment - Kurtosiskurtosisfcn=lambdaP:E(((r-meanfcn(P))/stdfcn(P))**4,P,r)# Excess kurtosis exkurtosisfcn=lambdaP:3-E(((r-meanfcn(P))/stdfcn(P))**4,P,r)# 0th moment - Integral intfcn=lambdaP:np.trapz(P,r)# Calculate distribution estimatorsestimators={'rmin':min(r),'rmax':max(r),'int':intfcn(P),'mean':meanfcn(P),'median':medianfcn(P),'mode':modefcn(P),'modes':modesfcn(P),'iqm':iqmfcn(P),'mad':madfcn(P),'var':variancefcn(P),'std':stdfcn(P),'iqr':iqrfcn(P),'entropy':entropyfcn(P),'modality':modalityfcn(P),'skewness':skewnessfcn(P),'kurtosis':exkurtosisfcn(P),'moment1':meanfcn(P),'moment2':variancefcn(P),'moment3':skewnessfcn(P),'moment4':kurtosisfcn(P),}# Calculate distribution estimator uncertainties if uncertainty of P is givenuq=NoneifPuqisnotNone:uq={'rmin':None,'rmax':None,'int':None,'mean':_propagation(Puq,meanfcn),'median':_propagation(Puq,medianfcn),'mode':None,'modes':None,'iqm':_propagation(Puq,iqmfcn),'mad':_propagation(Puq,madfcn),'var':_propagation(Puq,variancefcn),'std':_propagation(Puq,stdfcn),'iqr':_propagation(Puq,iqrfcn),'entropy':_propagation(Puq,entropyfcn),'modality':None,'skewness':_propagation(Puq,skewnessfcn),'kurtosis':_propagation(Puq,exkurtosisfcn),'moment1':_propagation(Puq,meanfcn),'moment2':_propagation(Puq,variancefcn),'moment3':_propagation(Puq,skewnessfcn),'moment4':_propagation(Puq,kurtosisfcn),}# Print if requestedifverbose:_print_estimators(r,estimators,uq)returnestimators,uq
def_propagation(Puq,fcn):# Propagate the uncertainty to the functionwithwarnings.catch_warnings():# Ignore warnings when evaluating Jacobians outside of reasonable boundswarnings.simplefilter('ignore')uq_=Puq.propagate(fcn)uq=copy.deepcopy(uq_)defci(p):paramci=np.atleast_2d(uq_.ci(p))return[paramci[:,0][0],paramci[:,1][0]]# Wrap the ci() method to simplify arrayuq.ci=cireturnuqdef_print_estimators(r,estimators,uq):# Print summary of estimators if requestedifuqisNone:print('-------------------------------------------------')print('Distribution Statistics')print('-------------------------------------------------')print(f'Range {min(r):.2f}-{max(r):.2f} nm')print(f'Integral {estimators["int"]:.2f}')print('-------------------------------------------------')print('Location')print('-------------------------------------------------')print(f'Mean {estimators["mean"]:.2f} nm')print(f'Median {estimators["median"]:.2f} nm')print(f'Interquartile mean {estimators["iqm"]:.2f} nm')print(f'Mode {estimators["mode"]:.2f} nm')print('-------------------------------------------------')print('Spread')print('-------------------------------------------------')print(f'Standard deviation {estimators["std"]:.2f} nm')print(f'Mean absolute deviation {estimators["mad"]:.2f} nm')print(f'Interquartile range {estimators["iqr"]:.2f} nm')print(f'Variance {estimators["var"]:.2f} nm²')print('-------------------------------------------------')print('Shape')print('-------------------------------------------------')print(f'Modality {estimators["modality"]} ')print(f'Skewness {estimators["skewness"]:.2f} ')print(f'Excess kurtosis {estimators["kurtosis"]:.2f} ')print('-------------------------------------------------')else:print('-------------------------------------------------')print('Distribution Statistics')print('-------------------------------------------------')print(f'Range {min(r):.2f}-{max(r):.2f} nm')print(f'Integral {estimators["int"]:.2f}')print('-------------------------------------------------')print('Location')print('-------------------------------------------------')print(f'Range {min(r):.2f}-{max(r):.2f} nm')print(f'Mean {estimators["mean"]:.2f} ({uq["mean"].ci(95)[0]:.2f},{uq["mean"].ci(95)[1]:.2f}) nm')print(f'Median {estimators["median"]:.2f} ({uq["median"].ci(95)[0]:.2f},{uq["median"].ci(95)[1]:.2f}) nm')print(f'Interquartile mean {estimators["iqm"]:.2f} ({uq["iqm"].ci(95)[0]:.2f},{uq["iqm"].ci(95)[1]:.2f}) nm')print(f'Mode {estimators["mode"]:.2f} nm')print('-------------------------------------------------')print('Spread')print('-------------------------------------------------')print(f'Standard deviation {estimators["std"]:.2f} ({uq["std"].ci(95)[0]:.2f},{uq["std"].ci(95)[1]:.2f}) nm')print(f'Mean absolute deviation {estimators["mad"]:.2f} ({uq["mad"].ci(95)[0]:.2f},{uq["mad"].ci(95)[1]:.2f}) nm')print(f'Interquartile range {estimators["iqr"]:.2f} ({uq["iqr"].ci(95)[0]:.2f},{uq["iqr"].ci(95)[1]:.2f}) nm')print(f'Variance {estimators["var"]:.2f} ({uq["var"].ci(95)[0]:.2f},{uq["var"].ci(95)[1]:.2f}) nm²')print('-------------------------------------------------')print('Shape')print('-------------------------------------------------')print(f'Modality {estimators["modality"]}')print(f'Skewness {estimators["skewness"]:.2f} ({uq["skewness"].ci(95)[0]:.2f},{uq["skewness"].ci(95)[1]:.2f}) ')print(f'Kurtosis {estimators["kurtosis"]:.2f} ({uq["kurtosis"].ci(95)[0]:.2f},{uq["kurtosis"].ci(95)[1]:.2f}) ')print('-------------------------------------------------')