# Source code for deerlab.classes

# This file is a part of DeerLab. License is MIT (see LICENSE.md).
# Copyright(c) 2019-2023: Luis Fabregas, Stefan Stoll and other contributors.

import numpy as np
from deerlab.utils import Jacobian, nearest_psd
from scipy.stats import norm
from scipy.signal import fftconvolve
from scipy.linalg import block_diag
from scipy.optimize import brentq
from scipy.interpolate import interp1d


# =========================================================================

class UQResult:
# =========================================================================
    r"""
    Represents the uncertainty quantification of fit results.

    This class provides methods for performing uncertainty analysis on fit results. The
    uncertainty analysis can be performed using moment-based, bootstrapped, or likelihood
    profile methods. The type of uncertainty analysis is specified when initializing the
    object.

    Attributes
    ----------
    type : string
        Type of uncertainty analysis performed. Possible values are:

        * ``'moment'`` - Moment-based uncertainty analysis
        * ``'bootstrap'`` - Bootstrapped uncertainty analysis
        * ``'profile'`` - Likelihood profile uncertainty analysis
        * ``'void'`` - Empty uncertainty analysis
    mean : ndarray
        Mean values of the uncertainty distribution of the parameters.
    median : ndarray
        Median values of the uncertainty distribution of the parameters.
    std : ndarray
        Standard deviations of the uncertainty distribution of the parameters.
    covmat : ndarray
        Covariance matrix.
    nparam : int scalar
        Number of parameters in the analysis.
    samples : ndarray
        Bootstrap samples of the parameters. Only available for ``type='bootstrap'``.
    profile : ndarray
        Likelihood profile of the parameters. Only available for ``type='profile'``.
    threshold : scalar
        Threshold value used for the profile method. Only available for ``type='profile'``.
    """
    def __init__(self, uqtype, data=None, covmat=None, lb=None, ub=None, threshold=None, profiles=None, noiselvl=None):
        r"""
        Initializes the UQResult object.

        Parameters
        ----------
        uqtype : str
            Type of uncertainty analysis to perform. Possible values are:
            ``'moment'``, ``'bootstrap'``, ``'profile'``, ``'void'``.
        data : ndarray
            Data to be used for the uncertainty analysis. The format of this parameter
            depends on the value of ``uqtype``:

            * If ``uqtype='moment'``, data should be an array of parameter estimates.
            * If ``uqtype='bootstrap'``, data should be a 2-dimensional array of bootstrap
              samples, where each row corresponds to a sample and each column corresponds
              to a parameter.
            * If ``uqtype='profile'``, data should be an array of parameter estimates.
            * If ``uqtype='void'``, data should not be provided.
        covmat : ndarray
            Covariance matrix of the parameter estimates. Only applicable if ``uqtype='moment'``.
        lb : ndarray
            Lower bounds of the parameter estimates. Only applicable if ``uqtype='moment'``.
        ub : ndarray
            Upper bounds of the parameter estimates. Only applicable if ``uqtype='moment'``.
        threshold : float
            Threshold value used for the likelihood profile method. Only applicable if ``uqtype='profile'``.
        profiles : list
            List of likelihood profiles for each parameter. Each element of the list should
            be a dictionary-like object with entries ``x`` and ``y``, where ``x`` represents
            the values of the parameter and ``y`` represents the corresponding likelihoods.
            Only applicable if ``uqtype='profile'``.
        noiselvl : float
            Noise level used for the likelihood profile method. Only applicable if ``uqtype='profile'``.
        """
        # Parse input schemes
        if uqtype == 'moment':
            # Scheme 1: UQResult('moment',parfit,covmat,lb,ub)
            self.type = uqtype
            parfit = data
            nParam = len(parfit)

        elif uqtype == 'profile':
            # Scheme 2: UQResult('profile',data,profiles=profiles)
            if not isinstance(profiles, list):
                profiles = [profiles]
            self.type = uqtype
            self.__parfit = data
            self.__noiselvl = noiselvl
            self.profile = profiles
            self.threshold = threshold
            nParam = len(np.atleast_1d(data))

        elif uqtype == 'bootstrap':
            # Scheme 3: UQResult('bootstrap',samples)
            self.type = uqtype
            samples = data
            self.samples = samples
            nParam = np.shape(samples)[1]

        elif uqtype == 'void':
            # Scheme 4: UQResult('void')
            self.type = uqtype
            self.mean, self.median, self.std, self.covmat, self.nparam = ([] for _ in range(5))
            return

        else:
            raise NameError("uqtype not found. Must be: 'moment', 'bootstrap', 'profile' or 'void'.")

        if lb is None:
            lb = np.full(nParam, -np.inf)
        if ub is None:
            ub = np.full(nParam, np.inf)

        # Set private variables
        self.__lb = lb
        self.__ub = ub
        self.nparam = nParam

        # Create confidence intervals structure
        if uqtype == 'moment':
            self.mean = parfit
            self.median = parfit
            self.std = np.sqrt(np.diag(covmat))
            self.covmat = covmat

        # Profile-based CI specific fields
        elif uqtype == 'profile':
            xs = [self.pardist(n)[0] for n in range(nParam)]
            pardists = [self.pardist(n)[1] for n in range(nParam)]
            means = [np.trapz(pardist*x, x) for x, pardist in zip(xs, pardists)]
            std = [np.sqrt(np.trapz(pardist*(x - mean)**2, x)) for x, pardist, mean in zip(xs, pardists, means)]
            self.mean = means
            self.median = self.percentile(50)
            self.std = std
            self.covmat = np.diag(np.array(std)**2)

        # Bootstrap-based CI specific fields
        elif uqtype == 'bootstrap':
            means = np.mean(samples, 0)
            covmat = np.squeeze(samples).T@np.squeeze(samples)/np.shape(samples)[0] - means*means.T
            self.mean = means
            self.median = self.percentile(50)
            # If all bootstrap samples of a parameter are identical, report that fixed value as the median
            self.median = np.array([nth_samples[0] if np.all(nth_samples == nth_samples[0]) else self.median[n]
                                    for n, nth_samples in enumerate(samples.T)])
            self.std = np.squeeze(np.std(samples, 0))
            self.covmat = covmat
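    # Illustrative sketch (not part of the class): constructing a moment-based
    # result from a hypothetical best-fit vector and covariance matrix.
    #
    #   parfit = np.array([0.5, 3.2])
    #   covmat = np.array([[0.01, 0.00],
    #                      [0.00, 0.04]])
    #   uq = UQResult('moment', parfit, covmat)
    #   uq.mean   # array([0.5, 3.2])
    #   uq.std    # array([0.1, 0.2]), i.e. sqrt(diag(covmat))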
    # Gets called when an attribute is accessed
    #--------------------------------------------------------------------------------
    def __getattribute__(self, attr):
        try:
            # Call the superclass implementation to avoid recursion
            if (attr != 'type' and '__' not in attr) and super(UQResult, self).__getattribute__('type') == 'void':
                # Uncertainty quantification has not been carried out, so report that the object is empty
                raise ValueError(f"The requested attribute/method ('{attr}') is not available. Uncertainty quantification has not been calculated during the fit.")
        except AttributeError:
            # Catch cases where the 'type' attribute has not yet been defined (e.g. when using copy.deepcopy)
            pass
        # Otherwise return the requested attribute
        return super(UQResult, self).__getattribute__(attr)
    #--------------------------------------------------------------------------------
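    # Illustrative sketch: the guard above makes any attribute access other than
    # ``type`` (or private/mangled attributes) on a void result fail loudly.
    #
    #   uq_void = UQResult('void')
    #   uq_void.type   # 'void'
    #   uq_void.mean   # raises ValueError: attribute not available, no
    #                  # uncertainty was calculated during the fit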
    # Combination of multiple uncertainties
    #--------------------------------------------------------------------------------
    def join(self, *args):
        """
        Combine multiple uncertainty quantification instances.

        This method concatenates the parameter vector of the object calling the method
        with the parameter vectors of any number of other uncertainty quantification
        objects.

        Parameters
        ----------
        uq : any number of :ref:`UQResult`
            Uncertainty quantification objects with ``N1,N2,...,Nn`` parameters to be
            joined to the object calling the method with ``M`` parameters.

        Returns
        -------
        uq_joined : :ref:`UQResult`
            Joined uncertainty quantification object with a total of
            ``M + N1 + N2 + ... + Nn`` parameters. The parameter vectors are
            concatenated in the order they are passed.

        Raises
        ------
        TypeError
            If any of the input objects is not a :ref:`UQResult` instance or if the
            types of the input objects do not match.
        TypeError
            If an attempt is made to join an instance of :ref:`UQResult` with type ``'void'``.
        """
        newargs = []
        if self.type == 'moment':
            # Original metadata
            newargs.append(self.mean)
            newargs.append(self.covmat)
            newargs.append(self.__lb)
            newargs.append(self.__ub)
        elif self.type == 'bootstrap':
            newargs.append(self.samples)

        for uq in args:
            if not isinstance(uq, UQResult):
                raise TypeError('Only UQResult objects can be joined.')
            if uq.type != self.type:
                raise TypeError(f'A UQResult of type ({uq.type}) and another of type ({self.type}) cannot be joined.')
            if uq.type == 'void':
                raise TypeError('Void UQResults cannot be joined.')

            # Concatenate metadata of external UQResult objects
            if self.type == 'moment':
                newargs[0] = np.concatenate([newargs[0], uq.mean])
                newargs[1] = block_diag(newargs[1], uq.covmat)
                newargs[2] = np.concatenate([newargs[2], uq.__lb])
                newargs[3] = np.concatenate([newargs[3], uq.__ub])
            elif self.type == 'bootstrap':
                newargs[0] = np.concatenate([newargs[0], uq.samples], axis=1)

        # Return new UQResult object with combined information
        return UQResult(self.type, *newargs)
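    # Illustrative sketch: joining two hypothetical moment-based results with M
    # and N parameters yields one result with M+N parameters and a block-diagonal
    # covariance matrix.
    #
    #   uq1 = UQResult('moment', np.array([0.5]), np.array([[0.01]]))
    #   uq2 = UQResult('moment', np.array([3.2]), np.array([[0.04]]))
    #   uq12 = uq1.join(uq2)
    #   uq12.nparam   # 2
    #   uq12.covmat   # array([[0.01, 0.  ],
    #                 #        [0.  , 0.04]])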
    #--------------------------------------------------------------------------------
    # Parameter distributions
    #--------------------------------------------------------------------------------
    def pardist(self, n=0):
        """
        Generate the uncertainty distribution of the n-th parameter.

        This method generates the probability density function of the uncertainty of
        the n-th parameter of the model. The distribution is evaluated over a range of
        values where the parameter is most likely to be. The range is determined based
        on the type of uncertainty quantification method used.

        Parameters
        ----------
        n : int scalar
            Index of the parameter.

        Returns
        -------
        ax : ndarray
            Parameter values at which the distribution is evaluated.
        pdf : ndarray
            Probability density function of the parameter uncertainty.
        """
        if n >= self.nparam or n < 0:
            raise ValueError('The input must be a valid integer number.')

        isdelta = False
        if self.type == 'moment':
            # Generate Gaussian distribution based on covariance matrix
            sig = np.sqrt(self.covmat[n, n])
            xmean = self.mean[n]
            x = np.linspace(xmean - 4*sig, xmean + 4*sig, 500)
            pdf = 1/sig/np.sqrt(2*np.pi)*np.exp(-((x - xmean)/sig)**2/2)

        if self.type == 'bootstrap':
            # Get kernel bandwidth using Silverman's rule (1D only)
            samplen = self.samples[:, n].real
            # Take limited precision into account to avoid round-off errors
            samplen = np.round(samplen, 200)
            if np.all(samplen == samplen[0]):
                # Dirac delta distribution
                x = np.array([0.9*samplen[0], samplen[0], 1.1*samplen[0]])
                pdf = np.array([0, 1, 0]).astype(float)
                isdelta = True
            else:
                sigma = np.std(samplen, ddof=1)
                bw = sigma*(len(samplen)*3/4.0)**(-1/5)
                # Make histogram
                maxbin = np.maximum(np.max(samplen), np.mean(samplen) + 3*sigma)
                minbin = np.minimum(np.min(samplen), np.mean(samplen) - 3*sigma)
                bins = np.linspace(minbin, maxbin, 2**10 + 1)
                count, edges = np.histogram(samplen, bins=bins)
                # Generate kernel
                delta = np.maximum(np.finfo(float).eps, (edges.max() - edges.min())/(len(edges) - 1))
                kernel_x = np.arange(-4*bw, 4*bw + delta, delta)
                kernel = norm(0, bw).pdf(kernel_x)
                # Convolve
                pdf = fftconvolve(count, kernel, mode='same')
                # Set x coordinate of pdf to midpoint of bin
                x = edges[:-1] + delta

        if self.type == 'profile':
            if not isinstance(self.profile, list) and n == 0:
                profile = self.profile
            else:
                profile = self.profile[n]
            σ = self.__noiselvl
            obj2likelihood = lambda f: 1/np.sqrt(σ*2*np.pi)*np.exp(-1/2*(f - np.min(f))/σ**2)
            profileinterp = interp1d(profile['x'], profile['y'], kind='slinear', fill_value=1e6, bounds_error=False)
            x = np.linspace(np.min(profile['x']), np.max(profile['x']), 2**10 + 1)
            pdf = obj2likelihood(profileinterp(x))
            # Generate kernel
            sigma = np.sum(x*pdf/np.sum(pdf))
            bw = sigma*(1e12*3/4.0)**(-1/5)
            delta = np.maximum(np.finfo(float).eps, (x.max() - x.min())/(len(x) - 1))
            kernel_x = np.arange(-5*bw, 5*bw + delta, delta)
            kernel = norm(0, bw).pdf(kernel_x)
            # Convolve
            pdf = fftconvolve(pdf, kernel, mode='same')

        # Clip the distribution outside the boundaries
        pdf[x < self.__lb[n]] = 0
        pdf[x > self.__ub[n]] = 0

        # Enforce non-negativity (takes care of negative round-off errors)
        pdf = np.maximum(pdf, 0)

        # Ensure normalization of the probability density function (if not a Dirac delta function)
        if not isdelta:
            if np.trapz(pdf, x) != 0:
                pdf = pdf/np.trapz(pdf, x)

        return x, pdf
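    # Illustrative sketch: for the hypothetical moment-based ``uq`` above, the
    # distribution is a Gaussian evaluated on a mean ± 4*std grid, normalized to
    # unit area.
    #
    #   x, pdf = uq.pardist(0)
    #   np.trapz(pdf, x)    # ≈ 1.0
    #   x[np.argmax(pdf)]   # ≈ uq.mean[0]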
    #--------------------------------------------------------------------------------
    # Parameter percentiles
    #--------------------------------------------------------------------------------
    def percentile(self, p):
        """
        Compute the p-th percentiles of the parameters' uncertainty distributions.

        Parameters
        ----------
        p : float scalar
            Percentile (a value between 0-100).

        Returns
        -------
        prctiles : ndarray
            Percentile values of all parameters.

        Raises
        ------
        ValueError
            If `p` is not a number between 0 and 100.
        """
        if p > 100 or p < 0:
            raise ValueError('The input must be a number between 0 and 100')

        x = np.zeros(self.nparam)
        for n in range(self.nparam):
            # Get parameter PDF
            values, pdf = self.pardist(n)
            # Compute corresponding CDF
            cdf = np.cumsum(pdf)
            cdf /= max(cdf)
            # Eliminate duplicates
            cdf, index = np.unique(cdf, return_index=True)
            # Interpolate requested percentile
            x[n] = np.interp(p/100, cdf, values[index])

        return x
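    # Illustrative sketch: percentiles are obtained by inverting the cumulative
    # integral of pardist(), so the 50th percentile recovers the median up to the
    # resolution of the evaluation grid.
    #
    #   np.allclose(uq.percentile(50), uq.median, rtol=1e-2)   # True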
    #--------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------
    def ci(self, coverage):
        """
        Compute the confidence intervals for the parameters.

        This method computes confidence intervals for the parameters of the fitted
        distribution, based on the method used to compute the parameter estimates
        (moment estimation, bootstrapping, or likelihood profiling).

        Parameters
        ----------
        coverage : float scalar
            Coverage (confidence level) of the confidence intervals (a value between 0-100).

        Returns
        -------
        ci : ndarray
            A 2D array containing the lower and upper confidence intervals for the
            parameters. The first column of the array holds the lower confidence
            intervals, and the second column holds the upper confidence intervals.

            * ``ci[:,0]`` - Lower confidence intervals
            * ``ci[:,1]`` - Upper confidence intervals

            The array has shape ``(nparam, 2)``, where ``nparam`` is the number of
            fitted parameters.

        Raises
        ------
        ValueError
            If the ``coverage`` argument is outside the range 0-100.

        Examples
        --------
        Compute 95% confidence intervals for the fitted parameters::

            ci = param.ci(95)

        Notes
        -----
        The method used to compute the confidence intervals depends on the method used
        to compute the parameter estimates. If the parameter estimates were computed
        using moment estimation, the confidence intervals are computed from the
        estimated standard errors of the parameters. If the parameter estimates were
        computed using bootstrapping, the confidence intervals are computed from the
        empirical distribution of the bootstrapped samples. If the parameter estimates
        were computed using a likelihood profile, the confidence intervals are computed
        by finding the parameter values at the edges of the likelihood profile that
        correspond to the specified coverage.
        """
        if coverage > 100 or coverage < 0:
            raise ValueError('The input must be a number between 0 and 100')

        value = self.mean if hasattr(self, 'mean') else self.__parfit
        iscomplex = np.iscomplexobj(value)

        alpha = 1 - coverage/100
        p = 1 - alpha/2  # percentile

        confint = np.zeros((self.nparam, 2))
        if iscomplex:
            confint = confint.astype(complex)

        if self.type == 'moment':
            # Compute moment-based confidence intervals
            # Clip at specified box boundaries
            standardError = norm.ppf(p)*np.sqrt(np.diag(self.covmat))
            confint[:, 0] = np.maximum(self.__lb, self.mean.real - standardError)
            confint[:, 1] = np.minimum(self.__ub, self.mean.real + standardError)
            if iscomplex:
                confint[:, 0] = confint[:, 0] + 1j*np.maximum(self.__lb, self.mean.imag - standardError)
                confint[:, 1] = confint[:, 1] + 1j*np.minimum(self.__ub, self.mean.imag + standardError)

        elif self.type == 'bootstrap':
            # Compute bootstrap-based confidence intervals
            # Clip possible artifacts from the percentile estimation
            confint[:, 0] = np.minimum(self.percentile((1 - p)*100), np.amax(self.samples))
            confint[:, 1] = np.maximum(self.percentile(p*100), np.amin(self.samples))

        elif self.type == 'profile':
            # Compute likelihood-profile-based confidence intervals
            for n, profile in enumerate(self.profile):

                # Construct interpolator for the profile
                profileinterp = interp1d(profile['x'], profile['y'], kind='slinear', fill_value=1e6, bounds_error=False)

                #-----------------------------------------------------------------
                def getCIbound(boundary, optimum):
                    def getprofile_at(value):
                        return profileinterp(value) - self.threshold(coverage/100)
                    # Evaluate the profile function
                    fbound = getprofile_at(boundary)
                    f0 = getprofile_at(optimum)

                    # Check the signs of the shifted profile
                    if np.sign(fbound) == np.sign(f0):
                        # If both edges have the same sign return one of the edges
                        ci_bound = boundary
                    else:
                        searchrange = [boundary, optimum] if boundary < optimum else [optimum, boundary]
                        ci_bound = brentq(getprofile_at, *searchrange, maxiter=int(1e4))
                    return ci_bound
                #-----------------------------------------------------------------

                # Get the upper and lower bounds of the confidence interval
                confint[n, 0] = getCIbound(profile['x'].min(), self.__parfit[n])
                confint[n, 1] = getCIbound(profile['x'].max(), self.__parfit[n])

        # Remove singleton dimensions
        confint = np.squeeze(confint)

        return confint
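    # Illustrative sketch: for the hypothetical moment-based ``uq`` above, ci(95)
    # reduces to mean ± norm.ppf(0.975)*std, clipped at the parameter bounds.
    #
    #   ci95 = uq.ci(95)
    #   ci95[:, 0]   # ≈ uq.mean - 1.96*uq.std
    #   ci95[:, 1]   # ≈ uq.mean + 1.96*uq.std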
    #--------------------------------------------------------------------------------
    def propagate(self, model, lb=None, ub=None, samples=None):
        """
        Uncertainty propagation.

        This method propagates the uncertainty analysis of the input parameters through
        the given model, returning a new ``UQResult`` object that contains the
        uncertainty information of the model outputs. If the input type is ``'moment'``,
        the returned object is of type ``'moment'``; if the type is ``'bootstrap'``, the
        returned object is of type ``'bootstrap'``.

        Parameters
        ----------
        model : callable
            A callable function that takes an array of parameters as input and returns
            a model output.
        lb : ndarray, optional
            Lower bounds of the values returned by ``model``, assumed unconstrained by default.
        ub : ndarray, optional
            Upper bounds of the values returned by ``model``, assumed unconstrained by default.
        samples : int, optional
            Number of samples to use when propagating the uncertainty. If not provided,
            1000 samples are used.

        Returns
        -------
        modeluq : :ref:`UQResult`
            New uncertainty quantification analysis for the outputs of ``model``.
        """
        parfit = self.mean
        # Evaluate model with fit parameters
        modelfit = model(parfit)
        iscomplex = np.iscomplexobj(modelfit)

        # Validate input boundaries
        if lb is None:
            lb = np.full(np.size(modelfit), -np.inf)
        if ub is None:
            ub = np.full(np.size(modelfit), np.inf)
        lb, ub = (np.atleast_1d(var) for var in [lb, ub])
        if np.size(modelfit) != np.size(lb) or np.size(modelfit) != np.size(ub):
            raise IndexError('The 2nd and 3rd input arguments must have the same number of elements as the model output.')

        if samples is None:
            Nsamples = 1000
        else:
            Nsamples = samples

        if self.type == 'moment':
            if iscomplex:
                model_ = model
                model = lambda p: np.concatenate([model_(p).real, model_(p).imag])

            # Get Jacobian of the model to be propagated with respect to the parameters
            J = Jacobian(model, parfit, self.__lb, self.__ub)

            # Clip at boundaries
            modelfit = np.maximum(modelfit, lb)
            modelfit = np.minimum(modelfit, ub)

            # Error propagation
            modelcovmat = nearest_psd(J@self.covmat@J.T)

            if iscomplex:
                N = modelcovmat.shape[0]
                Nreal = np.arange(0, N/2).astype(int)
                Nimag = np.arange(N/2, N).astype(int)
                modelcovmat = modelcovmat[np.ix_(Nreal, Nreal)] + 1j*modelcovmat[np.ix_(Nimag, Nimag)]

            # Construct new uncertainty object
            return UQResult('moment', modelfit, modelcovmat, lb, ub)

        elif self.type == 'bootstrap':
            sampled_parameters = [[] for _ in range(self.nparam)]
            for n in range(self.nparam):
                # Get the parameter uncertainty distribution
                values, pdf = self.pardist(n)
                # Random sampling from the uncertainty distribution
                sampled_parameters[n] = [np.random.choice(values, p=pdf/sum(pdf)) for _ in range(Nsamples)]
            # Convert to matrix
            sampled_parameters = np.atleast_2d(sampled_parameters)

            # Bootstrap sampling of the model response
            sampled_model = [model(sampled_parameters[:, n]) for n in range(Nsamples)]
            sampled_model = [np.maximum(sample, lb) for sample in sampled_model]
            sampled_model = [np.minimum(sample, ub) for sample in sampled_model]
            # Convert to matrix
            sampled_model = np.atleast_2d(sampled_model)

            # Construct new uncertainty object
            return UQResult('bootstrap', data=sampled_model, lb=lb, ub=ub)
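    # Illustrative sketch: propagating the hypothetical moment-based ``uq`` above
    # through a linear model y = A@p follows the first-order rule A@covmat@A.T.
    #
    #   A = np.array([[1.0, 0.0],
    #                 [1.0, 1.0]])
    #   modeluq = uq.propagate(lambda p: A@p)
    #   modeluq.covmat   # ≈ A @ uq.covmat @ A.T (projected to the nearest PSD matrix)
    #   modeluq.ci(95)   # confidence intervals on the model output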
    #--------------------------------------------------------------------------------

# =========================================================================