Source code for UQpy.inference.InformationModelSelection

import logging
from typing import Union

from UQpy.inference.information_criteria import AIC
from UQpy.inference.information_criteria.baseclass.InformationCriterion import InformationCriterion
from beartype import beartype
from UQpy.inference.MLE import MLE
import numpy as np


[docs]class InformationModelSelection: # Authors: Audrey Olivier, Dimitris Giovanis # Last Modified: 12/19 by Audrey Olivier @beartype def __init__( self, parameter_estimators: list[MLE], criterion: InformationCriterion = AIC(), n_optimizations: list[int] = None, initial_parameters: list[np.ndarray] = None ): """ Perform model selection using information theoretic criteria. Supported criteria are :class:`.BIC`, :class:`.AIC` (default), :class:`.AICc`. This class leverages the :class:`.MLE` class for maximum likelihood estimation, thus inputs to :class:`.MLE` can also be provided to :class:`InformationModelSelection`, as lists of length equal to the number of models. :param parameter_estimators: A list containing a maximum-likelihood estimator (:class:`.MLE`) for each one of the models to be compared. :param criterion: Criterion to be used (:class:`.AIC`, :class:`.BIC`, :class:`.AICc)`. Default is :class:`.AIC` :param initial_parameters: Initial guess(es) for optimization, :class:`numpy.ndarray` of shape :code:`(nstarts, n_parameters)` or :code:`(n_parameters, )`, where :code:`nstarts` is the number of times the optimizer will be called. Alternatively, the user can provide input `n_optimizations` to randomly sample initial guess(es). The identified MLE is the one that yields the maximum log likelihood over all calls of the optimizer. """ self.candidate_models = [mle.inference_model for mle in parameter_estimators] self.models_number = len(parameter_estimators) self.criterion: InformationCriterion = criterion self.logger = logging.getLogger(__name__) self.n_optimizations = n_optimizations self.initial_parameters= initial_parameters self.parameter_estimators: list = parameter_estimators """:class:`.MLE` results for each model (contains e.g. fitted parameters)""" # Initialize the outputs self.criterion_values: list = [None, ] * self.models_number """Value of the criterion for all models.""" self.penalty_terms: list = [None, ] * self.models_number """Value of the penalty term for all models. Data fit term is then criterion_value - penalty_term.""" self.probabilities: list = [None, ] * self.models_number """Value of the model probabilities, computed as .. math:: P(M_i|d) = \dfrac{\exp(-\Delta_i/2)}{\sum_i \exp(-\Delta_i/2)} where :math:`\Delta_i = criterion_i - min_i(criterion)`""" # Run the model selection procedure if (self.n_optimizations is not None) or (self.initial_parameters is not None): self.run(self.n_optimizations, self.initial_parameters)
[docs] def run(self, n_optimizations: list[int], initial_parameters: list[np.ndarray]=None): """ Run the model selection procedure, i.e. compute criterion value for all models. This function calls the :meth:`run` method of the :class:`.MLE` object for each model to compute the maximum log-likelihood, then computes the criterion value and probability for each model. If `data` are given when creating the :class:`.MLE` object, this method is called automatically when the object is created. :param n_optimizations: Number of iterations that the optimization is run, starting at random initial guesses. It is only used if `initial_parameters` is not provided. Default is :math:`1`. The random initial guesses are sampled uniformly between :math:`0` and :math:`1`, or uniformly between user-defined bounds if an input bounds is provided as a keyword argument to the `optimizer` input parameter. :param initial_parameters: Initial guess(es) for optimization, :class:`numpy.ndarray` of shape :code:`(nstarts, n_parameters)` or :code:`(n_parameters, )`, where :code:`nstarts` is the number of times the optimizer will be called. Alternatively, the user can provide input `n_optimizations` to randomly sample initial guess(es). The identified MLE is the one that yields the maximum log likelihood over all calls of the optimizer. """ if (n_optimizations is not None and (len(n_optimizations) != len(self.parameter_estimators))) or \ (initial_parameters is not None and len(initial_parameters) != len(self.parameter_estimators)): raise ValueError("The length of n_optimizations and initial_parameters should be equal to the number of " "parameter estimators") # Loop over all the models for i, parameter_estimator in enumerate(self.parameter_estimators): # First evaluate ML estimate for all models, do several iterations if demanded parameters = None if initial_parameters is not None: parameters = initial_parameters[i] optimizations = 0 if n_optimizations is not None: optimizations = n_optimizations[i] parameter_estimator.run(n_optimizations=optimizations, initial_parameters=parameters) # Then minimize the criterion self.criterion_values[i], self.penalty_terms[i] = \ self.criterion.minimize_criterion(data=parameter_estimator.data, parameter_estimator=parameter_estimator, return_penalty=True) # Compute probabilities from criterion values self.probabilities = self._compute_probabilities(self.criterion_values)
[docs] def sort_models(self): """ Sort models in descending order of model probability (increasing order of `criterion` value). This function sorts - in place - the attribute lists :py:attr:`.candidate_models`, :py:attr:`.ml_estimators`, :py:attr:`criterion_values`, :py:attr:`penalty_terms` and :py:attr:`probabilities` so that they are sorted from most probable to least probable model. It is a stand-alone function that is provided to help the user to easily visualize which model is the best. No inputs/outputs. """ sort_idx = list(np.argsort(np.array(self.criterion_values))) self.candidate_models = [self.candidate_models[i] for i in sort_idx] self.parameter_estimators = [self.parameter_estimators[i] for i in sort_idx] self.criterion_values = [self.criterion_values[i] for i in sort_idx] self.penalty_terms = [self.penalty_terms[i] for i in sort_idx] self.probabilities = [self.probabilities[i] for i in sort_idx]
@staticmethod def _compute_probabilities(criterion_values): delta = np.array(criterion_values) - min(criterion_values) prob = np.exp(-delta / 2) return prob / np.sum(prob)