Source code for UQpy.surrogates.polynomial_chaos.regressions.LeastAngleRegression

import logging
import numpy as np
from beartype import beartype
import copy

from UQpy.surrogates.polynomial_chaos.PolynomialChaosExpansion import PolynomialChaosExpansion
from UQpy.surrogates.polynomial_chaos.polynomials.TotalDegreeBasis import PolynomialBasis
from UQpy.surrogates.polynomial_chaos.regressions.baseclass.Regression import Regression
from UQpy.surrogates.polynomial_chaos.regressions import LeastSquareRegression

from sklearn import linear_model as regresion


class LeastAngleRegression(Regression):
    @beartype
    def __init__(self, fit_intercept: bool = False, verbose: bool = False,
                 n_nonzero_coefs: int = 1000, normalize: bool = False):
        """
        Class to select the best model approximation and calculate the polynomial_chaos coefficients with the
        Least Angle Regression method combined with ordinary least squares.

        :param fit_intercept: Whether to calculate the intercept for this model. Recommended false for PCE, since
         intercept is included in basis functions.
        :param verbose: Sets the verbosity amount.
        :param n_nonzero_coefs: Maximum number of non-zero coefficients.
        :param normalize: Stored on the instance only; this class does not forward it to the underlying
         LARS estimator.
        """
        self.fit_intercept = fit_intercept
        self.n_nonzero_coefs = n_nonzero_coefs
        self.normalize = normalize
        self.verbose = verbose
        self.logger = logging.getLogger(__name__)
        # Coefficient path of the most recent LARS fit; populated by :meth:`run`.
        self.Beta_path = None

    def run(self, x: np.ndarray, y: np.ndarray, design_matrix: np.ndarray):
        """
        Implements the LAR method to compute the polynomial_chaos coefficients. Recommended only for
        model_selection algorithm.

        As a side effect, the full LARS coefficient path is stored in ``self.Beta_path``.

        :param x: :class:`numpy.ndarray` containing the training points (samples). Not used by this
         implementation; the fit is performed on **design_matrix** only.
        :param y: :class:`numpy.ndarray` containing the model evaluations (labels) at the training points.
        :param design_matrix: matrix containing the evaluation of the polynomials at the input points **x**.
        :return: Tuple ``(Beta, None, k)`` where ``Beta`` holds the polynomial_chaos coefficients (reshaped to a
         column when the estimator returns a 1-D array) and ``k`` is the size of the second axis of ``Beta``.
        """
        reg = regresion.Lars(fit_intercept=self.fit_intercept,
                             verbose=self.verbose,
                             n_nonzero_coefs=self.n_nonzero_coefs)
        reg.fit(design_matrix, y)

        self.Beta_path = reg.coef_path_

        c_ = reg.coef_
        if c_.ndim == 1:
            c_ = c_.reshape(-1, 1)

        return c_, None, np.shape(c_)[1]

    @staticmethod
    def model_selection(pce_object: PolynomialChaosExpansion, target_error=1, check_overfitting=True):
        """
        LARS model selection algorithm for given TargetError of approximation measured by cross validation:
        accuracy is computed as ``1 - leave-one-out error`` (1 is perfect approximation).

        The candidate bases are taken from successive columns of the LARS coefficient path; each candidate is
        re-fitted with :class:`LeastSquareRegression` and the candidate with the highest accuracy is kept.
        Option to check overfitting by empirical rule: once the best accuracy exceeds 0.6, the search stops
        when the current accuracy is lower than that of each of the three preceding steps.

        :param pce_object: existing target PCE for model_selection. It is deep-copied and not modified.
        :param target_error: Target error of an approximation (stopping criterion). Must be positive.
        :param check_overfitting: Whether to check over-fitting by empirical rule.
        :return: copy of input PolynomialChaosExpansion containing the best possible model for given data
         identified by LARs
        :raises Exception: if the LARS coefficient path has fewer than three columns.
        :raises ValueError: if **target_error** is not positive, since no candidate would be evaluated.
        """
        pce = copy.deepcopy(pce_object)
        x = pce.experimental_design_input
        y = pce.experimental_design_output

        # First pass: fit with LARS only to obtain the coefficient path.
        pce.regression_method = LeastAngleRegression()
        pce.fit(x, y)
        lars_path = pce.regression_method.Beta_path
        steps = lars_path.shape[1]

        multi_index = pce.multi_index_set

        # Every candidate basis below is re-fitted with ordinary least squares.
        pce.regression_method = LeastSquareRegression()

        if steps < 3:
            raise Exception('LAR identified constant function! \nCheck your data.')

        if target_error <= 0:
            # The search starts from a best accuracy of 0, so a non-positive target would
            # skip the loop entirely and leave no model to return.
            raise ValueError('target_error must be positive.')

        # The untouched input object provides the full basis; ``pce`` has its basis
        # overwritten on every iteration, so it cannot be used as the reference.
        full_basis = np.array(pce_object.polynomial_basis.polynomials)

        accuracies = []
        best_accuracy = 0
        best_basis = None
        best_multi_index = None
        overfitting = False
        step = 0

        while best_accuracy < target_error and step < steps - 2 and not overfitting:
            # Active set of the LARS path, read at column ``step + 2``.
            mask = lars_path[:, step + 2] != 0
            mask[0] = True  # always keep the first basis term

            candidate_multi_index = multi_index[mask, :]
            candidate_basis = list(full_basis[mask])

            pce.polynomial_basis.polynomials_number = len(candidate_basis)
            pce.polynomial_basis.polynomials = candidate_basis
            pce.multi_index_set = candidate_multi_index
            pce.fit(x, y)

            accuracy = float(1 - pce.leaveoneout_error())
            accuracies.append(accuracy)

            # The first candidate is always accepted, even when its accuracy is not positive.
            if step == 0 or accuracy > best_accuracy:
                best_multi_index = candidate_multi_index
                best_basis = candidate_basis
                best_accuracy = accuracy

            if step > 3 and check_overfitting:
                if (best_accuracy > 0.6
                        and accuracy < accuracies[step - 1]
                        and accuracy < accuracies[step - 2]
                        and accuracy < accuracies[step - 3]):
                    overfitting = True

            step += 1

        # Final fit on the best basis found.
        pce.polynomial_basis.polynomials_number = len(best_basis)
        pce.polynomial_basis.polynomials = best_basis
        pce.multi_index_set = best_multi_index
        pce.fit(x, y)

        return pce