Source code for UQpy.scientific_machine_learning.losses.GeometricJensenShannonDivergence

import torch
import torch.nn as nn
import UQpy.scientific_machine_learning.functional as func
from UQpy.scientific_machine_learning.baseclass import NormalBayesianLayer, Loss

from typing import Annotated
from beartype import beartype
from beartype.vale import Is


@beartype
class GeometricJensenShannonDivergence(Loss):
    def __init__(
        self,
        alpha: Annotated[float, Is[lambda x: 0 <= x <= 1]] = 0.5,
        reduction: str = "sum",
        device=None,
    ):
        r"""Analytic form for Geometric JS divergence for all Bayesian layers in a module

        :param alpha: Weight of the mixture distribution, :math:`0 \leq \alpha \leq 1`.
         See formula for details. Default: 0.5
        :param reduction: Specifies the reduction to apply to the output: 'mean' or 'sum'.
         The reduction is applied to each parameter tensor of each Bayesian layer, and the
         reduced values are then added together. Default: 'sum'
        :param device: Device on which the accumulated divergence tensor is created.
         Passed directly to :py:func:`torch.tensor`. Default: None

        :raises ValueError: If ``reduction`` is not 'sum' or 'mean'.

        The Geometric Jensen-Shannon divergence :math:`D_{JSG}` is computed as

        .. math:: D_{JSG}(P, Q) = (1-\alpha) D_{KL}(P, M) + \alpha D_{KL}(Q, M)

        where :math:`D_{KL}` is the Kullback-Leibler divergence
        and :math:`M=P^\alpha Q^{(1-\alpha)}` is the geometric mean distribution.

        When the distributions :math:`P` and :math:`Q` are Gaussian,
        the closed form for Geometric Jensen-Shannon divergence is given as

        .. math:: D_{JSG}(P, Q) = \frac12 \left( \frac{(1-\alpha)\sigma_0^2 + \alpha\sigma_1^2}{\sigma_\alpha^2} + \log \frac{\sigma_\alpha^2}{\sigma_0^{2(1-\alpha)} \sigma_1^{2\alpha}} + (1-\alpha) \frac{(\mu_\alpha - \mu_0)^2}{\sigma_\alpha^2} + \frac{\alpha(\mu_\alpha - \mu_1)^2}{\sigma_\alpha^2} -1 \right)

        where :math:`\sigma_\alpha^2 = \left( \frac{\alpha}{\sigma_0^2}+\frac{1-\alpha}{\sigma_1^2} \right)^{-1}`
        and :math:`\mu_\alpha = \sigma_\alpha^2 \left[\frac{\alpha \mu_0}{\sigma_0^2} + \frac{(1-\alpha)\mu_1}{\sigma_1^2}\right]`

        Examples:

        >>> # Divergence of a single Bayesian Layer
        >>> layer = sml.BayesianLinear(4, 5)
        >>> divergence_function = sml.GeometricJensenShannonDivergence()
        >>> div = divergence_function(layer)

        >>> # Divergence of a Bayesian neural network
        >>> network = nn.Sequential(
        ...     sml.BayesianLinear(1, 4),
        ...     nn.ReLU(),
        ...     nn.Linear(4, 4),
        ...     nn.ReLU(),
        ...     sml.BayesianLinear(4, 1),
        ... )
        >>> model = sml.FeedForwardNeuralNetwork(network)
        >>> divergence_function = sml.GeometricJensenShannonDivergence()
        >>> div = divergence_function(model)
        """
        super().__init__()
        self.alpha = alpha
        if reduction == "none":
            raise ValueError(
                "UQpy: GeometricJensenShannonDivergence does not accept reduction='none'. "
                "Must be 'sum' or 'mean'."
            )
        # Reject typos and unsupported modes at construction time rather than
        # letting them surface later, in the middle of a forward pass.
        if reduction not in ("sum", "mean"):
            raise ValueError(
                f"UQpy: GeometricJensenShannonDivergence received invalid reduction={reduction!r}. "
                "Must be 'sum' or 'mean'."
            )
        self.reduction = reduction
        self.device = device

    def forward(self, network: nn.Module) -> torch.Tensor:
        """Compute the Geometric JS divergence on all Bayesian layers in a module

        Every submodule of ``network`` that is a ``NormalBayesianLayer`` contributes one
        divergence term for each entry of its ``parameter_shapes`` that is not ``None``.
        Submodules of any other type are ignored.

        :param network: Module containing Bayesian layers as class attributes
        :return: Geometric JS divergence between prior and posterior distributions
        """
        divergence = torch.tensor(0.0, device=self.device)
        for layer in network.modules():
            if not isinstance(layer, NormalBayesianLayer):
                continue
            for name, shape in layer.parameter_shapes.items():
                if shape is None:
                    # This parameter is not present on the layer.
                    continue
                mu = getattr(layer, f"{name}_mu")
                rho = getattr(layer, f"{name}_rho")
                # softplus(rho) == log(1 + exp(rho)), but evaluated without
                # overflowing to inf when rho is large.
                sigma = nn.functional.softplus(rho)
                divergence += func.geometric_jensen_shannon_divergence(
                    mu,
                    sigma,
                    torch.tensor(layer.prior_mu, device=mu.device),
                    torch.tensor(layer.prior_sigma, device=mu.device),
                    alpha=self.alpha,
                    reduction=self.reduction,
                )
        return divergence

    def extra_repr(self) -> str:
        """Return the non-default settings, comma separated, for the module's repr."""
        s = []
        if self.alpha != 0.5:
            s.append(f"alpha={self.alpha}")
        if self.reduction != "sum":
            s.append(f"reduction={self.reduction}")
        return ", ".join(s)