Note
Go to the end to download the full example code or to run this example in your browser via Binder
Sequential Tempering for Bayesian Inference and Reliability analyses
The general framework: one wants to sample from a distribution of the form
where \(q_1 \left( x \right)\) and \(p_0 \left( x \right)\) can be evaluated; and potentially estimate the constant \(Z_1 = \int q_1 \left( x \right)p_0 \left( x \right) dx\).
Sequential tempering introduces a sequence of intermediate distributions:
for values of \(\beta_j\) in \([0, 1]\). The algorithm starts with \(\beta_0 = 0\), which samples from the reference distribution \(p_0\), and ends for some \(j = m\) such that \(\beta_m = 1\), sampling from the target. First, a set of sample points is generated from \(p_0 = p_{\beta_0}\), and then these are resampled according to some weights \(w_0\) such that after resampling the points follow \(p_{\beta_1}\). This procedure of resampling is carried out at each intermediate level \(j\) - resampling the points distributed as \(p_{\beta_{j}}\) according to weights \(w_{j}\) such that after resampling, the points are distributed according to \(p_{\beta_{j+1}}\). As the points are sequentially resampled to follow each intermediate distribution, eventually they are resampled from \(p_{\beta_{m-1}}\) to follow \(p_{\beta_{m}} = p_1\).
The weights are calculated as
The normalizing constant is calculated during the generation of samples, as
where \(N_j\) is the number of sample points generated from the intermediate distribution \(p_{\beta_j}\).
Bayesian Inference
In the Bayesian setting, \(p_0\) is the prior, and \(q \left( x, \beta_j \right) = \mathcal{L}\left( data, x \right) ^{\beta_j}\)
from UQpy.run_model import RunModel, PythonModel
import numpy as np
from UQpy.distributions import Uniform, Normal, JointIndependent, MultivariateNormal
from UQpy.sampling import SequentialTemperingMCMC
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal, norm, uniform
from UQpy.sampling.mcmc import *
def likelihood(x, b):
mu1 = np.array([1., 1.])
mu2 = -0.8 * np.ones(2)
w1 = 0.5
# Width of 0.1 in each dimension
sigma1 = np.diag([0.02, 0.05])
sigma2 = np.diag([0.05, 0.02])
# Posterior is a mixture of two gaussians
like = np.exp(np.logaddexp(np.log(w1) + multivariate_normal.logpdf(x=x, mean=mu1, cov=sigma1),
np.log(1. - w1) + multivariate_normal.logpdf(x=x, mean=mu2, cov=sigma2)))
return like ** b
prior = JointIndependent(marginals=[Uniform(loc=-2.0, scale=4.0), Uniform(loc=-2.0, scale=4.0)])
# estimate evidence
def estimate_evidence_from_prior_samples(size):
samples = -2. + 4 * np.random.uniform(size=size * 2).reshape((size, 2))
return np.mean(likelihood(samples, 1.0))
def func_integration(x1, x2):
x = np.array([x1, x2]).reshape((1, 2))
return likelihood(x, 1.0) * (1. / 4) ** 2
def estimate_evidence_from_quadrature():
from scipy.integrate import dblquad
ev = dblquad(func=func_integration, a=-2, b=2, gfun=lambda x: -2, hfun=lambda x: 2)
return ev
x = np.arange(-2, 2, 0.02)
y = np.arange(-2, 2, 0.02)
xx, yy = np.meshgrid(x, y)
z = likelihood(np.concatenate([xx.reshape((-1, 1)), yy.reshape((-1, 1))], axis=-1), 1.0)
h = plt.contourf(x, y, z.reshape(xx.shape))
plt.title('Likelihood')
plt.axis('equal')
plt.show()
# for nMC in [50000, 100000, 500000, 1000000]:
# print('Evidence = {}'.format(estimate_evidence_from_prior_samples(nMC)))
print('Evidence computed analytically = {}'.format(estimate_evidence_from_quadrature()[0]))
sampler = MetropolisHastings(dimension=2, n_chains=20)
test = SequentialTemperingMCMC(pdf_intermediate=likelihood,
distribution_reference=prior,
save_intermediate_samples=True,
percentage_resampling=10,
sampler=sampler,
nsamples=4000)
print('Normalizing Constant = ' + str(test.evidence))
print('Tempering Parameters = ' + str(test.tempering_parameters))
plt.figure()
plt.scatter(test.intermediate_samples[0][:, 0], test.intermediate_samples[0][:, 1])
plt.title(r'$\beta = $' + str(test.tempering_parameters[0]))
plt.show()
plt.figure()
plt.scatter(test.intermediate_samples[2][:, 0], test.intermediate_samples[2][:, 1])
plt.title(r'$\beta = $' + str(test.tempering_parameters[2]))
plt.show()
plt.figure()
plt.scatter(test.samples[:, 0], test.samples[:, 1])
plt.title(r'$\beta = $' + str(test.tempering_parameters[-1]))
plt.show()
Reliability
In the reliability context, \(p_0\) is the pdf of the parameters, and
where \(G \left( x \right)\) is the performance function, negative if the system fails, and \(I_{\beta_j} \left( x \right)\) are smoothed versions of the indicator function.
from scipy.stats import norm
def indic_sigmoid(y, beta):
return 1. / (1. + np.exp(y / (1. / beta - 1.)))
fig, ax = plt.subplots(figsize=(4, 3.5))
ys = np.linspace(-5, 5, 100)
for i, s in enumerate(1. / np.array([1.01, 1.25, 2., 4., 70.])):
ax.plot(ys, indic_sigmoid(y=ys, beta=s), label=r'$\beta={:.2f}$'.format(s), color='blue', alpha=1. - i / 6)
ax.set_xlabel(r'$y=g(\theta)$', fontsize=13)
ax.set_ylabel(r'$q_{\beta}(\theta)=I_{\beta}(y)$', fontsize=13)
# ax.set_title(r'Smooth versions of the indicator function', fontsize=14)
ax.legend(fontsize=8.5)
plt.show()
beta = 2 # Specified Reliability Index
rho = 0.7 # Specified Correlation
dim = 2 # Dimension
# Define the correlation matrix
C = np.ones((dim, dim)) * rho
np.fill_diagonal(C, 1)
print(C)
# Print information related to the true probability of failure
e, v = np.linalg.eig(np.asarray(C))
beff = np.sqrt(np.max(e)) * beta
print(beff)
from scipy.stats import norm
pf_true = norm.cdf(-beta)
print('True pf={}'.format(pf_true))
def estimate_Pf_0(samples, model_values):
mask = model_values <= 0
return np.sum(mask) / len(mask)
model = RunModel(model=PythonModel(model_script='local_reliability_funcs.py', model_object_name="correlated_gaussian",
b_eff=beff, d=dim))
samples = MultivariateNormal(mean=np.zeros((2,)), cov=np.array([[1, 0.7], [0.7, 1]])).rvs(nsamples=20000)
model.run(samples=samples, append_samples=False)
model_values = np.array(model.qoi_list)
print('Prob. failure (MC) = {}'.format(estimate_Pf_0(samples, model_values)))
fig, ax = plt.subplots(figsize=(4, 3.5))
mask = np.squeeze(model_values <= 0)
ax.scatter(samples[mask, 0], samples[mask, 1], color='red', label='fail', alpha=0.5, marker='d')
ax.scatter(samples[~mask, 0], samples[~mask, 1], color='blue', label='safe', alpha=0.5)
plt.axis('equal')
# plt.title('Failure domain for reliability problem', fontsize=14)
plt.xlabel(r'$\theta_{1}$', fontsize=13)
plt.ylabel(r'$\theta_{2}$', fontsize=13)
ax.legend(fontsize=13)
fig.tight_layout()
plt.show()
def indic_sigmoid(y, b):
return 1.0 / (1.0 + np.exp((y * b) / (1.0 - b)))
def factor_param(x, b):
model.run(samples=x, append_samples=False)
G_values = np.array(model.qoi_list)
return np.squeeze(indic_sigmoid(G_values, b))
prior = MultivariateNormal(mean=np.zeros((2,)), cov=C)
sampler = MetropolisHastings(dimension=2, n_chains=20)
test = SequentialTemperingMCMC(pdf_intermediate=factor_param,
distribution_reference=prior,
save_intermediate_samples=True,
percentage_resampling=10,
random_state=960242069,
sampler=sampler,
nsamples=3000)
print('Estimated Probability of Failure = ' + str(test.evidence))
print('Tempering Parameters = ' + str(test.tempering_parameters))
plt.figure()
plt.scatter(test.intermediate_samples[0][:, 0], test.intermediate_samples[0][:, 1])
plt.title(r'$\beta = $' + str(test.tempering_parameters[0]))
plt.show()
plt.figure()
plt.scatter(test.intermediate_samples[2][:, 0], test.intermediate_samples[2][:, 1])
plt.title(r'$\beta = $' + str(test.tempering_parameters[2]))
plt.show()
plt.figure()
plt.scatter(test.samples[:, 0], test.samples[:, 1])
plt.title(r'$\beta = $' + str(test.tempering_parameters[-1]))
plt.show()
Total running time of the script: ( 0 minutes 0.000 seconds)