Skip to content
Snippets Groups Projects
Commit e53f933b authored by Guillaume Suatton's avatar Guillaume Suatton
Browse files

Update Variational_Bayesian_Model_Selection_for_Mixture_Distributions.ipynb

parent d17e8d85
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Reproducing the results of the paper ### Reproducing the results of the paper
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# %pip install -r requirements.txt
# Imports # Imports
import numpy as np import numpy as np
from numpy import linalg as la from numpy import linalg as la
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse from matplotlib.patches import Ellipse
from matplotlib.colors import LogNorm from matplotlib.colors import LogNorm
from scipy.special import digamma, gammaln, logsumexp from scipy.special import digamma, gammaln, logsumexp
from scipy.stats import multivariate_normal from scipy.stats import multivariate_normal
def fit_predict(X,nb_components=10,max_iter=10,seed=0): def fit_predict(X,nb_components=10,max_iter=10,seed=0):
# Sizes # Sizes
n, d = X.shape n, d = X.shape
# Initialization # Initialization
rd = np.random.RandomState(seed) rd = np.random.RandomState(seed)
resp = rd.rand(n, nb_components)/resp.sum(axis=1)[:, np.newaxis] resp = rd.rand(n, nb_components)/resp.sum(axis=1)[:, np.newaxis]
nu0,invW0,beta0 = d,np.atleast_2d(np.cov(X.T)),1. nu0,invW0,beta0 = d,np.atleast_2d(np.cov(X.T)),1.
weights = np.ones(nb_components) / nb_components weights = np.ones(nb_components) / nb_components
expectation = [np.zeros((nb_components, d)),np.array([nu0 * np.linalg.inv(invW0) for _ in range(nb_components)]),np.zeros(nb_components),np.zeros((nb_components, d, d))] expectation = [np.zeros((nb_components, d)),np.array([nu0 * np.linalg.inv(invW0) for _ in range(nb_components)]),np.zeros(nb_components),np.zeros((nb_components, d, d))]
# First parameter update # First parameter update
eta = resp.sum(axis=0) + 10*np.finfo(resp.dtype).eps eta = resp.sum(axis=0) + 10*np.finfo(resp.dtype).eps
nu = nu0 + eta nu = nu0 + eta
S = beta0 * np.eye(d) + expectation[1] * eta[:,np.newaxis,np.newaxis] S = beta0 * np.eye(d) + expectation[1] * eta[:,np.newaxis,np.newaxis]
inverse_S = np.linalg.inv(S) inverse_S = np.linalg.inv(S)
inverse_W = np.zeros((nb_components, d, d)) inverse_W = np.zeros((nb_components, d, d))
m = np.zeros((nb_components, d)) m = np.zeros((nb_components, d))
for k in range(nb_components): for k in range(nb_components):
m[k] = inverse_S[k]@expectation[1][k]@(resp[:, k]@X) m[k] = inverse_S[k]@expectation[1][k]@(resp[:, k]@X)
s = np.zeros((d, d)) s = np.zeros((d, d))
for i in range(n): for i in range(n):
s += resp[i, k] * (np.outer(X[i], X[i]) - np.outer(X[i], expectation[0][k]) - np.outer(expectation[0][k], X[i]) + expectation[3][k]) s += resp[i, k] * (np.outer(X[i], X[i]) - np.outer(X[i], expectation[0][k]) - np.outer(expectation[0][k], X[i]) + expectation[3][k])
inverse_W[k] = invW0 + s inverse_W[k] = invW0 + s
# Initializing the lower bound # Initializing the lower bound
lower_bound = np.zeros(max_iter) lower_bound = np.zeros(max_iter)
# EM procedure # EM procedure
for p in range(max_iter): for p in range(max_iter):
# Computing expectation values # Computing expectation values
expectation=[] expectation=[]
expectation.append(m.copy()) expectation.append(m.copy())
expectation.append(nu[:, np.newaxis, np.newaxis] * np.linalg.inv(inverse_W)) expectation.append(nu[:, np.newaxis, np.newaxis] * np.linalg.inv(inverse_W))
expectation.append(d*np.log(2) - np.log(np.linalg.det(inverse_W)) + np.sum(digamma(0.5*(nu - np.arange(d)[:,np.newaxis])))) expectation.append(d*np.log(2) - np.log(np.linalg.det(inverse_W)) + np.sum(digamma(0.5*(nu - np.arange(d)[:,np.newaxis]))))
inverse_S = np.linalg.inv(S) inverse_S = np.linalg.inv(S)
expectation.append([inverse_S[i] + np.outer(m[i],m[i]) for i in range(nb_components)]) expectation.append([inverse_S[i] + np.outer(m[i],m[i]) for i in range(nb_components)])
expectation.append([np.trace(inverse_S[i]) + m[i]@m[i] for i in range(nb_components)]) expectation.append([np.trace(inverse_S[i]) + m[i]@m[i] for i in range(nb_components)])
# Computing probabilities # Computing probabilities
log_rho_tilde = np.zeros((n, nb_components)) log_rho_tilde = np.zeros((n, nb_components))
for i in range(n): for i in range(n):
for j in range(nb_components): for j in range(nb_components):
log_rho_tilde[i, j] = 0.5*expectation[2][j]-0.5*np.trace(expectation[1][j]@(np.outer(X[i],X[i])-np.outer(X[i], expectation[0][j])-np.outer(expectation[0][j], X[i])+expectation[3][j])) log_rho_tilde[i, j] = 0.5*expectation[2][j]-0.5*np.trace(expectation[1][j]@(np.outer(X[i],X[i])-np.outer(X[i], expectation[0][j])-np.outer(expectation[0][j], X[i])+expectation[3][j]))
log_rho = log_rho_tilde + np.log(weights + 10 * np.finfo(weights.dtype).eps) log_rho = log_rho_tilde + np.log(weights + 10 * np.finfo(weights.dtype).eps)
log_resp = log_rho - logsumexp(log_rho, axis=1)[:, np.newaxis] log_resp = log_rho - logsumexp(log_rho, axis=1)[:, np.newaxis]
# Computing resp # Computing resp
resp = np.exp(log_resp) resp = np.exp(log_resp)
# Updating parameters # Updating parameters
eta = resp.sum(axis=0) + 10*np.finfo(resp.dtype).eps eta = resp.sum(axis=0) + 10*np.finfo(resp.dtype).eps
nu = nu0 + eta nu = nu0 + eta
S = beta0 * np.eye(d) + expectation[1] * eta[:,np.newaxis,np.newaxis] S = beta0 * np.eye(d) + expectation[1] * eta[:,np.newaxis,np.newaxis]
inverse_S = np.linalg.inv(S) inverse_S = np.linalg.inv(S)
inverse_W = np.zeros((nb_components, d, d)) inverse_W = np.zeros((nb_components, d, d))
m = np.zeros((nb_components, d)) m = np.zeros((nb_components, d))
for k in range(nb_components): for k in range(nb_components):
m[k] = inverse_S[k]@expectation[1][k]@(resp[:, k]@X) m[k] = inverse_S[k]@expectation[1][k]@(resp[:, k]@X)
s = np.zeros((d, d)) s = np.zeros((d, d))
for i in range(n): for i in range(n):
s += resp[i, k] * (np.outer(X[i], X[i]) - np.outer(X[i], expectation[0][k])- np.outer(expectation[0][k], X[i]) + expectation[3][k]) s += resp[i, k] * (np.outer(X[i], X[i]) - np.outer(X[i], expectation[0][k])- np.outer(expectation[0][k], X[i]) + expectation[3][k])
inverse_W[k] = invW0 + s inverse_W[k] = invW0 + s
# M-step # M-step
weights = resp.sum(axis=0) / resp.sum() weights = resp.sum(axis=0) / resp.sum()
# Computing the lower bound # Computing the lower bound
ln_p_x = np.sum(resp * (log_rho_tilde)) ln_p_x = np.sum(resp * (log_rho_tilde))
ln_p_z = np.sum(resp * np.log(weights)) ln_p_z = np.sum(resp * np.log(weights))
ln_p_mu = nb_components * d * np.log(0.5 * beta0 / np.pi) - 0.5 * beta0 * np.sum(expectation[4]) ln_p_mu = nb_components * d * np.log(0.5 * beta0 / np.pi) - 0.5 * beta0 * np.sum(expectation[4])
ln_p_T = nb_components * log_wishart_B(invW0, nu0)+ 0.5 * (nu0 - d - 1) * expectation[2].sum()- 0.5 * np.trace(invW0 * expectation[1].sum()) ln_p_T = nb_components * log_wishart_B(invW0, nu0)+ 0.5 * (nu0 - d - 1) * expectation[2].sum()- 0.5 * np.trace(invW0 * expectation[1].sum())
ln_q_z = np.sum(resp * np.log(resp)) ln_q_z = np.sum(resp * np.log(resp))
ln_q_mu = - 0.5 * nb_components * d * (1 + np.log(2 * np.pi)) + 0.5 * np.sum(np.log(np.linalg.det(S))) ln_q_mu = - 0.5 * nb_components * d * (1 + np.log(2 * np.pi)) + 0.5 * np.sum(np.log(np.linalg.det(S)))
ln_q_T = np.sum([log_wishart_B(inverse_W[k], nu[k]) for k in range(nb_components)]) + np.sum(0.5 * (nu - d - 1) * expectation[2]) - np.sum(0.5 * np.trace(inverse_W @ expectation[1], axis1=1, axis2=2)) ln_q_T = np.sum([log_wishart_B(inverse_W[k], nu[k]) for k in range(nb_components)]) + np.sum(0.5 * (nu - d - 1) * expectation[2]) - np.sum(0.5 * np.trace(inverse_W @ expectation[1], axis1=1, axis2=2))
lower_bound[p] = ln_p_x + ln_p_z + ln_p_mu + ln_p_T + ln_q_z + ln_q_mu + ln_q_T lower_bound[p] = ln_p_x + ln_p_z + ln_p_mu + ln_p_T + ln_q_z + ln_q_mu + ln_q_T
# Displaying every ten iterations the result # Displaying every ten iterations the result
if p % 10 == 0: if p % 10 == 0:
covs = inverse_W / nu[:, np.newaxis, np.newaxis] covs = inverse_W / nu[:, np.newaxis, np.newaxis]
display_2D(m,covs,nb_components,weights,X) display_2D(m,covs,nb_components,weights,X)
plt.title('iteration '+ str(p)) plt.title('iteration '+ str(p))
plt.show() plt.show()
# Final parameter update # Final parameter update
covs = inverse_W / nu[:, np.newaxis, np.newaxis] covs = inverse_W / nu[:, np.newaxis, np.newaxis]
display_2D(m,covs,nb_components,weights,X) display_2D(m,covs,nb_components,weights,X)
plt.title('iteration '+ str(p)) plt.title('iteration '+ str(p))
plt.show() plt.show()
def log_wishart_B(inverse_W, nu): def log_wishart_B(inverse_W, nu):
d = len(inverse_W) d = len(inverse_W)
return + 0.5 * nu * np.log(np.linalg.det(inverse_W)) - 0.5 * nu * d * np.log(2) - 0.25 * d * (d-1) * np.log(np.pi) - gammaln(0.5 * (nu - np.arange(d))).sum() return + 0.5 * nu * np.log(np.linalg.det(inverse_W)) - 0.5 * nu * d * np.log(2) - 0.25 * d * (d-1) * np.log(np.pi) - gammaln(0.5 * (nu - np.arange(d))).sum()
def display_2D(m,covs,nb_components,weights, X): def display_2D(m,covs,nb_components,weights, X):
# Grid # Grid
xmin, xmax, ymin, ymax = X[:,0].min(), X[:,0].max(), X[:,1].min(), X[:,1].max() xmin, xmax, ymin, ymax = X[:,0].min(), X[:,0].max(), X[:,1].min(), X[:,1].max()
mx, my =.1 * (xmax - xmin), .1 * (ymax - ymin) # margins mx, my =.1 * (xmax - xmin), .1 * (ymax - ymin) # margins
xmin, xmax, ymin, ymax = xmin - mx, xmax + mx, ymin - my, ymax + my xmin, xmax, ymin, ymax = xmin - mx, xmax + mx, ymin - my, ymax + my
x = np.linspace(xmin, xmax, 200) x = np.linspace(xmin, xmax, 200)
y = np.linspace(ymin, ymax, 200) y = np.linspace(ymin, ymax, 200)
x, y = np.meshgrid(x, y) x, y = np.meshgrid(x, y)
pos = np.empty(x.shape + (2,)) pos = np.empty(x.shape + (2,))
pos[:,:,0] = x; pos[:,:,1] = y pos[:,:,0] = x; pos[:,:,1] = y
# Distributions # Distributions
rvs = [multivariate_normal(m[k], covs[k]) for k in range(nb_components)] rvs = [multivariate_normal(m[k], covs[k]) for k in range(nb_components)]
Z = sum([weights[k] * rvs[k].pdf(pos) for k in range(nb_components)]) Z = sum([weights[k] * rvs[k].pdf(pos) for k in range(nb_components)])
# Figure # Figure
plt.figure(figsize=(6,6)) plt.figure(figsize=(6,6))
# Dataset plot # Dataset plot
plt.plot(*X.T, '.', c='k') plt.plot(*X.T, '.', c='k')
plt.xlim(xmin, xmax) plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax) plt.ylim(ymin, ymax)
# Component distributions # Component distributions
ax = plt.gca() ax = plt.gca()
for k in range(nb_components): for k in range(nb_components):
if weights[k] >= 1e-5: if weights[k] >= 1e-5:
plot_confidence_ellipse(m[k], covs[k], 0.9, ax=ax,ec='black',alpha=max(0.3, weights[k] / max(weights))) plot_confidence_ellipse(m[k], covs[k], 0.9, ax=ax,ec='black',alpha=max(0.3, weights[k] / max(weights)))
def plot_confidence_ellipse(mu, cov, alph, ax, clabel=None, label_bg='white', **kwargs): def plot_confidence_ellipse(mu, cov, alph, ax, clabel=None, label_bg='white', **kwargs):
c = -2 * np.log(1 - alph) # quantile at alpha of the chi_squarred distr. with df = 2 c = -2 * np.log(1 - alph) # quantile at alpha of the chi_squarred distr. with df = 2
Lambda, Q = la.eig(cov) # eigenvalues and eigenvectors (col. by col.) Lambda, Q = la.eig(cov) # eigenvalues and eigenvectors (col. by col.)
# Compute the attributes of the ellipse # Compute the attributes of the ellipse
width, heigth = 2 * np.sqrt(c * Lambda) width, heigth = 2 * np.sqrt(c * Lambda)
# compute the value of the angle theta (in degree) # compute the value of the angle theta (in degree)
theta = 180 * np.arctan(Q[1,0] / Q[0,0]) / np.pi if cov[1,0] else 0 theta = 180 * np.arctan(Q[1,0] / Q[0,0]) / np.pi if cov[1,0] else 0
# Create the ellipse # Create the ellipse
if 'fc' not in kwargs.keys(): if 'fc' not in kwargs.keys():
kwargs['fc'] = 'None' kwargs['fc'] = 'None'
level_line = Ellipse(mu, width, heigth, angle=theta, **kwargs) level_line = Ellipse(mu, width, heigth, angle=theta, **kwargs)
return ax.add_patch(level_line) return ax.add_patch(level_line)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Dataset creation # Dataset creation
dataset_1 = np.random.multivariate_normal(mean=[1,1],cov=[[0.01,0],[0,0.01]],size=(100)) dataset_1 = np.random.multivariate_normal(mean=[1,1],cov=[[0.01,0],[0,0.01]],size=(100))
dataset_2 = np.random.multivariate_normal(mean=[-1,-1],cov=[[0.01,0],[0,0.01]],size=(100)) dataset_2 = np.random.multivariate_normal(mean=[-1,-1],cov=[[0.01,0],[0,0.01]],size=(100))
dataset = np.vstack((dataset_1,dataset_2)) dataset = np.vstack((dataset_1,dataset_2))
# Training and visualization of the iterations # Training and visualization of the iterations
fit_predict(dataset,nb_components=5,max_iter=100) fit_predict(dataset,nb_components=5,max_iter=100)
``` ```
%% Output %% Output
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment