1. GMM (Gaussian Mixture Model)
Given data drawn from an unknown distribution, a GMM models that distribution as a mixture of several Gaussian components: each sample is assumed to come from one of k Gaussians. Fitting the model then amounts to estimating the k mean vectors and covariance matrices (together with the mixing weights of the components).
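For reference, the density being fit and the EM updates that emgm.m (section 4 below) implements are the standard textbook ones; in my notation, with k components and mixing weights w_j:

p(x) = \sum_{j=1}^{k} w_j \, \mathcal{N}(x \mid \mu_j, \Sigma_j), \qquad \sum_{j=1}^{k} w_j = 1

E-step (responsibility of component j for sample x_i):

r_{ij} = \frac{w_j \, \mathcal{N}(x_i \mid \mu_j, \Sigma_j)}{\sum_{l=1}^{k} w_l \, \mathcal{N}(x_i \mid \mu_l, \Sigma_l)}

M-step (with n_j = \sum_i r_{ij}):

w_j = \frac{n_j}{n}, \quad \mu_j = \frac{1}{n_j} \sum_i r_{ij} \, x_i, \quad \Sigma_j = \frac{1}{n_j} \sum_i r_{ij} \, (x_i - \mu_j)(x_i - \mu_j)^\top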
2. MATLAB simulation results
[Figures: "Reference Data", "GMM result", and "k-means result" scatter plots, produced and saved as fig_ref.jpeg, fig_gmm.jpeg, and fig_kmeans.jpeg by the script below.]
3. example_gmm_vs_kmeans.m
%%
clc;
clear all;
close all;
%%
% Three 3-D Gaussian clusters: mean vectors and scalar spreads.
% Note: var1..var3 multiply randn directly, so they act as standard
% deviations, not variances, despite the naming.
mean1 = [1 2 3]';
var1 = 2;
mean2 = [3 2 1]';
var2 = 1;
mean3 = [1 3 -1]';
var3 = 0.5;
% 20 samples per cluster, stacked into a 3 x 60 data matrix.
x1 = mean1*ones(1,20) + var1*randn(3, 20);
x2 = mean2*ones(1,20) + var2*randn(3, 20);
x3 = mean3*ones(1,20) + var3*randn(3, 20);
x = [x1 x2 x3];
group = [ ones(1, 20) 2*ones(1, 20) 3*ones(1, 20) ]';   % ground-truth labels
total_num = 60;
fig_ref = figure(1);
spread(x, group);
title('Reference Data');
hold on;
ref_mu = [ mean1' ; mean2' ; mean3' ];   % k x d matrix of the true means
for i = 1:3
    plot3( ref_mu(i, 1), ref_mu(i, 2), ref_mu(i, 3), 'kx', 'MarkerSize',12,'LineWidth',2);
    plot3( ref_mu(i, 1), ref_mu(i, 2), ref_mu(i, 3), 'ko', 'MarkerSize',12,'LineWidth',2);
end
hold off;
%%
[label, model, llh] = emgm(x, 3);   % fit a 3-component GMM with EM
fig_gmm = figure(2);
spread(x, label);
hold on;
model.mu = model.mu';   % transpose the d x k means to k x d for row-wise indexing
for i = 1:3
    plot3( model.mu(i, 1), model.mu(i, 2), model.mu(i, 3), 'kx', 'MarkerSize',12,'LineWidth',2);
    plot3( model.mu(i, 1), model.mu(i, 2), model.mu(i, 3), 'ko', 'MarkerSize',12,'LineWidth',2);
end
hold off;
title('GMM result');
% Count label disagreements with the reference grouping. Note that EM
% component indices are arbitrary (label switching), so this raw count is
% only meaningful when the recovered labels happen to align with 'group'.
err_count = sum(label(:) ~= group(:));
%gmm_result = 1 - err_count/ total_num;
%fprintf('GMM result: %.1f \r\n', gmm_result*100);
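Because of that label-switching ambiguity, a fairer score searches over all relabelings of the estimated components. A minimal sketch (this permutation search is my addition, not part of the original script):

% Hypothetical accuracy check: best agreement over all 6 relabelings.
P = perms(1:3);                        % every permutation of the 3 labels
best_acc = 0;
for p = 1:size(P,1)
    mapped = P(p, label);              % relabel every sample at once
    best_acc = max(best_acc, mean(mapped(:) == group(:)));
end
fprintf('GMM accuracy (best over label permutations): %.1f%%\n', 100*best_acc);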
%%
[kmeans_label, kmeans_center] = kmeans(x', 3);   % kmeans expects n x d input, hence the transpose
fig_kmeans = figure(3);
spread(x, kmeans_label);
hold on;
for i = 1:3
    plot3( kmeans_center(i, 1), kmeans_center(i, 2), kmeans_center(i, 3), 'kx', 'MarkerSize',12,'LineWidth',2);
    plot3( kmeans_center(i, 1), kmeans_center(i, 2), kmeans_center(i, 3), 'ko', 'MarkerSize',12,'LineWidth',2);
end
hold off;
title('k-means result');
%% END
print(fig_ref,    '-djpeg', 'fig_ref.jpeg');
print(fig_gmm,    '-djpeg', 'fig_gmm.jpeg');
print(fig_kmeans, '-djpeg', 'fig_kmeans.jpeg');
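Note: kmeans here, and the randsample call inside emgm.m below, come from MATLAB's Statistics Toolbox; the script will error without it.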
4. emgm.m
function [label, model, llh] = emgm(X, init)
% Perform EM algorithm for fitting the Gaussian mixture model.
% X: d x n data matrix
% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k)
% Written by Michael Chen (sth4nth@gmail.com).
%% initialization
fprintf('EM for Gaussian mixture: running ... \n');
R = initialization(X,init);
[~,label(1,:)] = max(R,[],2);
R = R(:,unique(label));
tol = 1e-10;
maxiter = 500;
llh = -inf(1,maxiter);
converged = false;
t = 1;
while ~converged && t < maxiter
    t = t+1;
    model = maximization(X,R);
    [R, llh(t)] = expectation(X,model);
    [~,label(:)] = max(R,[],2);
    u = unique(label);                  % non-empty components
    if size(R,2) ~= size(u,2)
        R = R(:,u);                     % remove empty components
    else
        converged = llh(t)-llh(t-1) < tol*abs(llh(t));
    end
end
llh = llh(2:t);                         % drop the -inf placeholder at t = 1
if converged
    fprintf('Converged in %d steps.\n',t-1);
else
    fprintf('Not converged in %d steps.\n',maxiter);
end
function R = initialization(X, init)
[d,n] = size(X);
if isstruct(init)                       % initialize with a model
    R = expectation(X,init);
elseif length(init) == 1                % random initialization with k seed points
    k = init;
    idx = randsample(n,k);
    m = X(:,idx);
    [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1);
    [u,~,label] = unique(label);
    while k ~= length(u)                % re-draw seeds until all k clusters are non-empty
        idx = randsample(n,k);
        m = X(:,idx);
        [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1);
        [u,~,label] = unique(label);
    end
    R = full(sparse(1:n,label,1,n,k,n));
elseif size(init,1) == 1 && size(init,2) == n   % initialize with labels
    label = init;
    k = max(label);
    R = full(sparse(1:n,label,1,n,k,n));
elseif size(init,1) == d                % initialize with centers only
    k = size(init,2);
    m = init;
    [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1);
    R = full(sparse(1:n,label,1,n,k,n));
else
    error('ERROR: init is not valid.');
end
function [R, llh] = expectation(X, model)
mu = model.mu;
Sigma = model.Sigma;
w = model.weight;
n = size(X,2);
k = size(mu,2);
logRho = zeros(n,k);
for i = 1:k
    logRho(:,i) = loggausspdf(X,mu(:,i),Sigma(:,:,i));
end
logRho = bsxfun(@plus,logRho,log(w));
T = logsumexp(logRho,2);
llh = sum(T)/n; % loglikelihood
logR = bsxfun(@minus,logRho,T);
R = exp(logR);
function model = maximization(X, R)
[d,n] = size(X);
k = size(R,2);
nk = sum(R,1);
w = nk/n;
mu = bsxfun(@times, X*R, 1./nk);
Sigma = zeros(d,d,k);
sqrtR = sqrt(R);
for i = 1:k
    Xo = bsxfun(@minus,X,mu(:,i));
    Xo = bsxfun(@times,Xo,sqrtR(:,i)');
    Sigma(:,:,i) = Xo*Xo'/nk(i);
    Sigma(:,:,i) = Sigma(:,:,i)+eye(d)*(1e-6);   % add a prior for numerical stability
end
model.mu = mu;
model.Sigma = Sigma;
model.weight = w;
function y = loggausspdf(X, mu, Sigma)
d = size(X,1);
X = bsxfun(@minus,X,mu);
[U,p]= chol(Sigma);
if p ~= 0
    error('ERROR: Sigma is not PD.');
end
Q = U'\X;
q = dot(Q,Q,1); % quadratic term (M distance)
c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant
y = -(c+q)/2;
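loggausspdf evaluates log N(x | mu, Sigma) via a Cholesky factorization, which is faster and numerically safer than forming inv(Sigma) explicitly. Since it is a subfunction of emgm.m, it is not callable from outside; but if you save it as its own file, a quick cross-check against the toolbox's mvnpdf could look like this (my sketch, not part of the original post):

mu0 = [0; 0];  S0 = [2 0.5; 0.5 1];  x0 = [1; -1];
log_ref  = log(mvnpdf(x0', mu0', S0));   % Statistics Toolbox reference value
log_chol = loggausspdf(x0, mu0, S0);     % should match to roundoff error
fprintf('mvnpdf: %.6f  loggausspdf: %.6f\n', log_ref, log_chol);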
logsumexp.m (helper used by emgm.m)
function s = logsumexp(x, dim)
% Compute log(sum(exp(x),dim)) while avoiding numerical underflow.
% By default dim = 1 (columns).
% Written by Michael Chen (sth4nth@gmail.com).
if nargin == 1
    % Determine which dimension sum will use
    dim = find(size(x)~=1,1);
    if isempty(dim), dim = 1; end
end
% subtract the largest in each column
y = max(x,[],dim);
x = bsxfun(@minus,x,y);
s = y + log(sum(exp(x),dim));
i = find(~isfinite(y));
if ~isempty(i)
    s(i) = y(i);
end
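A two-line illustration of why the shift by the per-column maximum matters: the naive form overflows where logsumexp stays finite.

x = [1000 1001];
log(sum(exp(x)))   % Inf: exp(1000) overflows double precision
logsumexp(x, 2)    % ~1001.3133, computed stably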
5. spread.m
function spread(X, label)
% Plot samples of different labels with different colors.
% Written by Michael Chen (sth4nth@gmail.com).
[d,n] = size(X);
if nargin == 1
    label = ones(n,1);
end
assert(n == length(label));
color = 'brgmcyk';
m = length(color);
c = max(label);
figure(gcf);
clf;
hold on;
switch d
    case 2
        view(2);
        for i = 1:c
            idc = (label == i);
            scatter(X(1,idc),X(2,idc),36,color(mod(i-1,m)+1));
        end
    case 3
        view(3);
        for i = 1:c
            idc = (label == i);
            scatter3(X(1,idc),X(2,idc),X(3,idc),36,color(mod(i-1,m)+1));
        end
    otherwise
        error('ERROR: only 2-D or 3-D data are supported.');
end
axis equal
grid on
hold off
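For quick standalone testing, spread takes a d x n data matrix and a length-n label vector; for instance, spread(randn(3,100), randi(3,1,100)) scatters 100 random 3-D points in three colors.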