diff --git a/doc/dp-derivation.rst b/doc/dp-derivation.rst index f467bec8c214ba1dc2f284edf3f8cd16c2237167..63c0ac35599947cba1c1e7c30b61672f091aba3b 100644 --- a/doc/dp-derivation.rst +++ b/doc/dp-derivation.rst @@ -128,13 +128,10 @@ distribution. \begin{array}{rcl} && E_q[\log P(z)] - E_q[\log Q(z)] \\ &=& - \sum_{k} \left( \left(\sum_{j=k+1}^K - \nu_{z_{i,j}}\right)(\Psi(\gamma{k,1})-\Psi(\gamma{k,1}+\gamma_{k,2})) - + - \nu_{z_{i,k}}(\Psi(\gamma_{k,1})-\Psi(\gamma_{k,1}+\gamma_{k,2}))\right) - \\ && - - \sum_k - \nu_{z_{i,k}} \log \nu_{z_{i,k}} \\ + \sum_{k} \left( + \left(\sum_{j=k+1}^K \nu_{z_{i,j}}\right)(\Psi(\gamma_{k,2})-\Psi(\gamma_{k,1}+\gamma_{k,2})) + + \nu_{z_{i,k}}(\Psi(\gamma_{k,1})-\Psi(\gamma_{k,1}+\gamma_{k,2})) + - \nu_{z_{i,k}} \log \nu_{z_{i,k}} \right) \end{array} diff --git a/examples/mixture/plot_dpgmm.py b/examples/mixture/plot_dpgmm.py new file mode 100644 index 0000000000000000000000000000000000000000..3b56369711eb3aaa0ce7c66cf5c0f7f0f72d3d6b --- /dev/null +++ b/examples/mixture/plot_dpgmm.py @@ -0,0 +1,48 @@ +""" +================================= +DP Mixture Model Ellipsoids +================================= + +Plot the confidence ellipsoids of a Dirichlet process mixture fit to data drawn from two Gaussians, for several values of the concentration parameter alpha. +""" + +import numpy as np +from scikits.learn import mixture +import itertools + +import pylab as pl +import matplotlib as mpl + +n, m = 300, 2 + +# generate random sample, two components +np.random.seed(0) +C = np.array([[0., -0.7], [3.5, .7]]) +X = np.r_[np.dot(np.random.randn(n, 2), C), + np.random.randn(n, 2) + np.array([3, 3])] + +for p, alpha in enumerate([0.01, 1., 100.]): + clf = mixture.DPGMM(n_states=10, cvtype='full', alpha=alpha) + clf.fit(X) + + splot = pl.subplot(310+p+1, aspect='equal') + color_iter = itertools.cycle(['r', 'g', 'b', 'c']) + + Y_ = clf.predict(X) + + for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars, color_iter)): + v, w = np.linalg.eigh(covar) + u = w[0] / np.linalg.norm(w[0]) + if sum(Y_ == i) > 1: + pl.scatter(X[Y_==i, 0], X[Y_==i, 1], .8, color=color) + else: + continue + angle = np.arctan(u[1]/u[0]) + angle = 180 * angle / np.pi # convert to degrees + ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color) + ell.set_clip_box(splot.bbox) + ell.set_alpha(0.5) + splot.add_artist(ell) + +pl.show() + diff --git a/scikits/learn/mixture/__init__.py b/scikits/learn/mixture/__init__.py index de151289b75b8bad514bbf5b25b369e88e044975..3495238c2492b2fd4cd519f61b51540a1c31d2f3 100644 --- a/scikits/learn/mixture/__init__.py +++ b/scikits/learn/mixture/__init__.py @@ -4,4 +4,4 @@ Mixture modeling algorithms from .gmm import logsum, normalize, sample_gaussian, lmvnpdf from .gmm import GMM -from .dpgmm import DPGMM, VBGMM +from .dpgmm import DPGMM diff --git a/scikits/learn/mixture/dpgmm.py b/scikits/learn/mixture/dpgmm.py index be3dd6fa53f2e638ddd4c9ea7e5bcca34f1f2f38..435b129b735570369345fc311bfa7f9f7a628bfc 100644 --- a/scikits/learn/mixture/dpgmm.py +++ b/scikits/learn/mixture/dpgmm.py @@ -2,7 +2,9 @@ import numpy as np from .
import gmm as mixture -from scipy.special import digamma +from scikits.learn import cluster + +from scipy.special import digamma, gammaln # Author: Alexandre Passos (alexandre.tp@gmail.com) # @@ -27,7 +29,8 @@ def squarenorm(x, Sigma): def lognormalize(v): """Given a vector of unnormalized log-probabilites v returns a vector of normalized probabilities""" - return np.exp(v-np.logaddexp.reduce(v)) + v = np.exp(v-np.logaddexp.reduce(v)) + return v / np.sum(v) class DPGMM(mixture.GMM): @@ -87,21 +90,21 @@ class DPGMM(mixture.GMM): Methods ------- - decode(X) - Find most likely mixture components for each point in `X`. - eval(X) - Compute a lower-bound of the log likelihood of `X` under the model + decode(X) + Find the most likely mixture components for each point in `X`. + eval(X) + Compute a lower bound on the log likelihood of `X` under the model and an approximate posterior distribution over mixture components. - fit(X) - Estimate the posterior of themodel parameters from `X` using the + fit(X) + Estimate the posterior of the model parameters from `X` using the variational mean-field algorithm. - predict(X) + predict(X) Like decode, find most likely mixtures components for each - observation in `X`. + observation in `X`. rvs(n=1) Generate `n` samples from the posterior for the model. - score(X) - Compute the log likelihood of `X` under the model. + score(X) + Compute the log likelihood of `X` under the model. """ @@ -120,44 +123,51 @@ class DPGMM(mixture.GMM): elif self.cvtype == 'spherical': return [np.eye(self.n_features) * f for f in self._covars] - def _set_covars(self, covars): + def _set_precisions(self, covars): covars = np.asanyarray(covars) _validate_covars(covars, self._cvtype, self._n_states, self.n_features) self._covars = covars + + def _get_covars(self): + return [np.linalg.pinv(c) for c in self._get_precisions()] + + def _set_covars(self, covars): + self._covars = [np.linalg.pinv(c) for c in covars] - precisions = property(_get_covars, _set_covars) - covars = property(lambda *args: raise NotImplementedError("Use precisions"), - lambda *args: raise NotImplementedError("Use precisions")) + precisions = property(_get_precisions, _set_precisions) + covars = property(_get_covars, _set_covars) def _bound_pxgivenz(self, x, k): bound = -0.5*self.n_features*np.log(2*np.pi) bound -= np.log(2*np.pi*np.e) if self.cvtype == 'spherical': - bound -= 0.5*self.n_features*(digamma(self._a[k])-np.log(self._b[k])) + bound += 0.5*self.n_features*(digamma(self._a[k])-np.log(self._b[k])) bound -= 0.5*(self._covars[k])*(norm(x-self._means[k])+self.n_features) elif self.cvtype == 'diag': for i in xrange(self.n_features): - bound -= 0.5*np.sum(digamma(self._a[k])-np.log(self._b[k])) + bound += 0.5*np.sum(digamma(self._a[k])-np.log(self._b[k])) bound -= 0.5*diagnorm(x-self._means[k], self._covars[k]) + bound -= 0.5*np.sum(self._covars[k]) elif self.cvtype == 'tied': + # check this bound for i in xrange(self.n_features): - bound -= 0.5*digamma((self._a-1-i)/2.) - bound -= 0.5*self.n_features*np.log(2) - bound -= 0.5*np.log(self._detB) + bound += 0.5*digamma((self._a-1-i)/2.) + bound += 0.5*self.n_features*np.log(2) + bound += 0.5*np.log(self._detB) bound -= 0.5*squarenorm(x-self._means[k], self._covars) bound -= 0.5*self._a*np.trace(self._B) elif self.cvtype == 'full': for i in xrange(self.n_features): - bound -= 0.5*digamma((self._a[k]-1-i)/2.)
- bound -= 0.5*self.n_features*np.log(2) - bound -= 0.5*np.log(self._detB[k]) + bound += 0.5*digamma((self._a[k]-1-i)/2.) + bound += 0.5*self.n_features*np.log(2) + bound += 0.5*np.log(self._detB[k]) bound -= 0.5*squarenorm(x-self._means[k], self._covars[k]) bound -= 0.5*self._a[k]*np.trace(self._B[k]) else: raise NotImplementedError("This ctype is not implemented: "+self.cvtype) return bound - def eval(self, obs): + def eval(self, obs=None): """Evaluate the model on data Compute the bound on log probability of `obs` under the model @@ -181,8 +191,12 @@ class DPGMM(mixture.GMM): Posterior probabilities of each mixture component for each observation """ + if obs is None: + z = self._z + obs = self._X + else: + z = np.zeros((obs.shape[0],self.n_states)) obs = np.asanyarray(obs) - z = np.zeros((obs.shape[0],self.n_states)) p = np.zeros(self.n_states) bound = np.zeros(obs.shape[0]) for i in xrange(obs.shape[0]): @@ -197,89 +211,183 @@ class DPGMM(mixture.GMM): bound[i] = np.sum(z[i]*p) return bound, z - def _update_gamma(self, z): + def _update_gamma(self): for i in xrange(self.n_states): - self._gamma[i,1] = 1. + np.sum(z.T[i]) + self._gamma[i,1] = 1. + np.sum(self._z.T[i]) self._gamma[i,2] = self.alpha for k in xrange(i+1, self.n_states): - for j in xrange(z.shape[0]): - self._gamma[i,2] += z[j,k] + for j in xrange(self._z.shape[0]): + self._gamma[i,2] += self._z[j,k] - def _update_mu(self, X, z): + + def _update_mu(self): for k in xrange(self.n_states): if self.cvtype == 'spherical' or self.cvtype == 'diag': - num = X[0]*z[0,k] - for i in xrange(1,X.shape[0]): - num += X[i]*z[i,k] + num = self._X[0]*self._z[0,k] + for i in xrange(1,self._X.shape[0]): + num += self._X[i]*self._z[i,k] num *= self._covars[k] - den = 1. + self._covars[k]*np.sum(z.T[k]) - #print k, self._means[k], num + den = 1. + self._covars[k]*np.sum(self._z.T[k]) self._means[k] = num/den elif self.cvtype == 'tied' or self.cvtype == 'full': if self.cvtype == 'tied': cov = self._covars else: cov = self._covars[k] - den = np.identity(self.n_features) + cov*np.sum(z.T[k]) - num = X[0]*z[0,k] - for i in xrange(1,X.shape[0]): - num += X[i]*z[i,k] + den = np.identity(self.n_features) + cov*np.sum(self._z.T[k]) + num = self._X[0]*self._z[0,k] + for i in xrange(1,self._X.shape[0]): + num += self._X[i]*self._z[i,k] num = np.dot(cov, num) self._means[k] = np.linalg.lstsq(den, num)[0] - def _update_ab(self, X, z): + def _update_ab(self): if self.cvtype == 'spherical': - self._a = 1+ 0.5*self.n_features*np.sum(z, axis=0) + self._a = 0.5*self.n_features*np.sum(self._z, axis=0) for k in xrange(self.n_states): - difs = np.sum((X-self._means[k])*(X-self._means[k]), axis=1) - self._b[k] = 1 + 0.5*np.sum(z.T[k]*(difs+self.n_features)) - self._covars = self._a*self._b + self._b[k] = 1. + for i in xrange(self._X.shape[0]): + dif = norm((self._X[i]-self._means[k])) + self._b[k] += 0.5*self._z[i,k]*(dif) + self._covars = self._a/self._b elif self.cvtype == 'diag': - self._a = np.ones((self.n_states, self.n_features)) for k in xrange(self.n_states): - self._a[k] *= 1+ 0.5*self.n_features*np.sum(z.T[k], axis=0) + self._a[k] = 0.5*self.n_features*np.sum(self._z.T[k], axis=0) + self._covars[k] = self._a[k]/self._b[k] + continue for d in xrange(self.n_features): self._b[k,d] = 1. 
- for i in xrange(X.shape[0]): - dif = X[i,d]-self._means[k,d] - self._b[k,d] += z[i,k]*(dif*dif+1) - self._covars = self._a/self._b + for i in xrange(self._X.shape[0]): + dif = self._X[i,d]-self._means[k,d] + self._b[k,d] += self._z[i,k]*max((dif*dif+1),1) + self._b[k,d] = min(10*self._a[k,d], self._b[k,d]) + self._covars[k] = self._a[k]/self._b[k] elif self.cvtype == 'tied': - self._a = 2+X.shape[0]+self.n_features - self._B = (X.shape[0]+1)*np.identity(self.n_features) - for i in xrange(X.shape[0]): + self._a = 2+self._X.shape[0]+self.n_features + self._B = (self._X.shape[0]+1)*np.identity(self.n_features) + for i in xrange(self._X.shape[0]): for k in xrange(self.n_states): - dif = X[i]-self._means[k] - self._B += z[i,k]*np.dot(dif.reshape((-1,1)), + dif = self._X[i]-self._means[k] + self._B += self._z[i,k]*np.dot(dif.reshape((-1,1)), dif.reshape((1,-1))) self._B = np.linalg.pinv(self._B) self._covars = self._a*self._B self._detB = np.linalg.det(self._B) elif self.cvtype == 'full': for k in xrange(self.n_states): - T = np.sum(z.T[k]) + T = np.sum(self._z.T[k]) self._a[k] = 2+T+self.n_features self._B[k]= (T+1)*np.identity(self.n_features) - for i in xrange(X.shape[0]): - dif = X[i]-self._means[k] - self._B[k] += z[i,k]*np.dot(dif.reshape((-1,1)), + for i in xrange(self._X.shape[0]): + dif = self._X[i]-self._means[k] + self._B[k] += self._z[i,k]*np.dot(dif.reshape((-1,1)), dif.reshape((1,-1))) self._B[k] = np.linalg.pinv(self._B[k]) self._covars[k] = self._a[k]*self._B[k] self._detB[k] = np.linalg.det(self._B[k]) - def _do_mstep(self, X, posteriors, params): - self._update_gamma(posteriors) + def _do_mstep(self, params, monitor=False): + if monitor: print 1, self._lower_bound() + self._update_gamma() + if monitor: print 2, self._lower_bound() if 'm' in params: - self._update_mu(X, posteriors) + self._update_mu() + if monitor: print 3, self._lower_bound() if 'c' in params: - self._update_ab(X, posteriors) + self._update_ab() + if monitor: print 4, self._lower_bound() + if monitor: print self._gamma.T[1] + - def initialize_gamma(self): + def _initialize_gamma(self): self._gamma = self.alpha*np.ones((self.n_states,3)) + def _bound_gamma(self): + logprior = 0. + for k in xrange(self.n_states): + logprior += gammaln(self.alpha) + logprior += (self.alpha-1)*(digamma(self._gamma[k,2])- + digamma(self._gamma[k,1]+self._gamma[k,2])) + logprior += -gammaln(self._gamma[k,1]+self._gamma[k,2]) + logprior += gammaln(self._gamma[k,1])+gammaln(self._gamma[k,2]) + logprior -= (self._gamma[k,1]-1)*(digamma(self._gamma[k,1])- + digamma(self._gamma[k,1]+self._gamma[k,2])) + logprior -= (self._gamma[k,2]-1)*(digamma(self._gamma[k,2])- + digamma(self._gamma[k,1]+self._gamma[k,2])) + return logprior + + def _bound_mu(self): + logprior = 0. + for k in xrange(self.n_states): + logprior -= 0.5*norm(self._means[k]) + 0.5*self.n_features + return logprior + + def _bound_ab(self): + logprior = 0.
+ if self.cvtype == 'spherical': + for k in xrange(self.n_states): + logprior += gammaln(self._a[k])-(self._a[k]-1)*digamma(self._a[k]) + logprior += -np.log(self._b[k])+self._a[k]-self._covars[k] + elif self.cvtype == 'diag': + for k in xrange(self.n_states): + for d in xrange(self.n_features): + logprior += gammaln(self._a[k,d]) + logprior -= (self._a[k,d]-1)*digamma(self._a[k,d]) + logprior += -np.log(self._b[k,d])+self._a[k,d]-self._covars[k,d] + elif self.cvtype == 'tied': + for d in xrange(self.n_features): + logprior += gammaln(0.5*(self.n_features+1-d)) + logprior -= gammaln(0.5*(self._a+1-d)) + logprior += (self._a-self.n_features)*0.5*(digamma(0.5*(self._a+1-d))) + logprior += (self._a-self.n_features)*0.5*np.log(self._detB) + logprior -= 0.5*self.n_features*self._a*np.log(2) + logprior += 0.5*self._a*np.log(self._detB) + logprior += 0.5*self._a*np.trace(self._B-np.identity(self.n_features)) + elif self.cvtype == 'full': + for k in xrange(self.n_states): + for d in xrange(self.n_features): + logprior += gammaln(0.5*(self.n_features+1-d)) + logprior -= gammaln(0.5*(self._a[k]+1-d)) + logprior += (self._a[k]-self.n_features)*0.5*(digamma(0.5*(self._a[k] + +1-d))) + logprior += (self._a[k]-self.n_features)*0.5*np.log(self._detB[k]) + logprior -= 0.5*self.n_features*self._a[k]*np.log(2) + logprior += 0.5*self._a[k]*np.log(self._detB[k]) + logprior += 0.5*self._a[k]*np.trace(self._B[k]-np.identity(self.n_features)) + return logprior + + def _bound_z(self): + logprior = 0. + for i in xrange(self._z.shape[0]): + for k in xrange(self.n_states): + a = 0. + for j in xrange(k+1, self.n_states): + a += self._z[i,j] + logprior += a*(digamma(self._gamma[k,2]) + -digamma(self._gamma[k,1]+self._gamma[k,2])) + logprior += self._z[i,k]*(digamma(self._gamma[k,1]) + -digamma(self._gamma[k,1]+self._gamma[k,2])) + if self._z[i,k] != 0: + logprior -= self._z[i,k]*np.log(self._z[i,k]) + return logprior + + def _logprior(self): + logprior = self._bound_gamma() + logprior += self._bound_mu() + logprior += self._bound_ab() + logprior += self._bound_z() + return logprior + + def _lower_bound(self): + c = 0. + for i in xrange(self._X.shape[0]): + for k in xrange(self.n_states): + c += self._z[i,k]*self._bound_pxgivenz(self._X[i],k) + return c + self._logprior() + + - def fit(self, X, n_iter=10, thresh=1e-2, params='wmc', + def fit(self, X, n_iter=20, thresh=1e-2, params='wmc', init_params='wmc'): """Estimate model parameters with the variational algorithm. @@ -295,7 +403,7 @@ class DPGMM(mixture.GMM): Parameters ---------- - X : array_like, shape (n, n_features) + X : array_like, shape (n_samples, n_features) List of n_features-dimensional data points. Each row corresponds to a single data point.
n_iter : int, optional @@ -314,18 +422,20 @@ class DPGMM(mixture.GMM): ## initialization step - X = np.asanyarray(X) - - if hasattr(self, 'n_features') and self.n_features != X.shape[1]: + self._X = np.asanyarray(X) + if hasattr(self, 'n_features') and self.n_features != self._X.shape[1]: raise ValueError('Unexpected number of dimensions, got %s but ' - 'expected %s' % (X.shape[1], self.n_features)) + 'expected %s' % (self._X.shape[1], self.n_features)) - self.n_features = X.shape[1] + self.n_features = self._X.shape[1] + self._z = np.ones((self._X.shape[0],self.n_states))/self.n_states - self._initalize_gamma() + self._initialize_gamma() if 'm' in init_params or not hasattr(self, 'means'): - self._means = np.random.normal(0, 10, size=(self.n_states, self.n_features)) + self._means = cluster.KMeans( + k=self._n_states).fit(X).cluster_centers_ + #self._means = np.random.normal(0, 1, size=(self.n_states, self.n_features)) if 'w' in init_params or not hasattr(self, 'weights'): self.weights = np.tile(1.0 / self._n_states, self._n_states) @@ -336,7 +446,8 @@ class DPGMM(mixture.GMM): self._b = np.ones(self.n_states) self._covars = np.ones(self.n_states) elif self.cvtype == 'diag': - self._a = np.ones((self.n_states,self.n_features)) + self._a = (1+ 0.5*self.n_features)*np.ones((self.n_states, + self.n_features)) self._b = np.ones((self.n_states,self.n_features)) self._covars = np.ones((self.n_states,self.n_features)) elif self.cvtype == 'tied': @@ -345,8 +456,8 @@ class DPGMM(mixture.GMM): self._covars = np.identity(self.n_features) self._detB = 1. elif self.cvtype == 'full': - self._a = np.ones(self.n_states) - self._B = [np.identity(self.n_features) for i in xrange(self.n_states)] + self._a = (1+self.n_states+self._X.shape[0])*np.ones(self.n_states) + self._B = [2*np.identity(self.n_features) for i in xrange(self.n_states)] self._covars = [np.identity(self.n_features) for i in xrange(self.n_states)] self._detB = np.ones(self.n_states) @@ -355,9 +466,8 @@ class DPGMM(mixture.GMM): self.converged_ = False for i in xrange(n_iter): # Expectation step - curr_logprob, posteriors = self.eval(X) - logprob.append(curr_logprob.sum()) - print logprob + curr_logprob, z = self.eval() + logprob.append(curr_logprob.sum()+self._logprior()) # Check for convergence. if i > 0 and abs(logprob[-1] - logprob[-2]) < thresh: @@ -365,125 +475,152 @@ class DPGMM(mixture.GMM): break # Maximization step - self._do_mstep(X, posteriors, params) + self._do_mstep(params) return self -class VBGMM(DPGMM): - """Variational Inference for the Infinite Gaussian Mixture Model - - Variational inference for a Gaussian mixture model probability - distribution. This class allows for easy and efficient inference - of an approximate posterior distribution over the parameters of a - gaussian mixture model with a fixed number of components. - - Initialization is with normally-distributed means and identity - covariance, for proper convergence. - - Parameters - ---------- - n_states : int, optional - Number of mixture components. Defaults to 1. - - cvtype : string (read-only), optional - String describing the type of covariance parameters to - use. Must be one of 'spherical', 'tied', 'diag', 'full'. - Defaults to 'diag'. - - alpha : float, optional - Real number representing the concentration parameter of - the dirichlet distribution. Intuitively, the DP is as likely - to start a new cluster for a point as it is to add that - point to a cluster with alpha elements. 
A higher alpha - means more clusters, as the expected number of clusters - is alpha*log(N). Defaults to 1. - - - Attributes - ---------- - cvtype : string (read-only) - String describing the type of covariance parameters used by - the DP-GMM. Must be one of 'spherical', 'tied', 'diag', 'full'. - n_features : int - Dimensionality of the Gaussians. - n_states : int (read-only) - Number of mixture components. - weights : array, shape (`n_states`,) - Mixing weights for each mixture component. - means : array, shape (`n_states`, `n_features`) - Mean parameters for each mixture component. - precisions : array - Precision (inverse covariance) parameters for each mixture - component. The shape depends on `cvtype`: - (`n_states`,) if 'spherical', - (`n_features`, `n_features`) if 'tied', - (`n_states`, `n_features`) if 'diag', - (`n_states`, `n_features`, `n_features`) if 'full' - converged_ : bool - True when convergence was reached in fit(), False - otherwise. - - Methods - ------- - decode(X) - Find most likely mixture components for each point in `X`. - eval(X) - Compute a lower-bound of the log likelihood of `X` under the model - and an approximate posterior distribution over mixture components. - fit(X) - Estimate the posterior of themodel parameters from `X` using the - variational mean-field algorithm. - predict(X) - Like decode, find most likely mixtures components for each - observation in `X`. - rvs(n=1) - Generate `n` samples from the posterior for the model. - score(X) - Compute the log likelihood of `X` under the model. - """ - - - - def eval(self, obs): - """Evaluate the model on data - - Compute the bound on log probability of `obs` under the model - and return the posterior distribution (responsibilities) of - each mixture component for each element of `obs`. - - This is done by computing the parameters for the mean-field of - z for each observation. - - Parameters - ---------- - obs : array_like, shape (n_samples, n_features) - List of n_features-dimensional data points. Each row - corresponds to a single data point. - - Returns - ------- - logprob : array_like, shape (n_samples,) - Log probabilities of each data point in `obs` - posteriors: array_like, shape (n_samples, n_states) - Posterior probabilities of each mixture component for each - observation - """ - obs = np.asanyarray(obs) - z = np.zeros((obs.shape[0],self.n_states)) - p = np.zeros(self.n_states) - bound = np.zeros(obs.shape[0]) - for i in xrange(obs.shape[0]): - for k in xrange(self.n_states): - p[k] = z[i,k] = self._bound_pxgivenz(obs[i], k) - z[i,k] += digamma(self._gamma[k]) - z[i] = lognormalize(z[i]) - bound[i] = np.sum(z[i]*p) - return bound, z - - def _update_gamma(self, z): - for i in xrange(self.n_states): - self._gamma[i] = self.alpha + np.sum(z.T[i]) - - def initialize_gamma(self): - self._gamma = self.alpha*np.ones(self.n_states) +# class VBGMM(DPGMM): +# """Variational Inference for the Infinite Gaussian Mixture Model + +# Variational inference for a Gaussian mixture model probability +# distribution. This class allows for easy and efficient inference +# of an approximate posterior distribution over the parameters of a +# gaussian mixture model with a fixed number of components. + +# Initialization is with normally-distributed means and identity +# covariance, for proper convergence. + +# Parameters +# ---------- +# n_states : int, optional +# Number of mixture components. Defaults to 1. + +# cvtype : string (read-only), optional +# String describing the type of covariance parameters to +# use. 
Must be one of 'spherical', 'tied', 'diag', 'full'. +# Defaults to 'diag'. + +# alpha : float, optional +# Real number representing the concentration parameter of +# the dirichlet distribution. Intuitively, the DP is as likely +# to start a new cluster for a point as it is to add that +# point to a cluster with alpha elements. A higher alpha +# means more clusters, as the expected number of clusters +# is alpha*log(N). Defaults to 1. + + +# Attributes +# ---------- +# cvtype : string (read-only) +# String describing the type of covariance parameters used by +# the DP-GMM. Must be one of 'spherical', 'tied', 'diag', 'full'. +# n_features : int +# Dimensionality of the Gaussians. +# n_states : int (read-only) +# Number of mixture components. +# weights : array, shape (`n_states`,) +# Mixing weights for each mixture component. +# means : array, shape (`n_states`, `n_features`) +# Mean parameters for each mixture component. +# precisions : array +# Precision (inverse covariance) parameters for each mixture +# component. The shape depends on `cvtype`: +# (`n_states`,) if 'spherical', +# (`n_features`, `n_features`) if 'tied', +# (`n_states`, `n_features`) if 'diag', +# (`n_states`, `n_features`, `n_features`) if 'full' +# converged_ : bool +# True when convergence was reached in fit(), False +# otherwise. + +# Methods +# ------- +# decode(self._X) +# Find most likely mixture components for each point in `self._X`. +# eval(self._X) +# Compute a lower-bound of the log likelihood of `self._X` under the model +# and an approximate posterior distribution over mixture components. +# fit(self._X) +# Estimate the posterior of themodel parameters from `self._X` using the +# variational mean-field algorithm. +# predict(self._X) +# Like decode, find most likely mixtures components for each +# observation in `self._X`. +# rvs(n=1) +# Generate `n` samples from the posterior for the model. +# score(self._X) +# Compute the log likelihood of `X` under the model. +# """ + +# def __init__(self, n_states=1, cvtype='diag', alpha=1.0): +# super(VBGMM, self).__init__(n_states, cvtype) +# self.alpha = float(alpha)/n_states + + +# def eval(self, obs): +# """Evaluate the model on data + +# Compute the bound on log probability of `obs` under the model +# and return the posterior distribution (responsibilities) of +# each mixture component for each element of `obs`. + +# This is done by computing the parameters for the mean-field of +# z for each observation. + +# Parameters +# ---------- +# obs : array_like, shape (n_samples, n_features) +# List of n_features-dimensional data points. Each row +# corresponds to a single data point. 
+ +# Returns +# ------- +# logprob : array_like, shape (n_samples,) +# Log probabilities of each data point in `obs` +# posteriors: array_like, shape (n_samples, n_states) +# Posterior probabilities of each mixture component for each +# observation +# """ +# if obs is None: +# z = self._z +# obs = self._X +# else: +# z = np.zeros((obs.shape[0],self.n_states)) +# obs = np.asanyarray(obs) +# p = np.zeros(self.n_states) +# bound = np.zeros(obs.shape[0]) +# for i in xrange(obs.shape[0]): +# for k in xrange(self.n_states): +# p[k] = z[i,k] = self._bound_pxgivenz(obs[i], k) +# z[i,k] += digamma(self._gamma[k]) +# z[i,k] -= digamma(np.sum(self._gamma)) +# z[i] = lognormalize(z[i]) +# bound[i] = np.sum(z[i]*p) +# return bound, z + +# def _update_gamma(self): +# for i in xrange(self.n_states): +# self._gamma[i] = self.alpha + np.sum(self._z.T[i]) + +# def _initialize_gamma(self): +# self._gamma = self.alpha*np.ones(self.n_states) + + +# def _bound_z(self): +# logprior = 0. +# for i in xrange(self._z.shape[0]): +# for k in xrange(self.n_states): +# logprior += self._z[i,k]*(digamma(self._gamma[k]) +# -digamma(np.sum(self._gamma))) +# logprior -= self._z[i,k]*np.log(self._z[i,k]) +# return logprior + +# def _bound_gamma(self): +# logprior = 0. +# logprior = gammaln(np.sum(self._gamma)) - gammaln(self.n_states*self.alpha) +# for k in xrange(self.n_states): +# logprior -= gammaln(self._gamma[k]) - gammaln(self.alpha) +# logprior += (self._gamma[k]-self.alpha)*(digamma(self._gamma[k]) +# -digamma(np.sum(self._gamma))) +# return logprior
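
A side note on the `lognormalize` helper patched above: it is a numerically stable softmax (subtract the log-sum-exp before exponentiating) followed by an explicit renormalization, so the responsibilities computed in `eval` sum to exactly one despite floating-point rounding. A minimal standalone sketch of the same idea; the input values below are illustrative and not part of the patch:

import numpy as np

def lognormalize(v):
    """Turn unnormalized log-probabilities into probabilities that sum to 1."""
    v = np.exp(v - np.logaddexp.reduce(v))  # subtract the log-sum-exp for numerical stability
    return v / np.sum(v)                    # renormalize away residual rounding error

z = lognormalize(np.array([1000., 1001., 1002.]))  # very large log-values, no overflow
assert abs(np.sum(z) - 1.) < 1e-12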