From f00bdfa5b06daca830a04c4628c00d7b2840e18e Mon Sep 17 00:00:00 2001
From: Fabian Pedregosa <fabian.pedregosa@inria.fr>
Date: Tue, 23 Nov 2010 14:34:08 +0100
Subject: [PATCH] Rename scikits.learn.gmm to scikits.learn.mixture.

In the future mixture will become a directory, but for now there are
just GMMs.
---
 doc/modules/{gmm.rst => mixture.rst}          |  9 ++---
 doc/unsupervised_learning.rst                 |  2 +-
 examples/{gmm => mixture}/README.txt          |  0
 examples/{gmm => mixture}/plot_gmm.py         |  4 +--
 .../{gmm => mixture}/plot_gmm_classifier.py   |  2 +-
 examples/{gmm => mixture}/plot_gmm_pdf.py     |  4 +--
 scikits/learn/__init__.py                     |  1 -
 scikits/learn/hmm.py                          |  3 +-
 scikits/learn/{gmm.py => mixture.py}          | 33 +++++++++--------
 scikits/learn/tests/test_hmm.py               |  6 ++--
 .../tests/{test_gmm.py => test_mixture.py}    | 36 +++++++++----------
 11 files changed, 49 insertions(+), 51 deletions(-)
 rename doc/modules/{gmm.rst => mixture.rst} (82%)
 rename examples/{gmm => mixture}/README.txt (100%)
 rename examples/{gmm => mixture}/plot_gmm.py (93%)
 rename examples/{gmm => mixture}/plot_gmm_classifier.py (98%)
 rename examples/{gmm => mixture}/plot_gmm_pdf.py (91%)
 rename scikits/learn/{gmm.py => mixture.py} (97%)
 rename scikits/learn/tests/{test_gmm.py => test_mixture.py} (88%)

diff --git a/doc/modules/gmm.rst b/doc/modules/mixture.rst
similarity index 82%
rename from doc/modules/gmm.rst
rename to doc/modules/mixture.rst
index 74a610e34e..b29f2dc279 100644
--- a/doc/modules/gmm.rst
+++ b/doc/modules/mixture.rst
@@ -4,15 +4,16 @@ Gaussian mixture models
 ===================================================
 
-`scikits.learn.gmm` is a package which enables to create Gaussian
-Mixture Models (diagonal, spherical, tied and full covariance matrices
+`scikits.learn.mixture` is a package which enables to create Mixture
+Models (diagonal, spherical, tied and full covariance matrices
 supported), to sample them, and to estimate them from data using
 Expectation Maximization algorithm. It can also draw confidence
 ellipsoides for multivariate models, and compute the Bayesian
 Information Criterion to assess the number of clusters in the data.
 
-Gaussian Mixture Models (GMM) are a class of probabilistic models
-describing the data as drawn from a mixture of Gaussian probability
+For the moment, only Gaussian Mixture Models (GMM) are
+implemented. These are a class of probabilistic models describing the
+data as drawn from a mixture of Gaussian probability
 distributions. The challenge that is GMM tackles is to learn the
 parameters of these Gaussians from the data.
diff --git a/doc/unsupervised_learning.rst b/doc/unsupervised_learning.rst
index 0a740079be..6767f216c0 100644
--- a/doc/unsupervised_learning.rst
+++ b/doc/unsupervised_learning.rst
@@ -7,7 +7,7 @@ Unsupervised learning
 
 .. toctree::
 
-    modules/gmm
+    modules/mixture
     modules/clustering
     modules/decompositions
 
diff --git a/examples/gmm/README.txt b/examples/mixture/README.txt
similarity index 100%
rename from examples/gmm/README.txt
rename to examples/mixture/README.txt
diff --git a/examples/gmm/plot_gmm.py b/examples/mixture/plot_gmm.py
similarity index 93%
rename from examples/gmm/plot_gmm.py
rename to examples/mixture/plot_gmm.py
index 0f6ec31736..80e1c6fbe8 100644
--- a/examples/gmm/plot_gmm.py
+++ b/examples/mixture/plot_gmm.py
@@ -7,7 +7,7 @@ Plot the confidence ellipsoids of a mixture of two gaussians.
 """
 import numpy as np
 
-from scikits.learn import gmm
+from scikits.learn import mixture
 import itertools
 
 import pylab as pl
@@ -21,7 +21,7 @@ C = np.array([[0., -0.7], [3.5, .7]])
 X = np.r_[np.dot(np.random.randn(n, 2), C),
           np.random.randn(n, 2) + np.array([3, 3])]
 
-clf = gmm.GMM(n_states=2, cvtype='full')
+clf = mixture.GMM(n_states=2, cvtype='full')
 clf.fit(X)
 
 splot = pl.subplot(111, aspect='equal')
diff --git a/examples/gmm/plot_gmm_classifier.py b/examples/mixture/plot_gmm_classifier.py
similarity index 98%
rename from examples/gmm/plot_gmm_classifier.py
rename to examples/mixture/plot_gmm_classifier.py
index edf7d12e2d..96ac45790d 100644
--- a/examples/gmm/plot_gmm_classifier.py
+++ b/examples/mixture/plot_gmm_classifier.py
@@ -32,7 +32,7 @@ import numpy as np
 from scikits.learn import datasets
 from scikits.learn.cross_val import StratifiedKFold
-from scikits.learn.gmm import GMM
+from scikits.learn.mixture import GMM
 
 
 def make_ellipses(gmm, ax):
     for n, color in enumerate('rgb'):
diff --git a/examples/gmm/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py
similarity index 91%
rename from examples/gmm/plot_gmm_pdf.py
rename to examples/mixture/plot_gmm_pdf.py
index cec1ec0df7..2a535cd37c 100644
--- a/examples/gmm/plot_gmm_pdf.py
+++ b/examples/mixture/plot_gmm_pdf.py
@@ -10,7 +10,7 @@ matrices.
 import numpy as np
 import pylab as pl
 
-from scikits.learn import gmm
+from scikits.learn import mixture
 
 n_samples = 300
 
@@ -20,7 +20,7 @@ C = np.array([[0., -0.7], [3.5, .7]])
 X_train = np.r_[np.dot(np.random.randn(n_samples, 2), C),
                 np.random.randn(n_samples, 2) + np.array([20, 20])]
 
-clf = gmm.GMM(n_states=2, cvtype='full')
+clf = mixture.GMM(n_states=2, cvtype='full')
 clf.fit(X_train)
 
 x = np.linspace(-20.0, 30.0)
diff --git a/scikits/learn/__init__.py b/scikits/learn/__init__.py
index cd4b9d65b9..6fca884796 100644
--- a/scikits/learn/__init__.py
+++ b/scikits/learn/__init__.py
@@ -19,7 +19,6 @@ from . import ball_tree
 from . import cluster
 from . import covariance
 from . import datasets
-from . import gmm
 from . import glm
 from . import lda
 from . import metrics
diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py
index 41a432ae32..0c40af99a7 100644
--- a/scikits/learn/hmm.py
+++ b/scikits/learn/hmm.py
@@ -7,10 +7,9 @@ import string
 import numpy as np
 
 from .base import BaseEstimator
-from .gmm import (GMM, lmvnpdf, logsum, normalize, sample_gaussian,
+from .mixture import (GMM, lmvnpdf, logsum, normalize, sample_gaussian,
                   _distribute_covar_matrix_to_match_cvtype, _validate_covars)
 from . import cluster
 
-
 ZEROLOGPROB = -1e200
 
diff --git a/scikits/learn/gmm.py b/scikits/learn/mixture.py
similarity index 97%
rename from scikits/learn/gmm.py
rename to scikits/learn/mixture.py
index dbf205367e..47247c3c69 100644
--- a/scikits/learn/gmm.py
+++ b/scikits/learn/mixture.py
@@ -128,6 +128,20 @@ class GMM(BaseEstimator):
     This class allows for easy evaluation of, sampling from, and
     maximum-likelihood estimation of the parameters of a GMM distribution.
 
+    Initializes parameters such that every mixture component has zero
+    mean and identity covariance.
+
+
+    Parameters
+    ----------
+    n_states : int
+        Number of mixture components.
+    cvtype : string (read-only)
+        String describing the type of covariance parameters to
+        use. Must be one of 'spherical', 'tied', 'diag', 'full'.
+        Defaults to 'diag'.
+
+
     Attributes
     ----------
     cvtype : string (read-only)
@@ -169,8 +183,8 @@ class GMM(BaseEstimator):
     Examples
     --------
     >>> import numpy as np
-    >>> from scikits.learn.gmm import GMM
-    >>> g = GMM(n_states=2)
+    >>> from scikits.learn import mixture
+    >>> g = mixture.GMM(n_states=2)
 
     >>> # Generate random observations with two modes centered on 0
     >>> # and 10 to use for training.
@@ -209,21 +223,6 @@ class GMM(BaseEstimator):
     """
 
     def __init__(self, n_states=1, cvtype='diag'):
-        """Create a Gaussian mixture model
-
-        Initializes parameters such that every mixture component has
-        zero mean and identity covariance.
-
-        Parameters
-        ----------
-        n_states : int
-            Number of mixture components.
-        cvtype : string (read-only)
-            String describing the type of covariance parameters to
-            use. Must be one of 'spherical', 'tied', 'diag', 'full'.
-            Defaults to 'diag'.
-        """
-
         self._n_states = n_states
         self._cvtype = cvtype
 
diff --git a/scikits/learn/tests/test_hmm.py b/scikits/learn/tests/test_hmm.py
index 1b63253a48..cc77358beb 100644
--- a/scikits/learn/tests/test_hmm.py
+++ b/scikits/learn/tests/test_hmm.py
@@ -3,7 +3,7 @@ import numpy as np
 from numpy.testing import assert_array_equal, assert_array_almost_equal
 from unittest import TestCase
 
-from .test_gmm import _generate_random_spd_matrix
+from .test_mixture import _generate_random_spd_matrix
 
 from .. import hmm
 
@@ -512,9 +512,9 @@ class GMMHMMParams(object):
 
     @staticmethod
     def create_random_gmm(n_mix, n_features, cvtype):
-        from scikits.learn import gmm
+        from scikits.learn import mixture
 
-        g = gmm.GMM(n_mix, cvtype=cvtype)
+        g = mixture.GMM(n_mix, cvtype=cvtype)
         g.means = np.random.randint(-20, 20, (n_mix, n_features))
         mincv = 0.1
         g.covars = {'spherical': (mincv
diff --git a/scikits/learn/tests/test_gmm.py b/scikits/learn/tests/test_mixture.py
similarity index 88%
rename from scikits/learn/tests/test_gmm.py
rename to scikits/learn/tests/test_mixture.py
index fd2089ca84..e7e294246d 100644
--- a/scikits/learn/tests/test_gmm.py
+++ b/scikits/learn/tests/test_mixture.py
@@ -7,7 +7,7 @@ from numpy.testing import assert_array_equal, assert_array_almost_equal, \
 import numpy as np
 from scipy import stats
 
-from scikits.learn import gmm
+from scikits.learn import mixture
 
 np.random.seed(0)
 
@@ -23,7 +23,7 @@ def _generate_random_spd_matrix(ndim):
 
 def test_logsum_1D():
     A = np.random.rand(2) + 1.0
     for axis in range(1):
-        Asum = gmm.logsum(A, axis)
+        Asum = mixture.logsum(A, axis)
         assert_array_almost_equal(np.exp(Asum), np.sum(np.exp(A), axis))
 
@@ -33,27 +33,27 @@ def test_logsum_3D():
     """
     Test also on a 3D matrix
     """
     A = np.random.rand(2, 2, 2) + 1.0
     for axis in range(3):
-        Asum = gmm.logsum(A, axis)
+        Asum = mixture.logsum(A, axis)
         assert_array_almost_equal(np.exp(Asum), np.sum(np.exp(A), axis))
 
 
 def test_normalize_1D():
     A = np.random.rand(2) + 1.0
     for axis in range(1):
-        Anorm = gmm.normalize(A, axis)
+        Anorm = mixture.normalize(A, axis)
         assert np.all(np.allclose(Anorm.sum(axis), 1.0))
 
 
 def test_normalize_3D():
     A = np.random.rand(2, 2, 2) + 1.0
     for axis in range(3):
-        Anorm = gmm.normalize(A, axis)
+        Anorm = mixture.normalize(A, axis)
         assert np.all(np.allclose(Anorm.sum(axis), 1.0))
 
 def test_sample_gaussian():
     """
-    Test sample generation from gmm.sample_gaussian where covariance
+    Test sample generation from mixture.sample_gaussian where covariance
     is diagonal, spherical and full
     """
@@ -62,14 +62,14 @@ def test_sample_gaussian():
     mu = np.random.randint(10) * np.random.rand(n_features)
     cv = (np.random.rand(n_features) + 1.0) ** 2
-    samples = gmm.sample_gaussian(mu, cv, cvtype='diag', n=n_samples)
+    samples = mixture.sample_gaussian(mu, cv, cvtype='diag', n=n_samples)
 
     assert np.allclose(samples.mean(axis), mu, atol=0.3)
     assert np.allclose(samples.var(axis), cv, atol=0.5)
 
     # the same for spherical covariances
     cv = (np.random.rand() + 1.0) ** 2
-    samples = gmm.sample_gaussian(mu, cv, cvtype='spherical', n=n_samples)
+    samples = mixture.sample_gaussian(mu, cv, cvtype='spherical', n=n_samples)
 
     assert np.allclose(samples.mean(axis), mu, atol=0.3)
     assert np.allclose(samples.var(axis), np.repeat(cv, n_features),
                        atol=0.5)
@@ -77,7 +77,7 @@
 
     # and for full covariances
     A = np.random.randn(n_features, n_features)
     cv = np.dot(A.T, A) + np.eye(n_features)
-    samples = gmm.sample_gaussian(mu, cv, cvtype='full', n=n_samples)
+    samples = mixture.sample_gaussian(mu, cv, cvtype='full', n=n_samples)
     assert np.allclose(samples.mean(axis), mu, atol=0.3)
     assert np.allclose(np.cov(samples), cv, atol=0.7)
@@ -94,7 +94,7 @@ def _naive_lmvnpdf_diag(obs, mu, cv):
 
 def test_lmvnpdf_diag():
     """ test a slow and naive implementation of lmvnpdf and
-        compare it to the vectorized version (gmm.lmvnpdf) to test
+        compare it to the vectorized version (mixture.lmvnpdf) to test
         for correctness
     """
     n_features, n_states, n_obs = 2, 3, 10
@@ -103,7 +103,7 @@
     obs = np.random.randint(10) * np.random.rand(n_obs, n_features)
 
     ref = _naive_lmvnpdf_diag(obs, mu, cv)
-    lpr = gmm.lmvnpdf(obs, mu, cv, 'diag')
+    lpr = mixture.lmvnpdf(obs, mu, cv, 'diag')
     assert_array_almost_equal(lpr, ref)
 
@@ -116,7 +116,7 @@
     cv = np.tile(spherecv, (n_features, 1))
 
     reference = _naive_lmvnpdf_diag(obs, mu, cv)
-    lpr = gmm.lmvnpdf(obs, mu, spherecv, 'spherical')
+    lpr = mixture.lmvnpdf(obs, mu, spherecv, 'spherical')
     assert_array_almost_equal(lpr, reference)
 
@@ -130,14 +130,14 @@
     fullcv = np.array([np.diag(x) for x in cv])
 
     reference = _naive_lmvnpdf_diag(obs, mu, cv)
-    lpr = gmm.lmvnpdf(obs, mu, fullcv, 'full')
+    lpr = mixture.lmvnpdf(obs, mu, fullcv, 'full')
     assert_array_almost_equal(lpr, reference)
 
 
 def test_GMM_attributes():
     n_states, n_features = 10, 4
     cvtype = 'diag'
-    g = gmm.GMM(n_states, cvtype)
+    g = mixture.GMM(n_states, cvtype)
     weights = np.random.rand(n_states)
     weights = weights / weights.sum()
     means = np.random.randint(-20, 20, (n_states, n_features))
@@ -166,7 +166,7 @@
     assert_raises(ValueError, g.__setattr__, 'covars',
                   np.zeros((n_states - 2, n_features)))
 
-    assert_raises(ValueError, gmm.GMM, n_states=20, cvtype='badcvtype')
+    assert_raises(ValueError, mixture.GMM, n_states=20, cvtype='badcvtype')
 
 
 class GMMTester():
@@ -183,7 +183,7 @@
                                  for x in xrange(n_states)])}
 
     def test_eval(self):
-        g = gmm.GMM(self.n_states, self.cvtype)
+        g = mixture.GMM(self.n_states, self.cvtype)
         # Make sure the means are far apart so posteriors.argmax()
         # picks the actual component used to generate the observations.
         g.means = 20 * self.means
@@ -202,7 +202,7 @@
         assert_array_equal(posteriors.argmax(axis=1), gaussidx)
 
     def test_rvs(self, n=100):
-        g = gmm.GMM(self.n_states, self.cvtype)
+        g = mixture.GMM(self.n_states, self.cvtype)
         # Make sure the means are far apart so posteriors.argmax()
         # picks the actual component used to generate the observations.
         g.means = 20 * self.means
@@ -213,7 +213,7 @@
         self.assertEquals(samples.shape, (n, self.n_features))
 
     def test_train(self, params='wmc'):
-        g = gmm.GMM(self.n_states, self.cvtype)
+        g = mixture.GMM(self.n_states, self.cvtype)
         g.weights = self.weights
         g.means = self.means
         g._covars = 20 * self.covars[self.cvtype]
-- 
GitLab
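
For orientation, below is a minimal usage sketch of the import path after this rename, modelled on the updated examples/mixture/plot_gmm.py in the patch; the random seed and toy data are illustrative assumptions, not part of the patch itself:

    import numpy as np
    from scikits.learn import mixture   # previously: from scikits.learn import gmm

    # Two well-separated clusters of points, roughly as in examples/mixture/plot_gmm.py
    np.random.seed(0)
    X = np.r_[np.random.randn(300, 2),
              np.random.randn(300, 2) + np.array([3, 3])]

    # Same GMM class and arguments as before; only the module name changed
    clf = mixture.GMM(n_states=2, cvtype='full')
    clf.fit(X)

Code that still does `from scikits.learn import gmm` will break after this patch, since the module file is renamed and the `from . import gmm` line is dropped from scikits/learn/__init__.py.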