diff --git a/scikits/learn/pyem/Changelog b/scikits/learn/pyem/Changelog
index bca9b599c979fd02c0f62c122f685903db518098..0545abf1076ea2761da1634e940a051de37f58b5 100644
--- a/scikits/learn/pyem/Changelog
+++ b/scikits/learn/pyem/Changelog
@@ -1,3 +1,11 @@
+pyem (0.5.6) Thu, 16 Nov 2006 21:02:02 +0900
+
+        * correct examples
+        * correct exception msg strings in gauss_mix, which
+          were buggy
+        * add examples from website to the package, so that above errors
+          do not appear again
+
 pyem (0.5.6) Thu, 16 Nov 2006 14:18:19 +0900
 
         * bump to 0.5.6
diff --git a/scikits/learn/pyem/__init__.py b/scikits/learn/pyem/__init__.py
index 371e315a96885b4ac4aa451ce1a2f49ae269b92d..ae6d83c73b75d8371cdca32ed5409a668def7372 100644
--- a/scikits/learn/pyem/__init__.py
+++ b/scikits/learn/pyem/__init__.py
@@ -1,11 +1,12 @@
 #! /usr/bin/env python
-# Last Change: Fri Oct 20 11:00 AM 2006 J
+# Last Change: Thu Nov 16 09:00 PM 2006 J
 
 from info import __doc__
 
 from gauss_mix import GmParamError, GM
 from gmm_em import GmmParamError, GMM, EM
 from online_em import OnGMM as _OnGMM
+import examples as _examples
 
 __all__ = filter(lambda s:not s.startswith('_'),dir())
diff --git a/scikits/learn/pyem/basic_example1.py b/scikits/learn/pyem/basic_example1.py
new file mode 100644
index 0000000000000000000000000000000000000000..a47e0b935645231a9763aad17c2835e4d4eda33c
--- /dev/null
+++ b/scikits/learn/pyem/basic_example1.py
@@ -0,0 +1,48 @@
+import numpy as N
+import pylab as P
+from scipy.sandbox.pyem import GM
+
+#------------------------------
+# Hyper parameters:
+#   - k: number of clusters
+#   - d: dimension
+k = 3
+d = 2
+
+#-------------------------------------------------------
+# Values for weights, mean and (diagonal) variances
+#   - the weights are an array of rank 1
+#   - mean is expected to be rank 2 with one row per component
+#   - variances are also expected to be rank 2. For diagonal, one row
+#     is one diagonal; for full, the first d rows are the first variance,
+#     etc... In that case, the variance matrix should have k*d rows and d
+#     columns
+w = N.array([0.2, 0.45, 0.35])
+mu = N.array([[4.1, 3], [1, 5], [-2, -3]])
+va = N.array([[1, 1.5], [3, 4], [2, 3.5]])
+
+#-----------------------------------------
+# First method: directly from parameters:
+# Both methods are equivalent.
+gm = GM.fromvalues(w, mu, va)
+
+#-------------------------------------
+# Second method to build a GM instance:
+gm = GM(d, k, mode = 'diag')
+# set_param checks that w, mu and va correspond to k, d and mode
+gm.set_param(w, mu, va)
+
+# Once set_param is called, both methods are equivalent. The second
+# method is useful when using a GM object for learning (where
+# the learner class will set the params), whereas the first one
+# is useful when there is a need to quickly sample a model
+# from existing values, without a need to give the hyper parameters
+
+# Create a Gaussian Mixture from the parameters, and sample
+# 1000 items from it (one row = one 2-dimensional sample)
+data = gm.sample(1000)
+
+# Plot the samples
+P.plot(data[:, 0], data[:, 1], '.')
+# Plot the ellipsoids of confidence at a level of 75 %
+gm.plot(level = 0.75)
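The layout described in basic_example1.py's comments (one row per diagonal, versus k*d stacked rows for full covariances) is easiest to see in code. A minimal sketch, not part of the patch, with hypothetical values, using the same GM API and the 'full' mode that basic_example2.py mentions:

    import numpy as N
    from scipy.sandbox.pyem import GM

    k, d = 2, 2
    w  = N.array([0.5, 0.5])
    mu = N.array([[0., 0.], [3., 3.]])
    # In 'full' mode, va stacks the k covariance matrices: k*d rows, d columns
    va = N.array([[1.0, 0.2],   # rows 0..d-1: covariance of component 0
                  [0.2, 1.0],
                  [2.0, 0.0],   # rows d..2*d-1: covariance of component 1
                  [0.0, 0.5]])
    gm = GM(d, k, mode = 'full')
    gm.set_param(w, mu, va)
    data = gm.sample(1000)      # one row = one 2-dimensional sample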
diff --git a/scikits/learn/pyem/basic_example2.py b/scikits/learn/pyem/basic_example2.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ff2fc6878021cbf5605aa430539f4d37ebe5933
--- /dev/null
+++ b/scikits/learn/pyem/basic_example2.py
@@ -0,0 +1,45 @@
+from numpy.random import seed
+
+from scipy.sandbox.pyem import GM, GMM, EM
+import copy
+
+# To reproduce results, fix the random seed
+seed(1)
+
+#+++++++++++++++++++++++++++++
+# Meta parameters of the model
+#   - k: number of components
+#   - d: dimension of each Gaussian
+#   - mode: mode of the covariance matrix: full or diag (string)
+#   - nframes: number of frames (frame = one data point = one
+#     row of d elements)
+k = 2
+d = 2
+mode = 'diag'
+nframes = 1000
+
+#+++++++++++++++++++++++++++++++++++++++++++
+# Create an artificial GM model and sample it
+#+++++++++++++++++++++++++++++++++++++++++++
+w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
+gm = GM.fromvalues(w, mu, va)
+
+# Sample nframes frames from the model
+data = gm.sample(nframes)
+
+#++++++++++++++++++++++++
+# Learn the model with EM
+#++++++++++++++++++++++++
+
+# Create a model from a Gaussian mixture with kmean initialization
+lgm = GM(d, k, mode)
+gmm = GMM(lgm, 'kmean')
+
+# The actual EM, with likelihood computation. The threshold
+# is compared to the (linearly approximated) derivative of the likelihood
+em = EM()
+like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
+
+# The computed parameters are in gmm.gm, which is the same as lgm
+# (remember, python does not copy most objects by default). You can,
+# for example, plot lgm against gm to compare them.
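The closing comment of basic_example2.py suggests plotting lgm against gm. A minimal sketch of that comparison, assuming the variables of basic_example2.py and reusing the handle-styling idiom from basic_example3.py below:

    import pylab as P

    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
    h = gm.plot(level = 0.9)       # true model, in green
    [e.set_color('g') for e in h]
    h[0].set_label('True confidence ellipsoids')
    h = lgm.plot(level = 0.9)      # learned model, in red
    [e.set_color('r') for e in h]
    h[0].set_label('Learned confidence ellipsoids')
    P.legend(loc = 0)
    P.show()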
diff --git a/scikits/learn/pyem/basic_example3.py b/scikits/learn/pyem/basic_example3.py
new file mode 100644
index 0000000000000000000000000000000000000000..d680584dc8674227b88397361bfd511836f066af
--- /dev/null
+++ b/scikits/learn/pyem/basic_example3.py
@@ -0,0 +1,64 @@
+import numpy as N
+from numpy.random import seed
+
+from scipy.sandbox.pyem import GM, GMM, EM
+import copy
+
+seed(2)
+
+k = 4
+d = 2
+mode = 'diag'
+nframes = 1000
+
+#+++++++++++++++++++++++++++++++++++++++++++
+# Create an artificial GM model and sample it
+#+++++++++++++++++++++++++++++++++++++++++++
+w, mu, va = GM.gen_param(d, k, mode, spread = 1.0)
+gm = GM.fromvalues(w, mu, va)
+
+# Sample nframes frames from the model
+data = gm.sample(nframes)
+
+#++++++++++++++++++++++++
+# Learn the model with EM
+#++++++++++++++++++++++++
+
+# List of learned mixtures; lgm[i] is a mixture with i+1 components
+lgm = []
+kmax = 6
+bics = N.zeros(kmax)
+em = EM()
+for i in range(kmax):
+    lgm.append(GM(d, i+1, mode))
+
+    gmm = GMM(lgm[i], 'kmean')
+    em.train(data, gmm, maxiter = 30, thresh = 1e-10)
+    bics[i] = gmm.bic(data)
+
+print "Original model has %d clusters, BIC says %d" % (k, N.argmax(bics)+1)
+
+#+++++++++++++++
+# Draw the model
+#+++++++++++++++
+import pylab as P
+P.subplot(3, 2, 1)
+
+for k in range(kmax):
+    P.subplot(3, 2, k+1)
+    level = 0.9
+    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+
+    # h keeps the handles of the plot, so that you can modify
+    # its parameters, like label or color
+    h = lgm[k].plot(level = level)
+    [i.set_color('r') for i in h]
+    h[0].set_label('EM confidence ellipsoids')
+
+    h = gm.plot(level = level)
+    [i.set_color('g') for i in h]
+    h[0].set_label('Real confidence ellipsoids')
+
+P.legend(loc = 0)
+# depending on your configuration, you may have to call P.show()
+# to actually display the figure
diff --git a/scikits/learn/pyem/example.py b/scikits/learn/pyem/demo1.py
similarity index 100%
rename from scikits/learn/pyem/example.py
rename to scikits/learn/pyem/demo1.py
diff --git a/scikits/learn/pyem/example2.py b/scikits/learn/pyem/demo2.py
similarity index 100%
rename from scikits/learn/pyem/example2.py
rename to scikits/learn/pyem/demo2.py
diff --git a/scikits/learn/pyem/examples.py b/scikits/learn/pyem/examples.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f9a91422a1b4eef6399a63a9a200ad02ace3709
--- /dev/null
+++ b/scikits/learn/pyem/examples.py
@@ -0,0 +1,14 @@
+def ex1():
+    import basic_example1
+
+def ex2():
+    import basic_example2
+
+def ex3():
+    import basic_example3
+
+if __name__ == '__main__':
+    ex1()
+    ex2()
+    ex3()
+
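basic_example3.py above computes the BIC of each candidate model but only prints the preferred order. A short sketch, assuming the same variables as in that script, of actually retrieving the BIC-selected mixture:

    # bics[i] holds the BIC of the (i+1)-component model trained above
    best = int(N.argmax(bics))
    print "BIC prefers %d components" % (best + 1)
    best_gm = lgm[best]        # learned GM with the highest BIC
    best_gm.plot(level = 0.9)  # draw its confidence ellipsoids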
diff --git a/scikits/learn/pyem/gauss_mix.py b/scikits/learn/pyem/gauss_mix.py
index fdc66be1c47a51c59681c79baf6e186dcbfea6b6..bacdcf7997cf48733f6152e132c156e81eba5cf2 100644
--- a/scikits/learn/pyem/gauss_mix.py
+++ b/scikits/learn/pyem/gauss_mix.py
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Nov 09 06:00 PM 2006 J
+# Last Change: Thu Nov 16 08:00 PM 2006 J
 
 # Module to implement GaussianMixture class.
 
@@ -82,10 +82,10 @@ class GM:
         k, d, mode = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
             raise GmParamError("Number of given components is %d, expected %d"
-                    % (shape(k), shape(self.k)))
+                    % (k, self.k))
         if not d == self.d:
             raise GmParamError("Dimension of the given model is %d, expected %d"
-                    % (shape(d), shape(self.d)))
+                    % (d, self.d))
         if not mode == self.mode and not d == 1:
             raise GmParamError("Given covariance mode is %s, expected %s"
                     % (mode, self.mode))
diff --git a/scikits/learn/pyem/gmm_em.py b/scikits/learn/pyem/gmm_em.py
index d5d6aa9f676349315256727764c5681d1c2a0faa..e700c059be3ca27183fdfe7df96b58fa150da5e3 100644
--- a/scikits/learn/pyem/gmm_em.py
+++ b/scikits/learn/pyem/gmm_em.py
@@ -374,7 +374,7 @@ def multiple_gauss_den(data, mu, va):
     K = mu.shape[0]
     n = data.shape[0]
-    d = data.shape[1]
+    d = mu.shape[1]
 
     y = N.zeros((K, n))
 
     if mu.size == va.size:
diff --git a/scikits/learn/pyem/tests/test_examples.py b/scikits/learn/pyem/tests/test_examples.py
new file mode 100644
index 0000000000000000000000000000000000000000..708c83bf6e757977bc2373dae929dbf611bca599
--- /dev/null
+++ b/scikits/learn/pyem/tests/test_examples.py
@@ -0,0 +1,26 @@
+#! /usr/bin/env python
+# Last Change: Thu Nov 16 09:00 PM 2006 J
+
+from numpy.testing import *
+
+set_package_path()
+from pyem.examples import ex1, ex2, ex3
+restore_path()
+
+# #Optional:
+# set_local_path()
+# # import modules that are located in the same directory as this file.
+# restore_path()
+
+class test_examples(NumpyTestCase):
+    def check_ex1(self, level = 5):
+        ex1()
+
+    def check_ex2(self, level = 5):
+        ex2()
+
+    def check_ex3(self, level = 5):
+        ex3()
+
+if __name__ == "__main__":
+    NumpyTest().run()
diff --git a/scikits/learn/pyem/tests/test_online_em.py b/scikits/learn/pyem/tests/test_online_em.py
index ddbe4190b786182276bae78fa05a8d8c2b49b3df..90573e8f72296525b0d1e0c1dc5788edcbeae1c7 100644
--- a/scikits/learn/pyem/tests/test_online_em.py
+++ b/scikits/learn/pyem/tests/test_online_em.py
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon Oct 23 07:00 PM 2006 J
+# Last Change: Thu Nov 16 09:00 PM 2006 J
 
 import copy
 
@@ -61,7 +61,7 @@ class test_on_off_eq(OnlineEmTest):
         self._create_model(d, k, mode, nframes, emiter)
         self._check(d, k, mode, nframes, emiter)
 
-    def check_2d(self, level = 2):
+    def check_2d(self, level = 1):
         d = 2
         k = 2
         mode = 'diag'
@@ -72,7 +72,7 @@ class test_on_off_eq(OnlineEmTest):
         self._create_model(d, k, mode, nframes, emiter)
         self._check(d, k, mode, nframes, emiter)
 
-    def check_5d(self, level = 2):
+    def check_5d(self, level = 5):
         d = 5
         k = 2
         mode = 'diag'
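With examples imported by __init__.py above and tests/test_examples.py running them at level 5, the examples can also be exercised by hand. A minimal sketch mirroring the test module's import path (which relies on numpy.testing's set_package_path; adjust the path to your install):

    # Run the packaged examples directly, as test_examples.py does
    from pyem.examples import ex1, ex2, ex3

    ex1()   # basic_example1: build a GM two ways, sample and plot it
    ex2()   # basic_example2: sample an artificial GM and re-learn it with EM
    ex3()   # basic_example3: choose the number of components with BIC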