diff --git a/examples/mixture/gmm_model_selection.py b/examples/mixture/gmm_model_selection.py
index 8ea0ec61f2b5700b0b625a460602ce2f727376cd..cae9e3d06ff5eba47f6b2ce18081e31c1021ed98 100644
--- a/examples/mixture/gmm_model_selection.py
+++ b/examples/mixture/gmm_model_selection.py
@@ -10,7 +10,12 @@ and the number of components in the model. In that case, AIC
 also provides the right result (not shown to save time), but BIC is
 better suited if the problem is to identify the right model. Unlike
 Bayesian procedures, such inferences are prior-free.
+
+In that case, the full model with 2 components
+(which corresponds to the true generative model) is selected.
 """
+print __doc__
+
 import itertools
 
 import numpy as np
@@ -32,7 +37,7 @@ X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
 lowest_bic = np.infty
 bic = []
 n_components_range = range(1, 7)
-cv_types = ['spherical', 'diag', 'tied', 'full']
+cv_types = ['spherical', 'tied', 'diag', 'full']
 for cv_type in cv_types:
     for n_components in n_components_range:
         # Fit a mixture of gaussians with EM
@@ -58,6 +63,9 @@ for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):
 pl.xticks(n_components_range)
 pl.ylim([bic.min() * 1.01 - .01 * bic.max(), bic.max()])
 pl.title('BIC score per model')
+xpos = np.mod(bic.argmin(), len(n_components_range)) + .65 +\
+    .2 * np.floor(bic.argmin() / len(n_components_range))
+pl.text(xpos, bic.min() * 0.97 + .03 * bic.max(), '*', fontsize=14)
 spl.set_xlabel('Number of components')
 spl.legend([b[0] for b in bars], cv_types)
 
@@ -84,6 +92,6 @@ pl.xlim(-10, 10)
 pl.ylim(-3, 6)
 pl.xticks(())
 pl.yticks(())
-pl.title('Selected GMM')
+pl.title('Selected GMM: full model, 2 components')
 pl.subplots_adjust(hspace=.35, bottom=.02)
 pl.show()
diff --git a/examples/mixture/plot_gmm_classifier.py b/examples/mixture/plot_gmm_classifier.py
index 835df829f69a99fa64e09d608053d255fb80aa31..b7bb63d37f8812c020489cee405e2f18d2e5a542 100644
--- a/examples/mixture/plot_gmm_classifier.py
+++ b/examples/mixture/plot_gmm_classifier.py
@@ -42,7 +42,7 @@ def make_ellipses(gmm, ax):
         angle = np.arctan2(u[1], u[0])
         angle = 180 * angle / np.pi  # convert to degrees
         v *= 9
-        ell = mpl.patches.Ellipse(gmm._get_means()[n, :2], v[0], v[1],
+        ell = mpl.patches.Ellipse(gmm._get_means()[n, :2], v[0], v[1],
                                   180 + angle, color=color)
         ell.set_clip_box(ax.bbox)
         ell.set_alpha(0.5)
@@ -78,9 +78,9 @@ pl.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
 for index, (name, classifier) in enumerate(classifiers.iteritems()):
     # Since we have class labels for the training data, we can
     # initialize the GMM parameters in a supervised manner.
-    classifier.means_ = np.array([X_train[y_train == i, :].mean(axis=0)
+    classifier.means_ = np.array([X_train[y_train == i].mean(axis=0)
                                   for i in xrange(n_classes)])
-
+
     # Train the other parameters using the EM algorithm.
     classifier.fit(X_train, init_params='wc', n_iter=20)
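
For reference, the selection loop that gmm_model_selection.py encodes is: fit one mixture per (covariance type, number of components) pair and keep the model with the lowest BIC. Below is a minimal, self-contained sketch of that idea written against the modern sklearn.mixture.GaussianMixture API rather than the older GMM class used in the patched example, so it runs on current scikit-learn; the data generation only mimics the example's two-blob setup and the printed "expected" result is the one claimed in the docstring.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
# Two Gaussian blobs, similar in spirit to the example's data:
# one sheared component and one shifted spherical component.
C = np.array([[0., -0.1], [1.7, 0.4]])
X = np.r_[np.dot(rng.randn(300, 2), C),
          0.7 * rng.randn(300, 2) + np.array([-6, 3])]

lowest_bic = np.inf
best_gmm = None
for cv_type in ('spherical', 'tied', 'diag', 'full'):
    for n_components in range(1, 7):
        # Fit a mixture of Gaussians with EM for this configuration.
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=cv_type,
                              random_state=0).fit(X)
        bic = gmm.bic(X)
        if bic < lowest_bic:
            lowest_bic, best_gmm = bic, gmm

# Expected (per the example's docstring): covariance_type 'full', 2 components.
print(best_gmm.covariance_type, best_gmm.n_components)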