From 7b45bd97e62c75031fa8c04ec69f11b1f4f37e3c Mon Sep 17 00:00:00 2001 From: Gael Varoquaux <gael.varoquaux@normalesup.org> Date: Fri, 23 Apr 2010 05:41:43 +0000 Subject: [PATCH] DOC: Moving the em examples. Doing this in two times, to avoid what looks like bugs in the svn sourceforge serve. git-svn-id: https://scikit-learn.svn.sourceforge.net/svnroot/scikit-learn/trunk@699 22fbfee3-77ab-4535-9bad-27d1bd3bc7d8 --- scikits/learn/em/examples/__init__.py | 0 scikits/learn/em/examples/basic_example1.py | 48 ------- scikits/learn/em/examples/basic_example2.py | 45 ------- scikits/learn/em/examples/basic_example3.py | 64 --------- .../em/examples/discriminant_analysis.py | 127 ------------------ scikits/learn/em/examples/examples.py | 19 --- scikits/learn/em/examples/expectation.py | 42 ------ scikits/learn/em/examples/pdfestimation.py | 48 ------- scikits/learn/em/examples/pdfestimation1d.py | 81 ----------- scikits/learn/em/examples/plotexamples.py | 39 ------ .../learn/em/examples/regularized_example.py | 69 ---------- scikits/learn/em/examples/utils.py | 47 ------- 12 files changed, 629 deletions(-) delete mode 100644 scikits/learn/em/examples/__init__.py delete mode 100644 scikits/learn/em/examples/basic_example1.py delete mode 100644 scikits/learn/em/examples/basic_example2.py delete mode 100644 scikits/learn/em/examples/basic_example3.py delete mode 100644 scikits/learn/em/examples/discriminant_analysis.py delete mode 100644 scikits/learn/em/examples/examples.py delete mode 100644 scikits/learn/em/examples/expectation.py delete mode 100644 scikits/learn/em/examples/pdfestimation.py delete mode 100644 scikits/learn/em/examples/pdfestimation1d.py delete mode 100644 scikits/learn/em/examples/plotexamples.py delete mode 100644 scikits/learn/em/examples/regularized_example.py delete mode 100644 scikits/learn/em/examples/utils.py diff --git a/scikits/learn/em/examples/__init__.py b/scikits/learn/em/examples/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/scikits/learn/em/examples/basic_example1.py b/scikits/learn/em/examples/basic_example1.py deleted file mode 100644 index 3b32fff118..0000000000 --- a/scikits/learn/em/examples/basic_example1.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as N -import pylab as P -from scikits.learn.em import GM - -#------------------------------ -# Hyper parameters: -# - K: number of clusters -# - d: dimension -k = 3 -d = 2 - -#------------------------------------------------------- -# Values for weights, mean and (diagonal) variances -# - the weights are an array of rank 1 -# - mean is expected to be rank 2 with one row for one component -# - variances are also expteced to be rank 2. For diagonal, one row -# is one diagonal, for full, the first d rows are the first variance, -# etc... In this case, the variance matrix should be k*d rows and d -# colums -w = N.array([0.2, 0.45, 0.35]) -mu = N.array([[4.1, 3], [1, 5], [-2, -3]]) -va = N.array([[1, 1.5], [3, 4], [2, 3.5]]) - -#----------------------------------------- -# First method: directly from parameters: -# Both methods are equivalents. -gm = GM.fromvalues(w, mu, va) - -#------------------------------------- -# Second method to build a GM instance: -gm = GM(d, k, mode = 'diag') -# The set_params checks that w, mu, and va corresponds to k, d and m -gm.set_param(w, mu, va) - -# Once set_params is called, both methods are equivalent. 
The 2d -# method is useful when using a GM object for learning (where -# the learner class will set the params), whereas the first one -# is useful when there is a need to quickly sample a model -# from existing values, without a need to give the hyper parameters - -# Create a Gaussian Mixture from the parameters, and sample -# 1000 items from it (one row = one 2 dimension sample) -data = gm.sample(1000) - -# Plot the samples -P.plot(data[:, 0], data[:, 1], '.') -# Plot the ellipsoids of confidence with a level a 75 % -gm.plot(level = 0.75) diff --git a/scikits/learn/em/examples/basic_example2.py b/scikits/learn/em/examples/basic_example2.py deleted file mode 100644 index 941570c8e4..0000000000 --- a/scikits/learn/em/examples/basic_example2.py +++ /dev/null @@ -1,45 +0,0 @@ -from numpy.random import seed - -from scikits.learn.em import GM, GMM, EM -import copy - -# To reproduce results, fix the random seed -seed(1) - -#+++++++++++++++++++++++++++++ -# Meta parameters of the model -# - k: Number of components -# - d: dimension of each Gaussian -# - mode: Mode of covariance matrix: full or diag (string) -# - nframes: number of frames (frame = one data point = one -# row of d elements) -k = 2 -d = 2 -mode = 'diag' -nframes = 1e3 - -#+++++++++++++++++++++++++++++++++++++++++++ -# Create an artificial GM model, samples it -#+++++++++++++++++++++++++++++++++++++++++++ -w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) -gm = GM.fromvalues(w, mu, va) - -# Sample nframes frames from the model -data = gm.sample(nframes) - -#++++++++++++++++++++++++ -# Learn the model with EM -#++++++++++++++++++++++++ - -# Create a Model from a Gaussian mixture with kmean initialization -lgm = GM(d, k, mode) -gmm = GMM(lgm, 'kmean') - -# The actual EM, with likelihood computation. The threshold -# is compared to the (linearly appromixated) derivative of the likelihood -em = EM() -like = em.train(data, gmm, maxiter = 30, thresh = 1e-8) - -# The computed parameters are in gmm.gm, which is the same than lgm -# (remember, python does not copy most objects by default). 
You can for example -# plot lgm against gm to compare diff --git a/scikits/learn/em/examples/basic_example3.py b/scikits/learn/em/examples/basic_example3.py deleted file mode 100644 index 11c19f1502..0000000000 --- a/scikits/learn/em/examples/basic_example3.py +++ /dev/null @@ -1,64 +0,0 @@ -import numpy as N -from numpy.random import seed - -from scikits.learn.em import GM, GMM, EM -import copy - -seed(2) - -k = 4 -d = 2 -mode = 'diag' -nframes = 1e3 - -#+++++++++++++++++++++++++++++++++++++++++++ -# Create an artificial GMM model, samples it -#+++++++++++++++++++++++++++++++++++++++++++ -w, mu, va = GM.gen_param(d, k, mode, spread = 1.0) -gm = GM.fromvalues(w, mu, va) - -# Sample nframes frames from the model -data = gm.sample(nframes) - -#++++++++++++++++++++++++ -# Learn the model with EM -#++++++++++++++++++++++++ - -# List of learned mixtures lgm[i] is a mixture with i+1 components -lgm = [] -kmax = 6 -bics = N.zeros(kmax) -em = EM() -for i in range(kmax): - lgm.append(GM(d, i+1, mode)) - - gmm = GMM(lgm[i], 'kmean') - em.train(data, gmm, maxiter = 30, thresh = 1e-10) - bics[i] = gmm.bic(data) - -print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1) - -#+++++++++++++++ -# Draw the model -#+++++++++++++++ -import pylab as P -P.subplot(3, 2, 1) - -for k in range(kmax): - P.subplot(3, 2, k+1) - level = 0.9 - P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - - # h keeps the handles of the plot, so that you can modify - # its parameters like label or color - h = lgm[k].plot(level = level) - [i.set_color('r') for i in h] - h[0].set_label('EM confidence ellipsoides') - - h = gm.plot(level = level) - [i.set_color('g') for i in h] - h[0].set_label('Real confidence ellipsoides') - -P.legend(loc = 0) -# depending on your configuration, you may have to call P.show() -# to actually display the figure diff --git a/scikits/learn/em/examples/discriminant_analysis.py b/scikits/learn/em/examples/discriminant_analysis.py deleted file mode 100644 index 69f59747b0..0000000000 --- a/scikits/learn/em/examples/discriminant_analysis.py +++ /dev/null @@ -1,127 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -__doc__ = """Example of doing classification with mixture of Gaussian. Note -that this is really a toy example: we do not use testing testset nor cross -validation. - -We use the famous iris database used by Sir R.A. Fisher. You can try to change -the attributes used for classification, number of components used for the -mixtures, etc...""" - -import numpy as N -import pylab as P -import matplotlib as MPL - -from scikits.learn.em import EM, GMM, GM -import utils - -data = utils.iris.load() -# cnames are the class names -cnames = data.keys() - -#-------------------- -# Data pre processing -#-------------------- -# we use 25 samples of each class (eg half of iris), for -# learning, and the other half for testing. 
We use sepal width and petal width -# only -ln = 25 -tn = 25 -xdata = {} -ydata = {} -# learning data -ldata = {} - -# you can change here the used attributes (sepal vs petal, width vs height) -for i in cnames: - xdata[i] = data[i]['sepal width'] - ydata[i] = data[i]['petal width'] - ldata[i] = N.concatenate((xdata[i][:ln, N.newaxis], - ydata[i][:ln, N.newaxis]), - axis = 1) - -tx = N.concatenate([xdata[i][ln:] for i in cnames]) -ty = N.concatenate([ydata[i][ln:] for i in cnames]) -tdata = N.concatenate((tx[:, N.newaxis], ty[:, N.newaxis]), axis = 1) - -# tclass makes the correspondance class <-> index in the testing data tdata -tclass = {} -for i in range(3): - tclass[cnames[i]] = N.arange(tn * i, tn * (i+1)) - -#---------------------------- -# Learning and classification -#---------------------------- -# This function train a mixture model with k components -def cluster(data, k, mode = 'full'): - d = data.shape[1] - gm = GM(d, k, mode) - gmm = GMM(gm) - em = EM() - em.train(data, gmm, maxiter = 20) - return gm - -# Estimate each class with a mixture of nc components -nc = 2 -mode = 'diag' -lmod = {} -for i in cnames: - lmod[i] = cluster(ldata[i], nc, mode) - -# Classifiy the testing data. Of course, the data are not really IID, because -# we did not shuffle the testing data, but in this case, this does not change -# anything. -p = N.empty((len(tdata), 3)) -for i in range(3): - # For each class, computes the likelihood for the testing data - p[:, i] = lmod[cnames[i]].pdf(tdata) - -# We then take the Maximum A Posteriori class (same than most likely model in -# this case, since each class is equiprobable) -cid = N.argmax(p, 1) -classification = {} -for i in range(3): - classification[cnames[i]] = N.where(cid == i)[0] - -correct = {} -incorrect = {} -for i in cnames: - correct[i] = N.intersect1d(classification[i], tclass[i]) - incorrect[i] = N.setdiff1d(classification[i], tclass[i]) - -#----------------- -# Plot the results -#----------------- -csym = {'setosa' : 's', 'versicolor' : 'x', 'virginica' : 'o'} -r = 50. 
-P.figure(figsize = [600/r, 400/r]) - -prop = MPL.font_manager.FontProperties(size='smaller') - -# Plot the learning data with the mixtures -P.subplot(2, 1, 1) -for i in lmod.values(): - #i.plot() - X, Y, Z, V = i.density_on_grid() - P.contourf(X, Y, Z, V) - -for i in cnames: - P.plot(ldata[i][:, 0], ldata[i][:, 1], csym[i], label = i + ' (learning)') -P.xlabel('sepal width') -P.ylabel('petal width') -P.legend(loc = 'best') - -# Plot the results on test dataset (green for correctly classified, red for -# incorrectly classified) -P.subplot(2, 1, 2) -for i in cnames: - P.plot(tx[correct[i]], ty[correct[i]], 'g' + csym[i], - label = '%s (correctly classified)' % i) - if len(incorrect[i]) > 0: - P.plot(tx[incorrect[i]], ty[incorrect[i]], 'r' + csym[i], - label = '%s (incorrectly classified)' % i) -P.legend(loc = 'best', prop = prop) -P.xlabel('sepal width') -P.ylabel('petal width') -P.savefig('dclass.png', dpi = 60) diff --git a/scikits/learn/em/examples/examples.py b/scikits/learn/em/examples/examples.py deleted file mode 100644 index c176f76d21..0000000000 --- a/scikits/learn/em/examples/examples.py +++ /dev/null @@ -1,19 +0,0 @@ -def ex1(): - import basic_example1 - -def ex2(): - import basic_example2 - -def ex3(): - import basic_example3 - -def pdfestim(): - import pdfestimation - -def pdfestim1d(): - import pdfestimation1d - -if __name__ == '__main__': - ex1() - ex2() - ex3() diff --git a/scikits/learn/em/examples/expectation.py b/scikits/learn/em/examples/expectation.py deleted file mode 100644 index 274fc0b242..0000000000 --- a/scikits/learn/em/examples/expectation.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Expectation maximation example -""" - -import pylab -import numpy as np -from scikits.learn.em.densities2 import gauss_ell - -#========================================= -# Test plotting a simple diag 2d variance: -#========================================= -va = np.array([5, 3]) -mu = np.array([2, 3]) - -# Generate a multivariate gaussian of mean mu and covariance va -X = np.random.randn(2, 1e3) -Yc = np.dot(np.diag(np.sqrt(va)), X) -Yc = Yc.transpose() + mu - -# Plotting -Xe, Ye = gauss_ell(mu, va, npoints = 100) -pylab.figure() -pylab.plot(Yc[:, 0], Yc[:, 1], '.') -pylab.plot(Xe, Ye, 'r') - -#========================================= -# Test plotting a simple full 2d variance: -#========================================= -va = np.array([[0.2, 0.1],[0.1, 0.5]]) -mu = np.array([0, 3]) - -# Generate a multivariate gaussian of mean mu and covariance va -X = np.random.randn(1e3, 2) -Yc = np.dot(np.linalg.cholesky(va), X.transpose()) -Yc = Yc.transpose() + mu - -# Plotting -Xe, Ye = gauss_ell(mu, va, npoints = 100, level=0.95) -pylab.figure() -pylab.plot(Yc[:, 0], Yc[:, 1], '.') -pylab.plot(Xe, Ye, 'r') -pylab.show() diff --git a/scikits/learn/em/examples/pdfestimation.py b/scikits/learn/em/examples/pdfestimation.py deleted file mode 100644 index 3fd33d3568..0000000000 --- a/scikits/learn/em/examples/pdfestimation.py +++ /dev/null @@ -1,48 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -# Example of doing pdf estimation with EM algorithm. Requires matplotlib. 
-import numpy as N -import pylab as P - -from scikits.learn.em import EM, GM, GMM -import utils - -oldfaithful = utils.get_faithful() - -# We want the relationship between d(t) and w(t+1), but get_faithful gives -# d(t), w(t), so we have to shift to get the "usual" faithful data -waiting = oldfaithful[1:, 1:] -duration = oldfaithful[:len(waiting), :1] -dt = N.concatenate((duration, waiting), 1) - -# Scale the data so that each component is in [0..1] -dt = utils.scale(dt) - -# This function train a mixture model with k components, returns the trained -# model and the BIC -def cluster(data, k, mode = 'full'): - d = data.shape[1] - gm = GM(d, k, mode) - gmm = GMM(gm) - em = EM() - em.train(data, gmm, maxiter = 20) - return gm, gmm.bic(data) - -# bc will contain a list of BIC values for each model trained -bc = [] -mode = 'full' -P.figure() -for k in range(1, 5): - # Train a model of k component, and plots isodensity curve - P.subplot(2, 2, k) - gm, b = cluster(dt, k = k, mode = mode) - bc.append(b) - - X, Y, Z, V = gm.density_on_grid() - P.contour(X, Y, Z, V) - P.plot(dt[:, 0], dt[:, 1], '.') - P.xlabel('duration time (scaled)') - P.ylabel('waiting time (scaled)') - -print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) diff --git a/scikits/learn/em/examples/pdfestimation1d.py b/scikits/learn/em/examples/pdfestimation1d.py deleted file mode 100644 index 981236afa6..0000000000 --- a/scikits/learn/em/examples/pdfestimation1d.py +++ /dev/null @@ -1,81 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -__doc__ = """This example shows how to do pdfestimation for one dimensional -data. It estimates a Gaussian mixture model for several number of components, -and it determines the "best" one according to the Bayesian Information -Criterion. - -It uses old faitfhul waiting time as the one dimension data, and plots the best -model as well as the BIC as a function of the number of component.""" -# Example of doing pdf estimation with EM algorithm. Requires matplotlib. -import numpy as N - -import pylab as P -import matplotlib as MPL - -from scikits.learn.em import EM, GM, GMM -import utils - -oldfaithful = utils.get_faithful() - -duration = oldfaithful[:, :1] -waiting = oldfaithful[:, 1:] - -#dt = utils.scale(duration) -#dt = duration / 60. -dt = waiting / 60. - -# This function train a mixture model with k components, returns the trained -# model and the BIC -def cluster(data, k): - d = data.shape[1] - gm = GM(d, k) - gmm = GMM(gm) - em = EM() - em.train(data, gmm, maxiter = 20) - return gm, gmm.bic(data) - -# bc will contain a list of BIC values for each model trained, gml the -# corresponding mixture model -bc = [] -gml = [] - -for k in range(1, 8): - gm, b = cluster(dt, k = k) - bc.append(b) - gml.append(gm) - -mbic = N.argmax(bc) - -# Below is code to display a figure with histogram and best model (in the BIC -# sense) pdf, with the BIC as a function of the number of components on the -# right. 
-P.figure(figsize = [12, 7]) -#--------------------------- -# histogram + pdf estimation -#--------------------------- -P.subplot(1, 2, 1) -h = gml[mbic].plot1d(gpdf=True) -# You can manipulate the differents parts of the plot through the returned -# handles -h['gpdf'][0].set_linestyle('-') -h['gpdf'][0].set_label('pdf of the mixture') -h['pdf'][0].set_label('pdf of individual component') -[l.set_linestyle('-') for l in h['pdf']] -[l.set_color('g') for l in h['pdf']] - -prop = MPL.font_manager.FontProperties(size='smaller') -P.legend(loc = 'best', prop = prop) - -P.hist(dt, 25, normed = 1, fill = False) -P.xlabel('waiting time between consecutive eruption (in min)') - -#------------------------------------------ -# BIC as a function of number of components -#------------------------------------------ -P.subplot(1, 2, 2) -P.plot(N.arange(1, 8), bc, 'o:') -P.xlabel("number of components") -P.ylabel("BIC") -print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) diff --git a/scikits/learn/em/examples/plotexamples.py b/scikits/learn/em/examples/plotexamples.py deleted file mode 100644 index af4adc5e5e..0000000000 --- a/scikits/learn/em/examples/plotexamples.py +++ /dev/null @@ -1,39 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -# This is a simple test to check whether plotting ellipsoides of confidence and -# isodensity contours match -import numpy as N - -import pylab as P - -from scikits.learn.em import EM, GM, GMM - -# Generate a simple mixture model, plot its confidence ellipses + isodensity -# curves for both diagonal and full covariance matrices -d = 3 -k = 3 -dim = [0, 2] -# diag model -w, mu, va = GM.gen_param(d, k) -dgm = GM.fromvalues(w, mu, va) -# full model -w, mu, va = GM.gen_param(d, k, 'full', spread = 1) -fgm = GM.fromvalues(w, mu, va) - -def plot_model(gm, dim): - X, Y, Z, V = gm.density_on_grid(dim = dim) - h = gm.plot(dim = dim) - [i.set_linestyle('-.') for i in h] - P.contour(X, Y, Z, V) - data = gm.sample(200) - P.plot(data[:, dim[0]], data[:,dim[1]], '.') - -# Plot the contours and the ellipsoids of confidence -P.subplot(2, 1, 1) -plot_model(dgm, dim) - -P.subplot(2, 1, 2) -plot_model(fgm, dim) - -P.show() diff --git a/scikits/learn/em/examples/regularized_example.py b/scikits/learn/em/examples/regularized_example.py deleted file mode 100644 index a7e23c12c0..0000000000 --- a/scikits/learn/em/examples/regularized_example.py +++ /dev/null @@ -1,69 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 03:00 PM 2007 J - -__doc__ = """Example of using RegularizedEM with pendigits data. - -If you want to do discriminant analysis with pendigits, you quickly have -problems with EM if you use directly the coordinates, because some points are -likely to be on the border, hence the corresponding component can have a -covariance matrix which easily becomes singular. Regularized EM avoids this -problem by using simple regularization on the mixture. You can play with pcount -and pval to see the effect on pdf estimation. 
- -For now, regularized EM is pretty crude, but is enough for simple cases where -you need to avoid singular covariance matrices.""" - -import numpy as N -import pylab as P - -from scikits.learn.em import EM, GM, GMM -# Experimental RegularizedEM -from scikits.learn.em.gmm_em import RegularizedEM -import utils - -x, y = utils.get_pendigits() - -# Take only the first point of pendigits for pdf estimation -dt1 = N.concatenate([x[:, N.newaxis], y[:, N.newaxis]], 1) -dt1 = utils.scale(dt1.astype(N.float)) - -# pcnt is the poportion of samples to use as prior count. Eg if you have 1000 -# samples, and pcn is 0.1, then the prior count would be 100, and 1100 samples -# will be considered as overall when regularizing the parameters. -pcnt = 0.05 -# You should try different values of pval. If pval is 0.1, then the -# regularization will be strong. If you use something like 0.01, really sharp -# components will appear. If the values are too small, the regularizer may not -# work (singular covariance matrices). -pval = 0.05 - -# This function train a mixture model with k components, returns the trained -# model and the BIC -def cluster(data, k, mode = 'full'): - d = data.shape[1] - gm = GM(d, k, mode) - gmm = GMM(gm, 'random') - em = RegularizedEM(pcnt = pcnt, pval = pval) - em.train(data, gmm, maxiter = 20) - return gm, gmm.bic(data) - -# bc will contain a list of BIC values for each model trained -N.seterr(all = 'warn') -bc = [] -mode = 'full' - -P.figure() -for k in range(1, 5): - # Train a model of k component, and plots isodensity curve - P.subplot(2, 2, k) - gm, b = cluster(dt1, k = k, mode = mode) - bc.append(b) - - X, Y, Z, V = gm.density_on_grid(nl = 20) - P.contour(X, Y, Z, V) - P.plot(dt1[:, 0], dt1[:, 1], '.') - P.xlabel('x coordinate (scaled)') - P.ylabel('y coordinate (scaled)') - -print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) -P.show() diff --git a/scikits/learn/em/examples/utils.py b/scikits/learn/em/examples/utils.py deleted file mode 100644 index 89f11cf190..0000000000 --- a/scikits/learn/em/examples/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 03:00 PM 2007 J - -# Various utilities for examples - -import numpy as N - -from scikits.learn.datasets import oldfaithful, pendigits - -def get_faithful(): - """Return faithful data as a nx2 array, first column being duration, second - being waiting time.""" - # Load faithful data, convert waiting into integer, remove L, M and S data - data = oldfaithful.load() - tmp1 = [] - tmp2 = [] - for i in data['data']: - if not (i[0] == 'L' or i[0] == 'M' or i[0] == 'S'): - tmp1.append(i[0]) - tmp2.append(i[1]) - - waiting = N.array([int(i) for i in tmp1], dtype = N.float) - duration = N.array([i for i in tmp2], dtype = N.float) - - waiting = waiting[:, N.newaxis] - duration = duration[:, N.newaxis] - - return N.concatenate((waiting, duration), 1) - -def get_pendigits(): - """Return faithful data as a nx2 array, first column being duration, second - being waiting time.""" - # Load faithful data, convert waiting into integer, remove L, M and S data - data = pendigits.training.load() - return data['data']['x0'], data['data']['y0'] - -def scale(data): - """ Scale data such as each col is in the range [0..1]. - - Note: inplace.""" - n = N.min(data, 0) - m = N.max(data, 0) - - data -= n - data /= (m-n) - return data - -- GitLab
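
The removed files all exercise the same small workflow: build a Gaussian mixture (GM), sample data from it, fit a new mixture with EM, and compare candidate models with the BIC. The condensed sketch below is only an aid for readers skimming this patch, not part of the change itself; it uses solely calls that appear verbatim in the deleted examples (GM.gen_param, GM.fromvalues, GM.sample, GMM with k-means initialization, EM.train, GMM.bic) and targets the historical scikits.learn.em API that this patch is relocating.

# Minimal sketch of the workflow covered by the deleted examples, using the
# historical scikits.learn.em API as it appears in the removed files.
import numpy as N

from scikits.learn.em import GM, GMM, EM

k = 3           # number of components in the reference model
d = 2           # dimension of each sample
mode = 'diag'   # diagonal covariance matrices
nframes = 1000  # number of samples to draw

# Build a reference mixture and sample training data from it
# (same steps as basic_example2.py).
w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
gm = GM.fromvalues(w, mu, va)
data = gm.sample(nframes)

# Fit mixtures with 1..6 components via EM (k-means initialization) and pick
# the number of components with the Bayesian Information Criterion, as in
# basic_example3.py.
bics = []
for nc in range(1, 7):
    lgm = GM(d, nc, mode)
    gmm = GMM(lgm, 'kmean')
    EM().train(data, gmm, maxiter = 30, thresh = 1e-8)
    bics.append(gmm.bic(data))

print "BIC selects %d components (reference model has %d)" % (N.argmax(bics) + 1, k)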