From 7b45bd97e62c75031fa8c04ec69f11b1f4f37e3c Mon Sep 17 00:00:00 2001 From: Gael Varoquaux <gael.varoquaux@normalesup.org> Date: Fri, 23 Apr 2010 05:41:43 +0000 Subject: [PATCH] DOC: Moving the em examples. Doing this in two times, to avoid what looks like bugs in the svn sourceforge serve. git-svn-id: https://scikit-learn.svn.sourceforge.net/svnroot/scikit-learn/trunk@699 22fbfee3-77ab-4535-9bad-27d1bd3bc7d8 --- scikits/learn/em/examples/__init__.py | 0 scikits/learn/em/examples/basic_example1.py | 48 ------- scikits/learn/em/examples/basic_example2.py | 45 ------- scikits/learn/em/examples/basic_example3.py | 64 --------- .../em/examples/discriminant_analysis.py | 127 ------------------ scikits/learn/em/examples/examples.py | 19 --- scikits/learn/em/examples/expectation.py | 42 ------ scikits/learn/em/examples/pdfestimation.py | 48 ------- scikits/learn/em/examples/pdfestimation1d.py | 81 ----------- scikits/learn/em/examples/plotexamples.py | 39 ------ .../learn/em/examples/regularized_example.py | 69 ---------- scikits/learn/em/examples/utils.py | 47 ------- 12 files changed, 629 deletions(-) delete mode 100644 scikits/learn/em/examples/__init__.py delete mode 100644 scikits/learn/em/examples/basic_example1.py delete mode 100644 scikits/learn/em/examples/basic_example2.py delete mode 100644 scikits/learn/em/examples/basic_example3.py delete mode 100644 scikits/learn/em/examples/discriminant_analysis.py delete mode 100644 scikits/learn/em/examples/examples.py delete mode 100644 scikits/learn/em/examples/expectation.py delete mode 100644 scikits/learn/em/examples/pdfestimation.py delete mode 100644 scikits/learn/em/examples/pdfestimation1d.py delete mode 100644 scikits/learn/em/examples/plotexamples.py delete mode 100644 scikits/learn/em/examples/regularized_example.py delete mode 100644 scikits/learn/em/examples/utils.py diff --git a/scikits/learn/em/examples/__init__.py b/scikits/learn/em/examples/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/scikits/learn/em/examples/basic_example1.py b/scikits/learn/em/examples/basic_example1.py deleted file mode 100644 index 3b32fff118..0000000000 --- a/scikits/learn/em/examples/basic_example1.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as N -import pylab as P -from scikits.learn.em import GM - -#------------------------------ -# Hyper parameters: -# - K: number of clusters -# - d: dimension -k = 3 -d = 2 - -#------------------------------------------------------- -# Values for weights, mean and (diagonal) variances -# - the weights are an array of rank 1 -# - mean is expected to be rank 2 with one row for one component -# - variances are also expteced to be rank 2. For diagonal, one row -# is one diagonal, for full, the first d rows are the first variance, -# etc... In this case, the variance matrix should be k*d rows and d -# colums -w = N.array([0.2, 0.45, 0.35]) -mu = N.array([[4.1, 3], [1, 5], [-2, -3]]) -va = N.array([[1, 1.5], [3, 4], [2, 3.5]]) - -#----------------------------------------- -# First method: directly from parameters: -# Both methods are equivalents. -gm = GM.fromvalues(w, mu, va) - -#------------------------------------- -# Second method to build a GM instance: -gm = GM(d, k, mode = 'diag') -# The set_params checks that w, mu, and va corresponds to k, d and m -gm.set_param(w, mu, va) - -# Once set_params is called, both methods are equivalent. 
The 2d -# method is useful when using a GM object for learning (where -# the learner class will set the params), whereas the first one -# is useful when there is a need to quickly sample a model -# from existing values, without a need to give the hyper parameters - -# Create a Gaussian Mixture from the parameters, and sample -# 1000 items from it (one row = one 2 dimension sample) -data = gm.sample(1000) - -# Plot the samples -P.plot(data[:, 0], data[:, 1], '.') -# Plot the ellipsoids of confidence with a level a 75 % -gm.plot(level = 0.75) diff --git a/scikits/learn/em/examples/basic_example2.py b/scikits/learn/em/examples/basic_example2.py deleted file mode 100644 index 941570c8e4..0000000000 --- a/scikits/learn/em/examples/basic_example2.py +++ /dev/null @@ -1,45 +0,0 @@ -from numpy.random import seed - -from scikits.learn.em import GM, GMM, EM -import copy - -# To reproduce results, fix the random seed -seed(1) - -#+++++++++++++++++++++++++++++ -# Meta parameters of the model -# - k: Number of components -# - d: dimension of each Gaussian -# - mode: Mode of covariance matrix: full or diag (string) -# - nframes: number of frames (frame = one data point = one -# row of d elements) -k = 2 -d = 2 -mode = 'diag' -nframes = 1e3 - -#+++++++++++++++++++++++++++++++++++++++++++ -# Create an artificial GM model, samples it -#+++++++++++++++++++++++++++++++++++++++++++ -w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) -gm = GM.fromvalues(w, mu, va) - -# Sample nframes frames from the model -data = gm.sample(nframes) - -#++++++++++++++++++++++++ -# Learn the model with EM -#++++++++++++++++++++++++ - -# Create a Model from a Gaussian mixture with kmean initialization -lgm = GM(d, k, mode) -gmm = GMM(lgm, 'kmean') - -# The actual EM, with likelihood computation. The threshold -# is compared to the (linearly appromixated) derivative of the likelihood -em = EM() -like = em.train(data, gmm, maxiter = 30, thresh = 1e-8) - -# The computed parameters are in gmm.gm, which is the same than lgm -# (remember, python does not copy most objects by default). 
You can for example -# plot lgm against gm to compare diff --git a/scikits/learn/em/examples/basic_example3.py b/scikits/learn/em/examples/basic_example3.py deleted file mode 100644 index 11c19f1502..0000000000 --- a/scikits/learn/em/examples/basic_example3.py +++ /dev/null @@ -1,64 +0,0 @@ -import numpy as N -from numpy.random import seed - -from scikits.learn.em import GM, GMM, EM -import copy - -seed(2) - -k = 4 -d = 2 -mode = 'diag' -nframes = 1e3 - -#+++++++++++++++++++++++++++++++++++++++++++ -# Create an artificial GMM model, samples it -#+++++++++++++++++++++++++++++++++++++++++++ -w, mu, va = GM.gen_param(d, k, mode, spread = 1.0) -gm = GM.fromvalues(w, mu, va) - -# Sample nframes frames from the model -data = gm.sample(nframes) - -#++++++++++++++++++++++++ -# Learn the model with EM -#++++++++++++++++++++++++ - -# List of learned mixtures lgm[i] is a mixture with i+1 components -lgm = [] -kmax = 6 -bics = N.zeros(kmax) -em = EM() -for i in range(kmax): - lgm.append(GM(d, i+1, mode)) - - gmm = GMM(lgm[i], 'kmean') - em.train(data, gmm, maxiter = 30, thresh = 1e-10) - bics[i] = gmm.bic(data) - -print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1) - -#+++++++++++++++ -# Draw the model -#+++++++++++++++ -import pylab as P -P.subplot(3, 2, 1) - -for k in range(kmax): - P.subplot(3, 2, k+1) - level = 0.9 - P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - - # h keeps the handles of the plot, so that you can modify - # its parameters like label or color - h = lgm[k].plot(level = level) - [i.set_color('r') for i in h] - h[0].set_label('EM confidence ellipsoides') - - h = gm.plot(level = level) - [i.set_color('g') for i in h] - h[0].set_label('Real confidence ellipsoides') - -P.legend(loc = 0) -# depending on your configuration, you may have to call P.show() -# to actually display the figure diff --git a/scikits/learn/em/examples/discriminant_analysis.py b/scikits/learn/em/examples/discriminant_analysis.py deleted file mode 100644 index 69f59747b0..0000000000 --- a/scikits/learn/em/examples/discriminant_analysis.py +++ /dev/null @@ -1,127 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -__doc__ = """Example of doing classification with mixture of Gaussian. Note -that this is really a toy example: we do not use testing testset nor cross -validation. - -We use the famous iris database used by Sir R.A. Fisher. You can try to change -the attributes used for classification, number of components used for the -mixtures, etc...""" - -import numpy as N -import pylab as P -import matplotlib as MPL - -from scikits.learn.em import EM, GMM, GM -import utils - -data = utils.iris.load() -# cnames are the class names -cnames = data.keys() - -#-------------------- -# Data pre processing -#-------------------- -# we use 25 samples of each class (eg half of iris), for -# learning, and the other half for testing. 
We use sepal width and petal width -# only -ln = 25 -tn = 25 -xdata = {} -ydata = {} -# learning data -ldata = {} - -# you can change here the used attributes (sepal vs petal, width vs height) -for i in cnames: - xdata[i] = data[i]['sepal width'] - ydata[i] = data[i]['petal width'] - ldata[i] = N.concatenate((xdata[i][:ln, N.newaxis], - ydata[i][:ln, N.newaxis]), - axis = 1) - -tx = N.concatenate([xdata[i][ln:] for i in cnames]) -ty = N.concatenate([ydata[i][ln:] for i in cnames]) -tdata = N.concatenate((tx[:, N.newaxis], ty[:, N.newaxis]), axis = 1) - -# tclass makes the correspondance class <-> index in the testing data tdata -tclass = {} -for i in range(3): - tclass[cnames[i]] = N.arange(tn * i, tn * (i+1)) - -#---------------------------- -# Learning and classification -#---------------------------- -# This function train a mixture model with k components -def cluster(data, k, mode = 'full'): - d = data.shape[1] - gm = GM(d, k, mode) - gmm = GMM(gm) - em = EM() - em.train(data, gmm, maxiter = 20) - return gm - -# Estimate each class with a mixture of nc components -nc = 2 -mode = 'diag' -lmod = {} -for i in cnames: - lmod[i] = cluster(ldata[i], nc, mode) - -# Classifiy the testing data. Of course, the data are not really IID, because -# we did not shuffle the testing data, but in this case, this does not change -# anything. -p = N.empty((len(tdata), 3)) -for i in range(3): - # For each class, computes the likelihood for the testing data - p[:, i] = lmod[cnames[i]].pdf(tdata) - -# We then take the Maximum A Posteriori class (same than most likely model in -# this case, since each class is equiprobable) -cid = N.argmax(p, 1) -classification = {} -for i in range(3): - classification[cnames[i]] = N.where(cid == i)[0] - -correct = {} -incorrect = {} -for i in cnames: - correct[i] = N.intersect1d(classification[i], tclass[i]) - incorrect[i] = N.setdiff1d(classification[i], tclass[i]) - -#----------------- -# Plot the results -#----------------- -csym = {'setosa' : 's', 'versicolor' : 'x', 'virginica' : 'o'} -r = 50. 
-P.figure(figsize = [600/r, 400/r]) - -prop = MPL.font_manager.FontProperties(size='smaller') - -# Plot the learning data with the mixtures -P.subplot(2, 1, 1) -for i in lmod.values(): - #i.plot() - X, Y, Z, V = i.density_on_grid() - P.contourf(X, Y, Z, V) - -for i in cnames: - P.plot(ldata[i][:, 0], ldata[i][:, 1], csym[i], label = i + ' (learning)') -P.xlabel('sepal width') -P.ylabel('petal width') -P.legend(loc = 'best') - -# Plot the results on test dataset (green for correctly classified, red for -# incorrectly classified) -P.subplot(2, 1, 2) -for i in cnames: - P.plot(tx[correct[i]], ty[correct[i]], 'g' + csym[i], - label = '%s (correctly classified)' % i) - if len(incorrect[i]) > 0: - P.plot(tx[incorrect[i]], ty[incorrect[i]], 'r' + csym[i], - label = '%s (incorrectly classified)' % i) -P.legend(loc = 'best', prop = prop) -P.xlabel('sepal width') -P.ylabel('petal width') -P.savefig('dclass.png', dpi = 60) diff --git a/scikits/learn/em/examples/examples.py b/scikits/learn/em/examples/examples.py deleted file mode 100644 index c176f76d21..0000000000 --- a/scikits/learn/em/examples/examples.py +++ /dev/null @@ -1,19 +0,0 @@ -def ex1(): - import basic_example1 - -def ex2(): - import basic_example2 - -def ex3(): - import basic_example3 - -def pdfestim(): - import pdfestimation - -def pdfestim1d(): - import pdfestimation1d - -if __name__ == '__main__': - ex1() - ex2() - ex3() diff --git a/scikits/learn/em/examples/expectation.py b/scikits/learn/em/examples/expectation.py deleted file mode 100644 index 274fc0b242..0000000000 --- a/scikits/learn/em/examples/expectation.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Expectation maximation example -""" - -import pylab -import numpy as np -from scikits.learn.em.densities2 import gauss_ell - -#========================================= -# Test plotting a simple diag 2d variance: -#========================================= -va = np.array([5, 3]) -mu = np.array([2, 3]) - -# Generate a multivariate gaussian of mean mu and covariance va -X = np.random.randn(2, 1e3) -Yc = np.dot(np.diag(np.sqrt(va)), X) -Yc = Yc.transpose() + mu - -# Plotting -Xe, Ye = gauss_ell(mu, va, npoints = 100) -pylab.figure() -pylab.plot(Yc[:, 0], Yc[:, 1], '.') -pylab.plot(Xe, Ye, 'r') - -#========================================= -# Test plotting a simple full 2d variance: -#========================================= -va = np.array([[0.2, 0.1],[0.1, 0.5]]) -mu = np.array([0, 3]) - -# Generate a multivariate gaussian of mean mu and covariance va -X = np.random.randn(1e3, 2) -Yc = np.dot(np.linalg.cholesky(va), X.transpose()) -Yc = Yc.transpose() + mu - -# Plotting -Xe, Ye = gauss_ell(mu, va, npoints = 100, level=0.95) -pylab.figure() -pylab.plot(Yc[:, 0], Yc[:, 1], '.') -pylab.plot(Xe, Ye, 'r') -pylab.show() diff --git a/scikits/learn/em/examples/pdfestimation.py b/scikits/learn/em/examples/pdfestimation.py deleted file mode 100644 index 3fd33d3568..0000000000 --- a/scikits/learn/em/examples/pdfestimation.py +++ /dev/null @@ -1,48 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -# Example of doing pdf estimation with EM algorithm. Requires matplotlib. 
-import numpy as N -import pylab as P - -from scikits.learn.em import EM, GM, GMM -import utils - -oldfaithful = utils.get_faithful() - -# We want the relationship between d(t) and w(t+1), but get_faithful gives -# d(t), w(t), so we have to shift to get the "usual" faithful data -waiting = oldfaithful[1:, 1:] -duration = oldfaithful[:len(waiting), :1] -dt = N.concatenate((duration, waiting), 1) - -# Scale the data so that each component is in [0..1] -dt = utils.scale(dt) - -# This function train a mixture model with k components, returns the trained -# model and the BIC -def cluster(data, k, mode = 'full'): - d = data.shape[1] - gm = GM(d, k, mode) - gmm = GMM(gm) - em = EM() - em.train(data, gmm, maxiter = 20) - return gm, gmm.bic(data) - -# bc will contain a list of BIC values for each model trained -bc = [] -mode = 'full' -P.figure() -for k in range(1, 5): - # Train a model of k component, and plots isodensity curve - P.subplot(2, 2, k) - gm, b = cluster(dt, k = k, mode = mode) - bc.append(b) - - X, Y, Z, V = gm.density_on_grid() - P.contour(X, Y, Z, V) - P.plot(dt[:, 0], dt[:, 1], '.') - P.xlabel('duration time (scaled)') - P.ylabel('waiting time (scaled)') - -print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) diff --git a/scikits/learn/em/examples/pdfestimation1d.py b/scikits/learn/em/examples/pdfestimation1d.py deleted file mode 100644 index 981236afa6..0000000000 --- a/scikits/learn/em/examples/pdfestimation1d.py +++ /dev/null @@ -1,81 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -__doc__ = """This example shows how to do pdfestimation for one dimensional -data. It estimates a Gaussian mixture model for several number of components, -and it determines the "best" one according to the Bayesian Information -Criterion. - -It uses old faitfhul waiting time as the one dimension data, and plots the best -model as well as the BIC as a function of the number of component.""" -# Example of doing pdf estimation with EM algorithm. Requires matplotlib. -import numpy as N - -import pylab as P -import matplotlib as MPL - -from scikits.learn.em import EM, GM, GMM -import utils - -oldfaithful = utils.get_faithful() - -duration = oldfaithful[:, :1] -waiting = oldfaithful[:, 1:] - -#dt = utils.scale(duration) -#dt = duration / 60. -dt = waiting / 60. - -# This function train a mixture model with k components, returns the trained -# model and the BIC -def cluster(data, k): - d = data.shape[1] - gm = GM(d, k) - gmm = GMM(gm) - em = EM() - em.train(data, gmm, maxiter = 20) - return gm, gmm.bic(data) - -# bc will contain a list of BIC values for each model trained, gml the -# corresponding mixture model -bc = [] -gml = [] - -for k in range(1, 8): - gm, b = cluster(dt, k = k) - bc.append(b) - gml.append(gm) - -mbic = N.argmax(bc) - -# Below is code to display a figure with histogram and best model (in the BIC -# sense) pdf, with the BIC as a function of the number of components on the -# right. 
-P.figure(figsize = [12, 7]) -#--------------------------- -# histogram + pdf estimation -#--------------------------- -P.subplot(1, 2, 1) -h = gml[mbic].plot1d(gpdf=True) -# You can manipulate the differents parts of the plot through the returned -# handles -h['gpdf'][0].set_linestyle('-') -h['gpdf'][0].set_label('pdf of the mixture') -h['pdf'][0].set_label('pdf of individual component') -[l.set_linestyle('-') for l in h['pdf']] -[l.set_color('g') for l in h['pdf']] - -prop = MPL.font_manager.FontProperties(size='smaller') -P.legend(loc = 'best', prop = prop) - -P.hist(dt, 25, normed = 1, fill = False) -P.xlabel('waiting time between consecutive eruption (in min)') - -#------------------------------------------ -# BIC as a function of number of components -#------------------------------------------ -P.subplot(1, 2, 2) -P.plot(N.arange(1, 8), bc, 'o:') -P.xlabel("number of components") -P.ylabel("BIC") -print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) diff --git a/scikits/learn/em/examples/plotexamples.py b/scikits/learn/em/examples/plotexamples.py deleted file mode 100644 index af4adc5e5e..0000000000 --- a/scikits/learn/em/examples/plotexamples.py +++ /dev/null @@ -1,39 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 12:00 PM 2007 J - -# This is a simple test to check whether plotting ellipsoides of confidence and -# isodensity contours match -import numpy as N - -import pylab as P - -from scikits.learn.em import EM, GM, GMM - -# Generate a simple mixture model, plot its confidence ellipses + isodensity -# curves for both diagonal and full covariance matrices -d = 3 -k = 3 -dim = [0, 2] -# diag model -w, mu, va = GM.gen_param(d, k) -dgm = GM.fromvalues(w, mu, va) -# full model -w, mu, va = GM.gen_param(d, k, 'full', spread = 1) -fgm = GM.fromvalues(w, mu, va) - -def plot_model(gm, dim): - X, Y, Z, V = gm.density_on_grid(dim = dim) - h = gm.plot(dim = dim) - [i.set_linestyle('-.') for i in h] - P.contour(X, Y, Z, V) - data = gm.sample(200) - P.plot(data[:, dim[0]], data[:,dim[1]], '.') - -# Plot the contours and the ellipsoids of confidence -P.subplot(2, 1, 1) -plot_model(dgm, dim) - -P.subplot(2, 1, 2) -plot_model(fgm, dim) - -P.show() diff --git a/scikits/learn/em/examples/regularized_example.py b/scikits/learn/em/examples/regularized_example.py deleted file mode 100644 index a7e23c12c0..0000000000 --- a/scikits/learn/em/examples/regularized_example.py +++ /dev/null @@ -1,69 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 03:00 PM 2007 J - -__doc__ = """Example of using RegularizedEM with pendigits data. - -If you want to do discriminant analysis with pendigits, you quickly have -problems with EM if you use directly the coordinates, because some points are -likely to be on the border, hence the corresponding component can have a -covariance matrix which easily becomes singular. Regularized EM avoids this -problem by using simple regularization on the mixture. You can play with pcount -and pval to see the effect on pdf estimation. 
- -For now, regularized EM is pretty crude, but is enough for simple cases where -you need to avoid singular covariance matrices.""" - -import numpy as N -import pylab as P - -from scikits.learn.em import EM, GM, GMM -# Experimental RegularizedEM -from scikits.learn.em.gmm_em import RegularizedEM -import utils - -x, y = utils.get_pendigits() - -# Take only the first point of pendigits for pdf estimation -dt1 = N.concatenate([x[:, N.newaxis], y[:, N.newaxis]], 1) -dt1 = utils.scale(dt1.astype(N.float)) - -# pcnt is the poportion of samples to use as prior count. Eg if you have 1000 -# samples, and pcn is 0.1, then the prior count would be 100, and 1100 samples -# will be considered as overall when regularizing the parameters. -pcnt = 0.05 -# You should try different values of pval. If pval is 0.1, then the -# regularization will be strong. If you use something like 0.01, really sharp -# components will appear. If the values are too small, the regularizer may not -# work (singular covariance matrices). -pval = 0.05 - -# This function train a mixture model with k components, returns the trained -# model and the BIC -def cluster(data, k, mode = 'full'): - d = data.shape[1] - gm = GM(d, k, mode) - gmm = GMM(gm, 'random') - em = RegularizedEM(pcnt = pcnt, pval = pval) - em.train(data, gmm, maxiter = 20) - return gm, gmm.bic(data) - -# bc will contain a list of BIC values for each model trained -N.seterr(all = 'warn') -bc = [] -mode = 'full' - -P.figure() -for k in range(1, 5): - # Train a model of k component, and plots isodensity curve - P.subplot(2, 2, k) - gm, b = cluster(dt1, k = k, mode = mode) - bc.append(b) - - X, Y, Z, V = gm.density_on_grid(nl = 20) - P.contour(X, Y, Z, V) - P.plot(dt1[:, 0], dt1[:, 1], '.') - P.xlabel('x coordinate (scaled)') - P.ylabel('y coordinate (scaled)') - -print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) -P.show() diff --git a/scikits/learn/em/examples/utils.py b/scikits/learn/em/examples/utils.py deleted file mode 100644 index 89f11cf190..0000000000 --- a/scikits/learn/em/examples/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -#! /usr/bin/env python -# Last Change: Sun Jul 22 03:00 PM 2007 J - -# Various utilities for examples - -import numpy as N - -from scikits.learn.datasets import oldfaithful, pendigits - -def get_faithful(): - """Return faithful data as a nx2 array, first column being duration, second - being waiting time.""" - # Load faithful data, convert waiting into integer, remove L, M and S data - data = oldfaithful.load() - tmp1 = [] - tmp2 = [] - for i in data['data']: - if not (i[0] == 'L' or i[0] == 'M' or i[0] == 'S'): - tmp1.append(i[0]) - tmp2.append(i[1]) - - waiting = N.array([int(i) for i in tmp1], dtype = N.float) - duration = N.array([i for i in tmp2], dtype = N.float) - - waiting = waiting[:, N.newaxis] - duration = duration[:, N.newaxis] - - return N.concatenate((waiting, duration), 1) - -def get_pendigits(): - """Return faithful data as a nx2 array, first column being duration, second - being waiting time.""" - # Load faithful data, convert waiting into integer, remove L, M and S data - data = pendigits.training.load() - return data['data']['x0'], data['data']['y0'] - -def scale(data): - """ Scale data such as each col is in the range [0..1]. - - Note: inplace.""" - n = N.min(data, 0) - m = N.max(data, 0) - - data -= n - data /= (m-n) - return data - -- GitLab
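
The removed files all exercise the same small workflow: build a Gaussian mixture (GM), sample data from it, fit a new mixture with EM, and compare candidate models with the BIC. The condensed sketch below is only an aid for readers skimming this patch, not part of the change itself; it uses solely calls that appear verbatim in the deleted examples (GM.gen_param, GM.fromvalues, GM.sample, GMM with k-means initialization, EM.train, GMM.bic) and targets the historical scikits.learn.em API that this patch is relocating.

# Minimal sketch of the workflow covered by the deleted examples, using the
# historical scikits.learn.em API as it appears in the removed files.
import numpy as N

from scikits.learn.em import GM, GMM, EM

k = 3           # number of components in the reference model
d = 2           # dimension of each sample
mode = 'diag'   # diagonal covariance matrices
nframes = 1000  # number of samples to draw

# Build a reference mixture and sample training data from it
# (same steps as basic_example2.py).
w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
gm = GM.fromvalues(w, mu, va)
data = gm.sample(nframes)

# Fit mixtures with 1..6 components via EM (k-means initialization) and pick
# the number of components with the Bayesian Information Criterion, as in
# basic_example3.py.
bics = []
for nc in range(1, 7):
    lgm = GM(d, nc, mode)
    gmm = GMM(lgm, 'kmean')
    EM().train(data, gmm, maxiter = 30, thresh = 1e-8)
    bics.append(gmm.bic(data))

print "BIC selects %d components (reference model has %d)" % (N.argmax(bics) + 1, k)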