Commit 8fbc7707 authored by Vincent Dubourg

Modifications following Gaël's latest remarks.

parent 5233f8ed
Showing with 450 additions and 316 deletions
@@ -2,9 +2,9 @@
 # -*- coding: utf-8 -*-
 """
-===============================================
-Gaussian Processes for Machine Learning example
-===============================================
+=============================================================
+Gaussian Processes regression example: the 'diabetes' dataset
+=============================================================
 This example consists in fitting a Gaussian Process model onto the diabetes
 dataset.
@@ -39,7 +39,6 @@ gp = GaussianProcess(theta0=1e-4, nugget=1e-2, verbose=False)
 # Fit the GP model to the data
 gp.fit(X, y)
-gp.par['beta']
 # Estimate the leave-one-out predictions using the cross_val module
 n_jobs = 2  # the distributing capacity available on the machine
...
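A minimal sketch of what this example does end to end, for readers skimming the diff. The theta0 and nugget values come from the hunk header above; the load_diabetes loader name and the hand-rolled leave-one-out loop (standing in for the example's cross_val call) are assumptions, not lines from the commit:

import numpy as np
from scikits.learn import datasets
from scikits.learn.gaussian_process import GaussianProcess

diabetes = datasets.load_diabetes()  # assumed loader name
X, y = diabetes.data, diabetes.target

# Fit the GP model to the data (theta0 and nugget as in the hunk above);
# note gp.par['beta'] is gone after this commit
gp = GaussianProcess(theta0=1e-4, nugget=1e-2, verbose=False)
gp.fit(X, y)

# Hand-rolled leave-one-out predictions, standing in for the cross_val call:
# refit without each observation and predict it back
n = X.shape[0]
y_loo = np.empty(n)
for i in range(n):
    mask = np.arange(n) != i
    gp_i = GaussianProcess(theta0=1e-4, nugget=1e-2, verbose=False)
    y_loo[i] = gp_i.fit(X[mask], y[mask]).predict(X[i:i + 1])[0]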
@@ -2,9 +2,9 @@
 # -*- coding: utf-8 -*-
 """
-===============================================
-Gaussian Processes for Machine Learning example
-===============================================
+==============================================================================
+Gaussian Processes classification example: exploiting the probabilistic output
+==============================================================================
 A two-dimensional regression exercise with a post-processing allowing for
 probabilistic classification thanks to the Gaussian property of the prediction.
@@ -34,7 +34,7 @@ PHI = lambda x: Grv.cdf(x)
 PHIinv = lambda x: Grv.ppf(x)
 # A few constants
-beta0 = 8
+lim = 8
 b, kappa, e = 5, .5, .1
 # The function to predict (classification will then consist in predicting
@@ -62,8 +62,8 @@ gp.fit(X, Y)
 # Evaluate real function, the prediction and its MSE on a grid
 res = 50
-x1, x2 = np.meshgrid(np.linspace(- beta0, beta0, res), \
-                     np.linspace(- beta0, beta0, res))
+x1, x2 = np.meshgrid(np.linspace(- lim, lim, res), \
+                     np.linspace(- lim, lim, res))
 xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T
 YY = g(xx)
@@ -87,7 +87,7 @@ pl.xlabel('$x_1$')
 pl.ylabel('$x_2$')
 cax = pl.imshow(np.flipud(PHI(- yy / sigma)), cmap=cm.gray_r, alpha=0.8, \
-                extent=(- beta0, beta0, - beta0, beta0))
+                extent=(- lim, lim, - lim, lim))
 norm = pl.matplotlib.colors.Normalize(vmin=0., vmax=0.9)
 cb = pl.colorbar(cax, ticks=[0., 0.2, 0.4, 0.6, 0.8, 1.], norm=norm)
 cb.set_label('${\\rm \mathbb{P}}\left[\widehat{G}(\mathbf{x}) \leq 0\\right]$')
...
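The post-processing this example relies on, in brief: the GP prediction at x is Gaussian with mean y(x) and variance MSE(x), so the probability of a non-positive outcome is PHI(-y(x) / sigma(x)), exactly the quantity plotted above. A small sketch (the helper name is illustrative, not part of the commit):

import numpy as np
from scipy import stats

def probability_nonpositive(gp, X):
    # gp: a fitted GaussianProcess; eval_MSE=True returns (mean, MSE)
    y_pred, MSE = gp.predict(X, eval_MSE=True)
    sigma = np.sqrt(MSE)
    return stats.norm.cdf(- y_pred / sigma)  # PHI(- y / sigma), as plotted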
@@ -2,9 +2,9 @@
 # -*- coding: utf-8 -*-
 """
-===============================================
-Gaussian Processes for Machine Learning example
-===============================================
+=================================================================
+Gaussian Processes regression example: basic introductory example
+=================================================================
 A simple one-dimensional regression exercise with a cubic correlation
 model whose parameters are estimated using the maximum likelihood principle.
@@ -18,7 +18,7 @@ confidence interval.
 # License: BSD style
 import numpy as np
-from scikits.learn.gaussian_process import GaussianProcess, corrcubic
+from scikits.learn.gaussian_process import GaussianProcess
 from matplotlib import pyplot as pl
 # Print the docstring
@@ -38,7 +38,7 @@ Y = f(X).ravel()
 x = np.atleast_2d(np.linspace(0, 10, 1000)).T
 # Instanciate a Gaussian Process model
-gp = GaussianProcess(corr=corrcubic, theta0=1e-2, thetaL=1e-4, thetaU=1e-1, \
-                     random_start=100)
+gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1, \
+                     random_start=100)
 # Fit to data using Maximum Likelihood Estimation of the parameters
...
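The visible API change in this hunk: corrcubic is no longer importable, and the correlation model is selected by name. A runnable sketch under the new convention (the training data is borrowed from the new test suite further down):

import numpy as np
from scikits.learn.gaussian_process import GaussianProcess

f = lambda x: x * np.sin(x)
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = f(X).ravel()

# 'cubic' replaces the corr=corrcubic callable removed above
gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
                     random_start=100)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, MSE = gp.predict(x, eval_MSE=True)
sigma = np.sqrt(MSE)  # pointwise 95% bounds: y_pred +/- 1.96 * sigma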
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
+# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
+#         (mostly translation, see implementation details)
+# License: BSD style
 """
-This module contains a contribution to the scikit-learn project that
-implements Gaussian Process based prediction (also known as Kriging).
-The present implementation is based on a transliteration of the DACE
-Matlab toolbox <http://www2.imm.dtu.dk/~hbn/dace/>.
+A module that implements scalar Gaussian Process based prediction (also
+known as Kriging).
+Contains
+--------
+GaussianProcess: The main class of the module that implements the Gaussian
+Process prediction theory.
+regression_models: A submodule that contains the built-in regression models.
+correlation_models: A submodule that contains the built-in correlation models.
+Implementation details
+----------------------
+The presentation implementation is based on a translation of the DACE
+Matlab toolbox.
+See references:
+[1] H.B. Nielsen, S.N. Lophaven, H. B. Nielsen and J. Sondergaard (2002).
+    DACE - A MATLAB Kriging Toolbox.
+    http://www2.imm.dtu.dk/~hbn/dace/dace.pdf
 """
 from .gaussian_process import GaussianProcess
-from .correlation import corrlin, corrcubic, correxp1, \
-    correxp2, correxpg, corriid
-from .regression import regpoly0, regpoly1, regpoly2
+from . import correlation_models
+from . import regression_models
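What client code sees after this change, sketched below: the model functions are reached through the two submodules, or referred to by name, instead of being re-exported from the package root. Both spellings appear in the new tests:

from scikits.learn.gaussian_process import GaussianProcess
from scikits.learn.gaussian_process import correlation_models, regression_models

# Pass the model functions themselves...
gp = GaussianProcess(regr=regression_models.constant,
                     corr=correlation_models.squared_exponential)
# ...or name the built-ins as strings.
gp = GaussianProcess(regr='constant', corr='squared_exponential')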
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
+# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
+#         (mostly translation, see implementation details)
+# License: BSD style
+"""
+The built-in correlation models submodule for the gaussian_process module.
+"""
 ################
 # Dependencies #
 ################
@@ -13,13 +21,13 @@ import numpy as np
 #############################
-def correxp1(theta, d):
+def absolute_exponential(theta, d):
     """
-    Exponential autocorrelation model.
+    Absolute exponential autocorrelation model.
     (Ornstein-Uhlenbeck stochastic process)
                                          n
-    correxp1 : theta, dx --> r(theta, dx) = exp(  sum  - theta_i * |dx_i| )
+    theta, dx --> r(theta, dx) = exp(  sum  - theta_i * |dx_i| )
                                        i = 1
     Parameters
@@ -58,13 +66,13 @@ def correxp1(theta, d):
     return r
-def correxp2(theta, d):
+def squared_exponential(theta, d):
     """
     Squared exponential correlation model (Radial Basis Function).
     (Infinitely differentiable stochastic process, very smooth)
                                          n
-    correxp2 : theta, dx --> r(theta, dx) = exp(  sum  - theta_i * (dx_i)^2 )
+    theta, dx --> r(theta, dx) = exp(  sum  - theta_i * (dx_i)^2 )
                                        i = 1
     Parameters
@@ -103,14 +111,14 @@ def correxp2(theta, d):
     return r
-def correxpg(theta, d):
+def generalized_exponential(theta, d):
     """
     Generalized exponential correlation model.
     (Useful when one does not know the smoothness of the function to be
     predicted.)
                                          n
-    correxpg : theta, dx --> r(theta, dx) = exp(  sum  - theta_i * |dx_i|^p )
+    theta, dx --> r(theta, dx) = exp(  sum  - theta_i * |dx_i|^p )
                                        i = 1
     Parameters
@@ -152,13 +160,13 @@ def correxpg(theta, d):
     return r
-def corriid(theta, d):
+def pure_nugget(theta, d):
     """
     Spatial independence correlation model (pure nugget).
     (Useful when one wants to solve an ordinary least squares problem!)
                                              n
-    corriid : theta, dx --> r(theta, dx) = 1 if  sum |dx_i| == 0
+    theta, dx --> r(theta, dx) = 1 if  sum |dx_i| == 0
                                            i = 1
                                     0 otherwise
     Parameters
@@ -191,11 +199,11 @@ def corriid(theta, d):
     return r
-def corrcubic(theta, d):
+def cubic(theta, d):
     """
     Cubic correlation model.
-    corrcubic : theta, dx --> r(theta, dx) =
+    theta, dx --> r(theta, dx) =
          n
       prod max(0, 1 - 3(theta_j*d_ij)^2 + 2(theta_j*d_ij)^3) ,  i = 1,...,m
      j = 1
     Parameters
@@ -241,11 +249,11 @@ def corrcubic(theta, d):
     return r
-def corrlin(theta, d):
+def linear(theta, d):
     """
     Linear correlation model.
-    corrlin : theta, dx --> r(theta, dx) =
+    theta, dx --> r(theta, dx) =
          n
       prod max(0, 1 - theta_j*d_ij) ,  i = 1,...,m
      j = 1
...
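For reference, a self-contained sketch of the formula documented for the renamed squared_exponential model; this illustrates the documented behaviour, not the library's exact implementation:

import numpy as np

def squared_exponential(theta, d):
    # d: (m, n) array of componentwise differences dx between pairs of
    # points; theta: length-n (anisotropic) or length-1 (isotropic) array
    theta = np.asarray(theta, dtype=float).reshape(1, -1)
    d = np.asarray(d, dtype=float)
    return np.exp(- np.sum(theta * d ** 2, axis=1))

# Correlation decays with distance:
print(squared_exponential([1.], [[0.], [.5], [2.]]))  # ~[1.0, 0.78, 0.018]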
This diff is collapsed.
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
+# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
+#         (mostly translation, see implementation details)
+# License: BSD style
+"""
+The built-in regression models submodule for the gaussian_process module.
+"""
 ################
 # Dependencies #
 ################
@@ -13,11 +21,11 @@ import numpy as np
 ############################
-def regpoly0(x):
+def constant(x):
     """
     Zero order polynomial (constant, p = 1) regression model.
-    regpoly0 : x --> f(x) = 1
+    x --> f(x) = 1
     Parameters
     ----------
@@ -39,11 +47,11 @@ def regpoly0(x):
     return f
-def regpoly1(x):
+def linear(x):
     """
-    First order polynomial (hyperplane, p = n) regression model.
-    regpoly1 : x --> f(x) = [ x_1, ..., x_n ].T
+    First order polynomial (linear, p = n+1) regression model.
+    x --> f(x) = [ 1, x_1, ..., x_n ].T
     Parameters
     ----------
@@ -65,11 +73,11 @@ def regpoly1(x):
     return f
-def regpoly2(x):
+def quadratic(x):
     """
-    Second order polynomial (hyperparaboloid, p = n*(n-1)/2) regression model.
-    regpoly2 : x --> f(x) = [ x_i*x_j, (i,j) = 1,...,n ].T
+    Second order polynomial (quadratic, p = n*(n-1)/2+n+1) regression model.
+    x --> f(x) = [ 1, { x_i, i = 1,...,n }, { x_i * x_j, (i,j) = 1,...,n } ].T
                                                              i > j
     Parameters
...
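The updated docstrings carry the main behavioural change here: the regression bases now include the constant term. A sketch of the first two models as documented (illustrative only, not the library code):

import numpy as np

def constant(x):
    x = np.atleast_2d(x)
    return np.ones((x.shape[0], 1))  # f(x) = 1 for every row

def linear(x):
    x = np.atleast_2d(x)
    return np.hstack([np.ones((x.shape[0], 1)), x])  # [1, x_1, ..., x_n]

print(linear([[2., 3.]]))  # [[1. 2. 3.]]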
"""
Testing for Gaussian Process module (scikits.learn.gaussian_process)
"""
# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
# License: BSD style
import numpy as np
from .. import GaussianProcess
from .. import regression_models as regression
from .. import correlation_models as correlation
def test_1d(regr=regression.constant, corr=correlation.squared_exponential,
random_start=10, beta0=None):
"""
MLE estimation of a one-dimensional Gaussian Process model.
Check random start optimization.
Test the interpolating property.
"""
f = lambda x: x * np.sin(x)
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = f(X).ravel()
gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
random_start=random_start, verbose=False).fit(X, y)
y_pred, MSE = gp.predict(X, eval_MSE=True)
assert np.allclose(y_pred, y) and np.allclose(MSE, 0.)
def test_2d(regr=regression.constant, corr=correlation.squared_exponential,
random_start=10, beta0=None):
"""
MLE estimation of a two-dimensional Gaussian Process model accounting for
anisotropy. Check random start optimization.
Test the interpolating property.
"""
b, kappa, e = 5., .5, .1
g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
X = np.array([[-4.61611719, -6.00099547],
[4.10469096, 5.32782448],
[0.00000000, -0.50000000],
[-6.17289014, -4.6984743],
[1.3109306, -6.93271427],
[-5.03823144, 3.10584743],
[-2.87600388, 6.74310541],
[5.21301203, 4.26386883]])
y = g(X).ravel()
gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
thetaU=[1e-1] * 2,
random_start=random_start, verbose=False)
gp.fit(X, y)
y_pred, MSE = gp.predict(X, eval_MSE=True)
assert np.allclose(y_pred, y) and np.allclose(MSE, 0.)
def test_more_builtin_correlation_models(random_start=1):
"""
Repeat test_1d and test_2d for several built-in correlation
models specified as strings.
"""
all_corr = ['absolute_exponential', 'squared_exponential', 'cubic',
'linear']
for corr in all_corr:
test_1d(regr='constant', corr=corr, random_start=random_start)
test_2d(regr='constant', corr=corr, random_start=random_start)
def test_ordinary_kriging():
"""
Repeat test_1d and test_2d with given regression weights (beta0) for
different regression models (Ordinary Kriging).
"""
test_1d(regr='linear', beta0=[0., 0.5])
test_1d(regr='quadratic', beta0=[0., 0.5, 0.5])
test_2d(regr='linear', beta0=[0., 0.5, 0.5])
test_2d(regr='quadratic', beta0=[0., 0.5, 0.5, 0.5, 0.5, 0.5])
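Since the new tests are plain functions, they can be smoke-run directly; the module path below assumes the usual tests/ package layout (a runner such as nose works as well):

from scikits.learn.gaussian_process.tests.test_gaussian_process import (
    test_1d, test_2d, test_more_builtin_correlation_models,
    test_ordinary_kriging)

test_1d()
test_2d()
test_more_builtin_correlation_models()
test_ordinary_kriging()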
"""
Testing for Gaussian Process module (scikits.learn.gaussian_process)
"""
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal, \
assert_almost_equal, assert_raises, assert_
from ..gaussian_process import GaussianProcess
def test_regression_1d_x_sinx():
"""
MLE estimation of a Gaussian Process model with a squared exponential
correlation model (correxp2). Check random start optimization.
Test the interpolating property.
"""
f = lambda x: x * np.sin(x)
X = np.array([1., 3., 5., 6., 7., 8.])
y = f(X)
gp = GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1, \
random_start=10, verbose=False).fit(X, y)
y_pred, MSE = gp.predict(X, eval_MSE=True)
assert (np.all(np.abs((y_pred - y) / y) < 1e-6) and np.all(MSE < 1e-6))