diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst new file mode 100644 index 0000000000000000000000000000000000000000..8b47f61b18c37ce556482ea4c98bdb98994bdb70 --- /dev/null +++ b/doc/modules/feature_selection.rst @@ -0,0 +1,57 @@ + +======================= +Feature selection +======================= + + + +Univariate feature selection +============================= + +Univariate feature selection works by selecting features based on a +univariate test statistic. Although it can be seen as a preprocessing step +to an estimator, `scikit.learn` exposes an object to wrap an existing +estimator with feature selection and expose a new estimator: + +.. autofunction:: scikits.learn.feature_select.univ_selection.UnivSelection + + + +Feature scoring functions +-------------------------- + +.. warning:: + + A common case of non-functioning for feature selection is to use a + regression scoring function with a classification problem. + +For classification +....................... + +.. autofunction:: scikits.learn.feature_select.univ_selection.f_classif + +For regression +................. + +.. autofunction:: scikits.learn.feature_select.univ_selection.f_regression + +Feature selection functions +---------------------------- + +.. autofunction:: scikits.learn.feature_select.univ_selection.select_k_best + +.. autofunction:: scikits.learn.feature_select.univ_selection.select_percentile + +.. autofunction:: scikits.learn.feature_select.univ_selection.select_fpr + +.. autofunction:: scikits.learn.feature_select.univ_selection.select_fdr + +.. autofunction:: scikits.learn.feature_select.univ_selection.select_fwe + + +Examples +---------- + +.. literalinclude:: ../../examples/feature_select.py + + diff --git a/doc/modules/glm.rst b/doc/modules/glm.rst index c32171ca1958468b0f7c44efa3b5a4da456335c4..ba1e3d14776beb1d6d51af691b192a5266e76153 100644 --- a/doc/modules/glm.rst +++ b/doc/modules/glm.rst @@ -25,4 +25,6 @@ Examples ..
literalinclude:: ../../examples/lasso_enet_coordinate_descent.py -TODO: include image +.. image:: lasso_enet_coordinate_descent.png + :align: center + diff --git a/doc/modules/lasso_enet_coordinate_descent.png b/doc/modules/lasso_enet_coordinate_descent.png new file mode 100644 index 0000000000000000000000000000000000000000..23dae332a76ba443a5451297ce7e0ce0e82f53e7 Binary files /dev/null and b/doc/modules/lasso_enet_coordinate_descent.png differ diff --git a/examples/lasso_enet_coordinate_descent.py b/examples/lasso_enet_coordinate_descent.py index ca5814f8b24ded05c366130c0a86afeb84235374..f2ca536217977b5dfad8e7c8e9d09d109df64d9d 100644 --- a/examples/lasso_enet_coordinate_descent.py +++ b/examples/lasso_enet_coordinate_descent.py @@ -1,3 +1,8 @@ +""" +Lasso and elastic net (L1 and L2 penalisation) implemented using a +coordinate descent. +""" + # Author: Alexandre Gramfort <alexandre.gramfort@inria.fr> # License: BSD Style. @@ -6,69 +11,63 @@ from itertools import cycle import numpy as np import pylab as pl + from scikits.learn.glm.cd import Lasso, ElasticNet, lasso_path, enet_path -def demo_glm(): - """ - Sample usage of GLM to predict with a linear model. 
- It will plot the paths for the Lasso and the Elastic-Net problem - - """ - n_samples, n_features, maxit = 5, 10, 30 - np.random.seed(0) - y = np.random.randn(n_samples) - X = np.random.randn(n_samples, n_features) - - """Lasso model - """ - alpha = 1.0 - - lasso = Lasso(alpha=alpha) - lasso.fit(X, y, maxit=maxit) - - print "Duality gap Lasso (should be small): %f" % lasso.gap - - import pylab as pl - pl.close('all') - pl.plot(lasso.E) - pl.xlabel('Iteration') - pl.ylabel('Cost function') - pl.title('Lasso') - pl.show() - - """Elastic-Net model - """ - alpha = 1.0 - beta = 1.0 - - enet = ElasticNet(alpha=alpha, beta=beta) - enet.fit(X, y, maxit=maxit) - - print "Duality gap (should be small): %f" % enet.gap - - pl.figure() - pl.plot(enet.E) - pl.xlabel('Iteration') - pl.ylabel('Cost function') - pl.title('Elastic-Net') - pl.show() - - """Test path functions - """ - - alphas_lasso, weights_lasso = lasso_path(X, y, factor=0.97, n_alphas = 100) - alphas_enet, weights_enet = enet_path(X, y, factor=0.97, n_alphas = 100, beta=0.1) - - pl.figure() - color_iter = cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k']) - for color, weight_lasso, weight_enet in zip(color_iter, weights_lasso.T, weights_enet.T): - pl.plot(-np.log(alphas_lasso), weight_lasso, color) - pl.plot(-np.log(alphas_enet), weight_enet, color+'x') - pl.xlabel('-log(lambda)') - pl.ylabel('weights') - pl.title('Lasso and Elastic-Net Paths') - pl.legend(['Lasso','Elastic-Net']) - pl.show() - -if __name__ == '__main__': - demo_glm() +n_samples, n_features, maxit = 5, 10, 30 + +np.random.seed(0) +y = np.random.randn(n_samples) +X = np.random.randn(n_samples, n_features) + +################################################################################ +# Fit models +################################################################################ + +# Lasso +lasso = Lasso(alpha=1) +lasso.fit(X, y, maxit=maxit) + +print "Duality gap Lasso (should be small): %f" % lasso.gap + + +# Elastic-Net +enet = ElasticNet(alpha=1, 
beta=1) +enet.fit(X, y, maxit=maxit) + +print "Duality gap (should be small): %f" % enet.gap + +# Display results +pl.figure(-1, figsize=(8, 4)) +pl.clf() +pl.subplots_adjust(wspace=.4, right=.95) +pl.subplot(1, 2, 1) +pl.plot(lasso.E, label='Lasso') +pl.plot(enet.E, label='Elastic Net') +pl.xlabel('Iteration') +pl.ylabel('Cost function') +pl.legend() +pl.title('Convergence') + +################################################################################ +# Demo path functions +################################################################################ + +alphas_lasso, weights_lasso = lasso_path(X, y, factor=0.97, n_alphas = 100) +alphas_enet, weights_enet = enet_path(X, y, factor=0.97, n_alphas = 100, + beta=0.1) + +# Display results +pl.subplot(1, 2, 2) +color_iter = cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k']) +for color, weight_lasso, weight_enet in zip(color_iter, + weights_lasso.T, weights_enet.T): + pl.plot(-np.log(alphas_lasso), weight_lasso, color) + pl.plot(-np.log(alphas_enet), weight_enet, color+'x') + +pl.xlabel('-log(lambda)') +pl.ylabel('weights') +pl.title('Lasso and Elastic-Net Paths') +pl.legend(['Lasso','Elastic-Net']) +pl.show() +