From 5b247f90e4b518ef7ce470bafa17f281de3a1c01 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@ais.uni-bonn.de>
Date: Sun, 28 Jul 2013 17:39:38 +0200
Subject: [PATCH] Move examples around for better structure.

---
 doc/datasets/twenty_newsgroups.rst            |   8 +-
 doc/developers/performance.rst                |   2 +-
 doc/modules/clustering.rst                    |   2 +-
 doc/modules/cross_validation.rst              |   8 +-
 doc/modules/decomposition.rst                 |   2 +-
 doc/modules/feature_extraction.rst            |   6 +-
 doc/modules/feature_selection.rst             |  12 +-
 doc/modules/grid_search.rst                   |   6 +-
 doc/modules/lda_qda.rst                       |   6 +-
 doc/modules/learning_curve.rst                |  22 +--
 doc/modules/linear_model.rst                  |   2 +-
 doc/modules/model_evaluation.rst              |  40 ++---
 doc/modules/neural_networks.rst               |   6 +-
 doc/modules/pipeline.rst                      |   4 +-
 doc/modules/sgd.rst                           |   2 +-
 doc/tutorial/basic/tutorial.rst               |   4 +-
 .../text_analytics/working_with_text_data.rst |   2 +-
 doc/whats_new.rst                             |   4 +-
 examples/bicluster/README.txt                 |   2 +-
 examples/classification/README.txt            |   6 +
 .../plot_classification_probability.py        |   0
 .../plot_classifier_comparison.py             |   0
 .../plot_digits_classification.py             |   0
 examples/{ => classification}/plot_lda_qda.py |   0
 examples/cluster/README.txt                   |   2 +-
 examples/covariance/README.txt                |   2 +-
 examples/cross_decomposition/README.txt       |   2 +-
 examples/datasets/README.txt                  |   2 +-
 examples/decomposition/README.txt             |   2 +-
 examples/ensemble/README.txt                  |   2 +-
 examples/feature_selection/README.txt         |   6 +
 .../feature_selection_pipeline.py             |   0
 .../plot_feature_selection.py                 |   0
 ...lot_permutation_test_for_classification.py |   0
 .../plot_rfe_digits.py                        |   2 +-
 .../plot_rfe_with_cross_validation.py         |   0
 examples/gaussian_process/README.txt          |   2 +-
 examples/linear_model/README.txt              |   2 +-
 examples/manifold/README.txt                  |   2 +-
 examples/mixture/README.txt                   |   2 +-
 examples/model_selection/README.txt           |   7 +
 .../grid_search_digits.py                     |   0
 .../grid_search_text_feature_extraction.py    |   0
 .../plot_confusion_matrix.py                  |   0
 .../plot_learning_curve.py                    |   0
 .../plot_precision_recall.py                  |   0
 examples/{ => model_selection}/plot_roc.py    |   2 +-
 .../plot_roc_crossval.py                      |   2 +-
 .../plot_train_error_vs_test_error.py         |   0
 .../plot_underfitting_overfitting.py          |   0
 .../plot_validation_curve.py                  |   0
 .../randomized_search.py                      |   0
 examples/neighbors/README.txt                 |   2 +-
 .../plot_rbm_logistic_classification.py       |   0
 examples/semi_supervised/README.txt           |   2 +-
 examples/svm/README.txt                       |   2 +-
 examples/text/README.txt                      |   6 +
 .../document_classification_20newsgroups.py   |   0
 examples/{ => text}/document_clustering.py    |   0
 .../{ => text}/hashing_vs_dict_vectorizer.py  |   0
 .../mlcomp_sparse_document_classification.py  | 145 ++++++++++++++++++
 examples/tree/README.txt                      |   2 +-
 62 files changed, 257 insertions(+), 87 deletions(-)
 create mode 100644 examples/classification/README.txt
 rename examples/{ => classification}/plot_classification_probability.py (100%)
 rename examples/{ => classification}/plot_classifier_comparison.py (100%)
 rename examples/{ => classification}/plot_digits_classification.py (100%)
 rename examples/{ => classification}/plot_lda_qda.py (100%)
 create mode 100644 examples/feature_selection/README.txt
 rename examples/{ => feature_selection}/feature_selection_pipeline.py (100%)
 rename examples/{ => feature_selection}/plot_feature_selection.py (100%)
 rename examples/{ => feature_selection}/plot_permutation_test_for_classification.py (100%)
 rename examples/{ => feature_selection}/plot_rfe_digits.py (90%)
 rename examples/{ => feature_selection}/plot_rfe_with_cross_validation.py (100%)
 create mode 100644 examples/model_selection/README.txt
 rename examples/{ => model_selection}/grid_search_digits.py (100%)
 rename examples/{ => model_selection}/grid_search_text_feature_extraction.py (100%)
 rename examples/{ => model_selection}/plot_confusion_matrix.py (100%)
 rename examples/{ => model_selection}/plot_learning_curve.py (100%)
 rename examples/{ => model_selection}/plot_precision_recall.py (100%)
 rename examples/{ => model_selection}/plot_roc.py (98%)
 rename examples/{ => model_selection}/plot_roc_crossval.py (98%)
 rename examples/{ => model_selection}/plot_train_error_vs_test_error.py (100%)
 rename examples/{ => model_selection}/plot_underfitting_overfitting.py (100%)
 rename examples/{ => model_selection}/plot_validation_curve.py (100%)
 rename examples/{ => model_selection}/randomized_search.py (100%)
 rename examples/{ => neural_networks}/plot_rbm_logistic_classification.py (100%)
 create mode 100644 examples/text/README.txt
 rename examples/{ => text}/document_classification_20newsgroups.py (100%)
 rename examples/{ => text}/document_clustering.py (100%)
 rename examples/{ => text}/hashing_vs_dict_vectorizer.py (100%)
 create mode 100644 examples/text/mlcomp_sparse_document_classification.py

diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 8b5fc72923..3e40a0aaf0 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -134,7 +134,7 @@ which is fast to train and achieves a decent F-score::
   >>> metrics.f1_score(newsgroups_test.target, pred)
   0.88251152461278892
 
-(The example :ref:`example_document_classification_20newsgroups.py` shuffles
+(The example :ref:`example_text_document_classification_20newsgroups.py` shuffles
 the training and test data, instead of segmenting by time, and in that case
 multinomial Naive Bayes gets a much higher F-score of 0.88. Are you suspicious
 yet of what's going on inside this classifier?)
@@ -200,7 +200,7 @@ It loses even more if we also strip this metadata from the training data:
   0.73160869205141166
 
 Some other classifiers cope better with this harder version of the task. Try
-running :ref:`example_grid_search_text_feature_extraction.py` with and without
+running :ref:`example_model_selection_grid_search_text_feature_extraction.py` with and without
 the ``--filter`` option to compare the results.
 
 .. topic:: Recommendation
@@ -212,6 +212,6 @@ the ``--filter`` option to compare the results.
 
 .. topic:: Examples
 
-   * :ref:`example_grid_search_text_feature_extraction.py`
+   * :ref:`example_model_selection_grid_search_text_feature_extraction.py`
 
-   * :ref:`example_document_classification_20newsgroups.py`
+   * :ref:`example_text_document_classification_20newsgroups.py`
diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst
index da9bf83cf6..47992212e8 100644
--- a/doc/developers/performance.rst
+++ b/doc/developers/performance.rst
@@ -119,7 +119,7 @@ for interactively exploring the relevant part for the code.
 
 Suppose we want to profile the Non Negative Matrix Factorization module
 of the scikit. Let us setup a new IPython session and load the digits
-dataset and as in the :ref:`example_plot_digits_classification.py` example::
+dataset and as in the :ref:`example_classification_plot_digits_classification.py` example::
 
   In [1]: from sklearn.decomposition import NMF
 
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 4cb91923aa..94701ed2a7 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -256,7 +256,7 @@ small, as shown in the example and cited reference.
  * :ref:`example_cluster_plot_mini_batch_kmeans.py`: Comparison of KMeans and
    MiniBatchKMeans
 
- * :ref:`example_document_clustering.py`: Document clustering using sparse
+ * :ref:`example_text_document_clustering.py`: Document clustering using sparse
    MiniBatchKMeans
 
  * :ref:`example_cluster_plot_dict_face_patches.py`
diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index f802a39313..129ff19d3a 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -148,10 +148,10 @@ The available cross validation iterators are introduced in the following.
 
 .. topic:: Examples
 
-    * :ref:`example_plot_roc_crossval.py`,
-    * :ref:`example_plot_rfe_with_cross_validation.py`,
-    * :ref:`example_grid_search_digits.py`,
-    * :ref:`example_grid_search_text_feature_extraction.py`,
+    * :ref:`example_model_selection_plot_roc_crossval.py`,
+    * :ref:`example_feature_selection_plot_rfe_with_cross_validation.py`,
+    * :ref:`example_model_selection_grid_search_digits.py`,
+    * :ref:`example_model_selection_grid_search_text_feature_extraction.py`,
 
 
 Cross validation iterators
diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index 74c47b4a2c..227fad5eed 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -312,7 +312,7 @@ compensating for LSA's erroneous assumptions about textual data.
 
 .. topic:: Examples:
 
-   * :ref:`example_document_clustering.py`
+   * :ref:`example_text_document_clustering.py`
 
 .. topic:: References:
 
diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst
index b757883fb0..c8359b103d 100644
--- a/doc/modules/feature_extraction.rst
+++ b/doc/modules/feature_extraction.rst
@@ -471,7 +471,7 @@ As usual the best way to adjust the feature extraction parameters
 is to use a cross-validated grid search, for instance by pipelining the
 feature extractor with a classifier:
 
- * :ref:`example_grid_search_text_feature_extraction.py`
+ * :ref:`example_model_selection_grid_search_text_feature_extraction.py`
 
 
 Decoding text files
@@ -565,12 +565,12 @@ In particular in a **supervised setting** it can be successfully combined
 with fast and scalable linear models to train **document classifiers**,
 for instance:
 
- * :ref:`example_document_classification_20newsgroups.py`
+ * :ref:`example_text_document_classification_20newsgroups.py`
 
 In an **unsupervised setting** it can be used to group similar documents
 together by applying clustering algorithms such as :ref:`k_means`:
 
-  * :ref:`example_document_clustering.py`
+  * :ref:`example_text_document_clustering.py`
 
 Finally it is possible to discover the main topics of a corpus by
 relaxing the hard assignment constraint of clustering, for instance by
diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst
index 0fc8a5abf0..feaaa06b73 100644
--- a/doc/modules/feature_selection.rst
+++ b/doc/modules/feature_selection.rst
@@ -99,7 +99,7 @@ univariate p-values:
 
 .. topic:: Examples:
 
-    :ref:`example_plot_feature_selection.py`
+    :ref:`example_feature_selection_plot_feature_selection.py`
 
 
 Recursive feature elimination
@@ -119,10 +119,10 @@ number of features.
 
 .. topic:: Examples:
 
-    * :ref:`example_plot_rfe_digits.py`: A recursive feature elimination example
+    * :ref:`example_feature_selection_plot_rfe_digits.py`: A recursive feature elimination example
       showing the relevance of pixels in a digit classification task.
 
-    * :ref:`example_plot_rfe_with_cross_validation.py`: A recursive feature
+    * :ref:`example_feature_selection_plot_rfe_with_cross_validation.py`: A recursive feature
       elimination example with automatic tuning of the number of features
       selected with cross-validation.
 
@@ -162,7 +162,7 @@ alpha parameter, the fewer features selected.
 
 .. topic:: Examples:
 
-    * :ref:`example_document_classification_20newsgroups.py`: Comparison
+    * :ref:`example_text_document_classification_20newsgroups.py`: Comparison
       of different algorithms for document classification including L1-based
       feature selection.
 
@@ -210,8 +210,8 @@ settings, using the Lasso, while :class:`RandomizedLogisticRegression` uses the
 logistic regression and is suitable for classification tasks.  To get a full
 path of stability scores you can use :func:`lasso_stability_path`.
 
-.. figure:: ../auto_examples/linear_model/images/plot_sparse_recovery_002.png
-   :target: ../auto_examples/linear_model/plot_sparse_recovery.html
+.. figure:: ../auto_examples/linear_model/linear_model.png
+   :target: ../auto_examples/linear_model/linear_model.html
    :align: center
    :scale: 60
 
diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst
index 01dffc68a2..39cdc43dcb 100644
--- a/doc/modules/grid_search.rst
+++ b/doc/modules/grid_search.rst
@@ -88,10 +88,10 @@ can be specified via the ``scoring`` parameter to :class:`GridSearchCV`.  See
 
 .. topic:: Examples:
 
-    - See :ref:`example_grid_search_digits.py` for an example of
+    - See :ref:`example_model_selection_grid_search_digits.py` for an example of
       Grid Search computation on the digits dataset.
 
-    - See :ref:`example_grid_search_text_feature_extraction.py` for an example
+    - See :ref:`example_model_selection_grid_search_text_feature_extraction.py` for an example
       of Grid Search coupling parameters from a text documents feature
       extractor (n-gram count vectorizer and TF-IDF transformer) with a
       classifier (here a linear SVM trained with SGD with either elastic
@@ -145,7 +145,7 @@ increasing ``n_iter`` will always lead to a finer search.
 
 .. topic:: Examples:
 
-    * :ref:`example_randomized_search.py` compares the usage and efficiency
+    * :ref:`example_model_selection_randomized_search.py` compares the usage and efficiency
       of randomized search and grid search.
 
 .. topic:: References:
diff --git a/doc/modules/lda_qda.rst b/doc/modules/lda_qda.rst
index 2706cbb405..77dd9df5bd 100644
--- a/doc/modules/lda_qda.rst
+++ b/doc/modules/lda_qda.rst
@@ -16,8 +16,8 @@ can be easily computed, are inherently multiclass,
 and have proven to work well in practice.
 Also there are no parameters to tune for these algorithms.
 
-.. |ldaqda| image:: ../auto_examples/images/plot_lda_qda_001.png
-        :target: ../auto_examples/plot_lda_qda.html
+.. |ldaqda| image:: ../auto_examples/classification/images/plot_lda_qda_001.png
+        :target: ../auto_examples/classification/plot_lda_qda.html
         :scale: 80
 
 .. centered:: |ldaqda|
@@ -28,7 +28,7 @@ quadratic boundaries and is therefore more flexible.
 
 .. topic:: Examples:
 
-    :ref:`example_plot_lda_qda.py`: Comparison of LDA and QDA on synthetic data.
+    :ref:`example_classification_plot_lda_qda.py`: Comparison of LDA and QDA on synthetic data.
 
 .. topic:: References:
 
diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst
index 19b9794546..4cd655fdbb 100644
--- a/doc/modules/learning_curve.rst
+++ b/doc/modules/learning_curve.rst
@@ -21,8 +21,8 @@ the second estimator approximates it almost perfectly and the last estimator
 approximates the training data perfectly but does not fit the true function
 very well, i.e. it is very sensitive to varying training data (high variance).
 
-.. figure:: ../auto_examples/images/plot_underfitting_overfitting_001.png
-   :target: ../auto_examples/plot_underfitting_overfitting.html
+.. figure:: ../auto_examples/model_selection/images/plot_underfitting_overfitting_001.png
+   :target: ../auto_examples/model_selection/plot_underfitting_overfitting.html
    :align: center
    :scale: 50%
 
@@ -41,9 +41,9 @@ this reason, it is often helpful to use the tools described below.
 
 .. topic:: Examples:
 
-   * :ref:`example_linear_model_plot_polynomial_regression.py`
-   * :ref:`example_plot_validation_curve.py`
-   * :ref:`example_plot_learning_curve.py`
+   * :ref:`example_model_selection_plot_underfitting_overfitting.py`
+   * :ref:`example_model_selection_plot_validation_curve.py`
+   * :ref:`example_model_selection_plot_learning_curve.py`
 
 
 .. _validation_curve:
@@ -98,8 +98,8 @@ training score and a high validation score is usually not possible. All three
 cases can be found in the plot below where we vary the parameter
 :math:`\gamma` of an SVM on the digits dataset.
 
-.. figure:: ../auto_examples/images/plot_validation_curve_001.png
-   :target: ../auto_examples/plot_validation_curve.html
+.. figure:: ../auto_examples/model_selection/images/plot_validation_curve_001.png
+   :target: ../auto_examples/model_selection/plot_validation_curve.html
    :align: center
    :scale: 50%
 
@@ -118,8 +118,8 @@ size of the training set, we will not benefit much from more training data.
 In the following plot you can see an example: naive Bayes roughly converges
 to a low score.
 
-.. figure:: ../auto_examples/images/plot_learning_curve_001.png
-   :target: ../auto_examples/plot_learning_curve.html
+.. figure:: ../auto_examples/model_selection/images/plot_learning_curve_001.png
+   :target: ../auto_examples/model_selection/plot_learning_curve.html
    :align: center
    :scale: 50%
 
@@ -130,8 +130,8 @@ the maximum number of training samples, adding more training samples will
 most likely increase generalization. In the following plot you can see that
 the SVM could benefit from more training examples.
 
-.. figure:: ../auto_examples/images/plot_learning_curve_002.png
-   :target: ../auto_examples/plot_learning_curve.html
+.. figure:: ../auto_examples/model_selection/images/plot_learning_curve_002.png
+   :target: ../auto_examples/model_selection/plot_learning_curve.html
    :align: center
    :scale: 50%
 
diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
index 47989c7211..5a1b8580d8 100644
--- a/doc/modules/linear_model.rst
+++ b/doc/modules/linear_model.rst
@@ -114,7 +114,7 @@ its ``coef_`` member::
 .. topic:: Examples:
 
    * :ref:`example_linear_model_plot_ridge_path.py`
-   * :ref:`example_document_classification_20newsgroups.py`
+   * :ref:`example_text_document_classification_20newsgroups.py`
 
 
 Ridge Complexity
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index e2657789ce..c13ca6d232 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -266,7 +266,7 @@ In the multilabel case with binary label indicators: ::
 
 .. topic:: Example:
 
-  * See :ref:`example_plot_permutation_test_for_classification.py`
+  * See :ref:`example_feature_selection_plot_permutation_test_for_classification.py`
     for an example of accuracy score usage using permutations of
     the dataset.
 
@@ -290,24 +290,24 @@ predicted to be in group :math:`j`. Here an example of such confusion matrix::
          [1, 0, 2]])
 
 Here a visual representation of such confusion matrix (this figure comes
-from the :ref:`example_plot_confusion_matrix.py` example):
+from the :ref:`example_model_selection_plot_confusion_matrix.py` example):
 
-.. image:: ../auto_examples/images/plot_confusion_matrix_001.png
-   :target: ../auto_examples/plot_confusion_matrix.html
+.. image:: ../auto_examples/model_selection/images/plot_confusion_matrix_001.png
+   :target: ../auto_examples/model_selection/plot_confusion_matrix.html
    :scale: 75
    :align: center
 
 .. topic:: Example:
 
-  * See :ref:`example_plot_confusion_matrix.py`
+  * See :ref:`example_model_selection_plot_confusion_matrix.py`
     for an example of confusion matrix usage to evaluate the quality of the
     output of a classifier.
 
-  * See :ref:`example_plot_digits_classification.py`
+  * See :ref:`example_classification_plot_digits_classification.py`
     for an example of confusion matrix usage in the classification of
     hand-written digits.
 
-  * See :ref:`example_document_classification_20newsgroups.py`
+  * See :ref:`example_text_document_classification_20newsgroups.py`
     for an example of confusion matrix usage in the classification of text
     documents.
 
@@ -335,15 +335,15 @@ and inferred labels::
 
 .. topic:: Example:
 
-  * See :ref:`example_plot_digits_classification.py`
+  * See :ref:`example_classification_plot_digits_classification.py`
     for an example of classification report usage in the classification of the
     hand-written digits.
 
-  * See :ref:`example_document_classification_20newsgroups.py`
+  * See :ref:`example_text_document_classification_20newsgroups.py`
     for an example of classification report usage in the classification of text
     documents.
 
-  * See :ref:`example_grid_search_digits.py`
+  * See :ref:`example_model_selection_grid_search_digits.py`
     for an example of classification report usage in parameter estimation using
     grid search with a nested cross-validation.
 
@@ -477,15 +477,15 @@ binary classification and multilabel indicator format.
 
 .. topic:: Examples:
 
-  * See :ref:`example_document_classification_20newsgroups.py`
+  * See :ref:`example_text_document_classification_20newsgroups.py`
     for an example of :func:`f1_score` usage with classification of text
     documents.
 
-  * See :ref:`example_grid_search_digits.py`
+  * See :ref:`example_model_selection_grid_search_digits.py`
     for an example of :func:`precision_score` and :func:`recall_score` usage
     in parameter estimation using grid search with a nested cross-validation.
 
-  * See :ref:`example_plot_precision_recall.py`
+  * See :ref:`example_model_selection_plot_precision_recall.py`
     for an example of precision-Recall metric to evaluate the quality of the
     output of a classifier with :func:`precision_recall_curve`.
 
@@ -794,8 +794,8 @@ Here a small example of how to use the :func:`roc_curve` function::
 
 The following figure shows an example of such ROC curve.
 
-.. image:: ../auto_examples/images/plot_roc_001.png
-   :target: ../auto_examples/plot_roc.html
+.. image:: ../auto_examples/model_selection/images/plot_roc_001.png
+   :target: ../auto_examples/model_selection/plot_roc.html
    :scale: 75
    :align: center
 
@@ -835,18 +835,18 @@ F1 score, ROC AUC doesn't require to optimize a threshold for each label. The
 if predicted outputs have been binarized.
 
 
-.. image:: ../auto_examples/images/plot_roc_002.png
-   :target: ../auto_examples/plot_roc.html
+.. image:: ../auto_examples/model_selection/images/plot_roc_002.png
+   :target: ../auto_examples/model_selection/plot_roc.html
    :scale: 75
    :align: center
 
 .. topic:: Examples:
 
-  * See :ref:`example_plot_roc.py`
+  * See :ref:`example_model_selection_plot_roc.py`
     for an example of receiver operating characteristic (ROC) metric to
     evaluate the quality of the output of a classifier.
 
-  * See :ref:`example_plot_roc_crossval.py`
+  * See :ref:`example_model_selection_plot_roc_crossval.py`
     for an example of receiver operating characteristic (ROC) metric to
     evaluate the quality of the output of a classifier using cross-validation.
 
@@ -895,7 +895,7 @@ In the multilabel case with binary label indicators: ::
 
 .. topic:: Example:
 
-  * See :ref:`example_plot_rfe_with_cross_validation.py`
+  * See :ref:`example_feature_selection_plot_rfe_with_cross_validation.py`
     for an example of the zero one loss usage to perform recursive feature
     elimination with cross-validation.
 
diff --git a/doc/modules/neural_networks.rst b/doc/modules/neural_networks.rst
index 7519ba01a1..9924e76eed 100644
--- a/doc/modules/neural_networks.rst
+++ b/doc/modules/neural_networks.rst
@@ -32,14 +32,14 @@ density estimation.
 The method gained popularity for initializing deep neural networks with the
 weights of independent RBMs. This method is known as unsupervised pre-training.
 
-.. figure:: ../auto_examples/images/plot_rbm_logistic_classification_001.png
-   :target: ../auto_examples/plot_rbm_logistic_classification.html
+.. figure:: ../auto_examples/neural_networks/images/plot_rbm_logistic_classification_001.png
+   :target: ../auto_examples/neural_networks/plot_rbm_logistic_classification.html
    :align: center
    :scale: 100%
 
 .. topic:: Examples:
 
-   * :ref:`example_plot_rbm_logistic_classification.py`
+   * :ref:`example_neural_networks_plot_rbm_logistic_classification.py`
 
 
 Graphical model and parametrization
diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst
index a98fe091b2..dcf0e317d6 100644
--- a/doc/modules/pipeline.rst
+++ b/doc/modules/pipeline.rst
@@ -85,8 +85,8 @@ This is particularly important for doing grid searches::
 
 .. topic:: Examples:
 
- * :ref:`example_feature_selection_pipeline.py`
- * :ref:`example_grid_search_text_feature_extraction.py`
+ * :ref:`example_feature_selection_feature_selection_pipeline.py`
+ * :ref:`example_model_selection_grid_search_text_feature_extraction.py`
  * :ref:`example_plot_digits_pipe.py`
  * :ref:`example_plot_kernel_approximation.py`
  * :ref:`example_svm_plot_svm_anova.py`
diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst
index 7f8fb758cc..249b82c2cc 100644
--- a/doc/modules/sgd.rst
+++ b/doc/modules/sgd.rst
@@ -199,7 +199,7 @@ matrix format as defined in `scipy.sparse.csr_matrix
 
 .. topic:: Examples:
 
- - :ref:`example_document_classification_20newsgroups.py`
+ - :ref:`example_text_document_classification_20newsgroups.py`
 
 Complexity
 ==========
diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst
index bb6c1fd943..5ac2f61c19 100644
--- a/doc/tutorial/basic/tutorial.rst
+++ b/doc/tutorial/basic/tutorial.rst
@@ -133,7 +133,7 @@ learn::
              [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])
 
     The :ref:`simple example on this dataset
-    <example_plot_digits_classification.py>` illustrates how starting
+    <example_classification_plot_digits_classification.py>` illustrates how starting
     from the original problem one can shape the data for consumption in
     scikit-learn.
 
@@ -199,7 +199,7 @@ resolution. Do you agree with the classifier?
 
 A complete example of this classification problem is available as an
 example that you can run and study:
-:ref:`example_plot_digits_classification.py`.
+:ref:`example_classification_plot_digits_classification.py`.
 
 
 Model persistence
diff --git a/doc/tutorial/text_analytics/working_with_text_data.rst b/doc/tutorial/text_analytics/working_with_text_data.rst
index 8128d762d0..b8cdae43f4 100644
--- a/doc/tutorial/text_analytics/working_with_text_data.rst
+++ b/doc/tutorial/text_analytics/working_with_text_data.rst
@@ -545,7 +545,7 @@ upon the completion of this tutorial:
   :class:`CountVectorizer`
 
 * If you don't have labels, try using
-  :ref:`Clustering <example_document_clustering.py>`
+  :ref:`Clustering <example_text_document_clustering.py>`
   on your problem.
 
 * If you have multiple labels per document, e.g categories, have a look
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index e6351f5daf..7ac7c3db1b 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -90,7 +90,7 @@ New features
 
    - Added :func:`learning_curve <learning_curve.learning_curve>` utility to
      chart performance with respect to training size. See
-     :ref:`example_plot_learning_curve.py`. By Alexander Fabisch.
+     :ref:`example_model_selection_plot_learning_curve.py`. By Alexander Fabisch.
 
    - Add positive option in :class:`LassoCV <linear_model.LassoCV>` and
      :class:`ElasticNetCV <linear_model.ElasticNetCV>`.
@@ -2574,7 +2574,7 @@ Examples
 
     - new examples using some of the mlcomp datasets:
       ``example_mlcomp_sparse_document_classification.py`` (since removed) and
-      :ref:`example_document_classification_20newsgroups.py`
+      :ref:`example_text_document_classification_20newsgroups.py`
 
     - Many more examples. `See here
       <http://scikit-learn.org/stable/auto_examples/index.html>`_
diff --git a/examples/bicluster/README.txt b/examples/bicluster/README.txt
index 657ff06dbd..468e2524eb 100644
--- a/examples/bicluster/README.txt
+++ b/examples/bicluster/README.txt
@@ -3,4 +3,4 @@
 Biclustering
 ------------
 
-Examples concerning the :mod:`sklearn.cluster.bicluster` package.
+Examples concerning the :mod:`sklearn.cluster.bicluster` module.
diff --git a/examples/classification/README.txt b/examples/classification/README.txt
new file mode 100644
index 0000000000..17743d52b2
--- /dev/null
+++ b/examples/classification/README.txt
@@ -0,0 +1,6 @@
+.. _classification_examples:
+
+Classification
+-----------------------
+
+General examples about classification algorithms.
diff --git a/examples/plot_classification_probability.py b/examples/classification/plot_classification_probability.py
similarity index 100%
rename from examples/plot_classification_probability.py
rename to examples/classification/plot_classification_probability.py
diff --git a/examples/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py
similarity index 100%
rename from examples/plot_classifier_comparison.py
rename to examples/classification/plot_classifier_comparison.py
diff --git a/examples/plot_digits_classification.py b/examples/classification/plot_digits_classification.py
similarity index 100%
rename from examples/plot_digits_classification.py
rename to examples/classification/plot_digits_classification.py
diff --git a/examples/plot_lda_qda.py b/examples/classification/plot_lda_qda.py
similarity index 100%
rename from examples/plot_lda_qda.py
rename to examples/classification/plot_lda_qda.py
diff --git a/examples/cluster/README.txt b/examples/cluster/README.txt
index ea95bec9f6..13f62220d6 100644
--- a/examples/cluster/README.txt
+++ b/examples/cluster/README.txt
@@ -3,4 +3,4 @@
 Clustering
 ----------
 
-Examples concerning the :mod:`sklearn.cluster` package.
+Examples concerning the :mod:`sklearn.cluster` module.
diff --git a/examples/covariance/README.txt b/examples/covariance/README.txt
index 8fe1c89787..283f01590b 100644
--- a/examples/covariance/README.txt
+++ b/examples/covariance/README.txt
@@ -3,4 +3,4 @@
 Covariance estimation
 ---------------------
 
-Examples concerning the :mod:`sklearn.covariance` package.
+Examples concerning the :mod:`sklearn.covariance` module.
diff --git a/examples/cross_decomposition/README.txt b/examples/cross_decomposition/README.txt
index ed6cd9b807..07649ffbb6 100644
--- a/examples/cross_decomposition/README.txt
+++ b/examples/cross_decomposition/README.txt
@@ -3,5 +3,5 @@
 Cross decomposition
 -------------------
 
-Examples concerning the :mod:`sklearn.cross_decomposition` package.
+Examples concerning the :mod:`sklearn.cross_decomposition` module.
 
diff --git a/examples/datasets/README.txt b/examples/datasets/README.txt
index 1afa9ed132..cd3354d7e9 100644
--- a/examples/datasets/README.txt
+++ b/examples/datasets/README.txt
@@ -3,4 +3,4 @@
 Dataset examples
 -----------------------
 
-Examples concerning the :mod:`sklearn.datasets` package.
+Examples concerning the :mod:`sklearn.datasets` module.
diff --git a/examples/decomposition/README.txt b/examples/decomposition/README.txt
index 3fbad80cd5..73014f768f 100644
--- a/examples/decomposition/README.txt
+++ b/examples/decomposition/README.txt
@@ -3,5 +3,5 @@
 Decomposition
 -------------
 
-Examples concerning the :mod:`sklearn.decomposition` package.
+Examples concerning the :mod:`sklearn.decomposition` module.
 
diff --git a/examples/ensemble/README.txt b/examples/ensemble/README.txt
index 086d323985..267211b5bb 100644
--- a/examples/ensemble/README.txt
+++ b/examples/ensemble/README.txt
@@ -3,4 +3,4 @@
 Ensemble methods
 ----------------
 
-Examples concerning the :mod:`sklearn.ensemble` package.
+Examples concerning the :mod:`sklearn.ensemble` module.
diff --git a/examples/feature_selection/README.txt b/examples/feature_selection/README.txt
new file mode 100644
index 0000000000..53f5df42d0
--- /dev/null
+++ b/examples/feature_selection/README.txt
@@ -0,0 +1,6 @@
+.. _feature_selection_examples:
+
+Feature Selection
+-----------------------
+
+Examples concerning the :mod:`sklearn.feature_selection` module.
diff --git a/examples/feature_selection_pipeline.py b/examples/feature_selection/feature_selection_pipeline.py
similarity index 100%
rename from examples/feature_selection_pipeline.py
rename to examples/feature_selection/feature_selection_pipeline.py
diff --git a/examples/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py
similarity index 100%
rename from examples/plot_feature_selection.py
rename to examples/feature_selection/plot_feature_selection.py
diff --git a/examples/plot_permutation_test_for_classification.py b/examples/feature_selection/plot_permutation_test_for_classification.py
similarity index 100%
rename from examples/plot_permutation_test_for_classification.py
rename to examples/feature_selection/plot_permutation_test_for_classification.py
diff --git a/examples/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py
similarity index 90%
rename from examples/plot_rfe_digits.py
rename to examples/feature_selection/plot_rfe_digits.py
index fa6b45d441..4716bdc82c 100644
--- a/examples/plot_rfe_digits.py
+++ b/examples/feature_selection/plot_rfe_digits.py
@@ -8,7 +8,7 @@ a digit classification task.
 
 .. note::
 
-    See also :ref:`example_plot_rfe_with_cross_validation.py`
+    See also :ref:`example_feature_selection_plot_rfe_with_cross_validation.py`
 
 """
 print(__doc__)
diff --git a/examples/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py
similarity index 100%
rename from examples/plot_rfe_with_cross_validation.py
rename to examples/feature_selection/plot_rfe_with_cross_validation.py
diff --git a/examples/gaussian_process/README.txt b/examples/gaussian_process/README.txt
index c892678e89..5ee038e015 100644
--- a/examples/gaussian_process/README.txt
+++ b/examples/gaussian_process/README.txt
@@ -3,5 +3,5 @@
 Gaussian Process for Machine Learning
 -------------------------------------
 
-Examples concerning the :mod:`sklearn.gaussian_process` package.
+Examples concerning the :mod:`sklearn.gaussian_process` module.
 
diff --git a/examples/linear_model/README.txt b/examples/linear_model/README.txt
index 060f68e972..08ca5a5e9a 100644
--- a/examples/linear_model/README.txt
+++ b/examples/linear_model/README.txt
@@ -3,4 +3,4 @@
 Generalized Linear Models
 -------------------------
 
-Examples concerning the :mod:`sklearn.linear_model` package.
+Examples concerning the :mod:`sklearn.linear_model` module.
diff --git a/examples/manifold/README.txt b/examples/manifold/README.txt
index 2fc8b89aaf..bf12be84b2 100644
--- a/examples/manifold/README.txt
+++ b/examples/manifold/README.txt
@@ -3,5 +3,5 @@
 Manifold learning
 -----------------------
 
-Examples concerning the :mod:`sklearn.manifold` package.
+Examples concerning the :mod:`sklearn.manifold` module.
 
diff --git a/examples/mixture/README.txt b/examples/mixture/README.txt
index 70d3b872c5..f872af051e 100644
--- a/examples/mixture/README.txt
+++ b/examples/mixture/README.txt
@@ -3,4 +3,4 @@
 Gaussian Mixture Models
 -----------------------
 
-Examples concerning the :mod:`sklearn.mixture` package.
+Examples concerning the :mod:`sklearn.mixture` module.
diff --git a/examples/model_selection/README.txt b/examples/model_selection/README.txt
new file mode 100644
index 0000000000..553c6e7d64
--- /dev/null
+++ b/examples/model_selection/README.txt
@@ -0,0 +1,7 @@
+.. _model_selection_examples:
+
+Model Selection
+-----------------------
+
+Examples concerning model selection, mostly contained in the
+:mod:`sklearn.grid_search` and :mod:`sklearn.cross_validation` modules.
diff --git a/examples/grid_search_digits.py b/examples/model_selection/grid_search_digits.py
similarity index 100%
rename from examples/grid_search_digits.py
rename to examples/model_selection/grid_search_digits.py
diff --git a/examples/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py
similarity index 100%
rename from examples/grid_search_text_feature_extraction.py
rename to examples/model_selection/grid_search_text_feature_extraction.py
diff --git a/examples/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py
similarity index 100%
rename from examples/plot_confusion_matrix.py
rename to examples/model_selection/plot_confusion_matrix.py
diff --git a/examples/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py
similarity index 100%
rename from examples/plot_learning_curve.py
rename to examples/model_selection/plot_learning_curve.py
diff --git a/examples/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py
similarity index 100%
rename from examples/plot_precision_recall.py
rename to examples/model_selection/plot_precision_recall.py
diff --git a/examples/plot_roc.py b/examples/model_selection/plot_roc.py
similarity index 98%
rename from examples/plot_roc.py
rename to examples/model_selection/plot_roc.py
index 884aaa84cd..49ae4b5fe5 100644
--- a/examples/plot_roc.py
+++ b/examples/model_selection/plot_roc.py
@@ -25,7 +25,7 @@ each element of the label indicator matrix as a binary prediction
 .. note::
 
     See also :func:`sklearn.metrics.roc_auc_score`,
-             :ref:`example_plot_roc_crossval.py`.
+             :ref:`example_model_selection_plot_roc_crossval.py`.
 
 """
 print(__doc__)
diff --git a/examples/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
similarity index 98%
rename from examples/plot_roc_crossval.py
rename to examples/model_selection/plot_roc_crossval.py
index 9557334ccd..0599813653 100644
--- a/examples/plot_roc_crossval.py
+++ b/examples/model_selection/plot_roc_crossval.py
@@ -26,7 +26,7 @@ different the splits generated by K-fold cross-validation are from one another.
 
     See also :func:`sklearn.metrics.auc_score`,
              :func:`sklearn.cross_validation.cross_val_score`,
-             :ref:`example_plot_roc.py`,
+             :ref:`example_model_selection_plot_roc.py`,
 
 """
 print(__doc__)
diff --git a/examples/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py
similarity index 100%
rename from examples/plot_train_error_vs_test_error.py
rename to examples/model_selection/plot_train_error_vs_test_error.py
diff --git a/examples/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py
similarity index 100%
rename from examples/plot_underfitting_overfitting.py
rename to examples/model_selection/plot_underfitting_overfitting.py
diff --git a/examples/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py
similarity index 100%
rename from examples/plot_validation_curve.py
rename to examples/model_selection/plot_validation_curve.py
diff --git a/examples/randomized_search.py b/examples/model_selection/randomized_search.py
similarity index 100%
rename from examples/randomized_search.py
rename to examples/model_selection/randomized_search.py
diff --git a/examples/neighbors/README.txt b/examples/neighbors/README.txt
index 346bcd8095..7271691258 100644
--- a/examples/neighbors/README.txt
+++ b/examples/neighbors/README.txt
@@ -3,4 +3,4 @@
 Nearest Neighbors
 -----------------------
 
-Examples concerning the :mod:`sklearn.neighbors` package.
+Examples concerning the :mod:`sklearn.neighbors` module.
diff --git a/examples/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py
similarity index 100%
rename from examples/plot_rbm_logistic_classification.py
rename to examples/neural_networks/plot_rbm_logistic_classification.py
diff --git a/examples/semi_supervised/README.txt b/examples/semi_supervised/README.txt
index 0158b8b07d..345a400676 100644
--- a/examples/semi_supervised/README.txt
+++ b/examples/semi_supervised/README.txt
@@ -3,4 +3,4 @@
 Semi Supervised Classification
 ------------------------------
 
-Examples concerning the :mod:`sklearn.semi_supervised` package.
+Examples concerning the :mod:`sklearn.semi_supervised` module.
diff --git a/examples/svm/README.txt b/examples/svm/README.txt
index 7a5cbd939e..92ebb7401f 100644
--- a/examples/svm/README.txt
+++ b/examples/svm/README.txt
@@ -3,4 +3,4 @@
 Support Vector Machines
 -----------------------
 
-Examples concerning the :mod:`sklearn.svm` package.
+Examples concerning the :mod:`sklearn.svm` module.
diff --git a/examples/text/README.txt b/examples/text/README.txt
new file mode 100644
index 0000000000..97626f818e
--- /dev/null
+++ b/examples/text/README.txt
@@ -0,0 +1,6 @@
+.. _text_examples:
+
+Working with text documents
+----------------------------
+
+Examples concerning the :mod:`sklearn.feature_extraction.text` module.
diff --git a/examples/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
similarity index 100%
rename from examples/document_classification_20newsgroups.py
rename to examples/text/document_classification_20newsgroups.py
diff --git a/examples/document_clustering.py b/examples/text/document_clustering.py
similarity index 100%
rename from examples/document_clustering.py
rename to examples/text/document_clustering.py
diff --git a/examples/hashing_vs_dict_vectorizer.py b/examples/text/hashing_vs_dict_vectorizer.py
similarity index 100%
rename from examples/hashing_vs_dict_vectorizer.py
rename to examples/text/hashing_vs_dict_vectorizer.py
diff --git a/examples/text/mlcomp_sparse_document_classification.py b/examples/text/mlcomp_sparse_document_classification.py
new file mode 100644
index 0000000000..c1d2631453
--- /dev/null
+++ b/examples/text/mlcomp_sparse_document_classification.py
@@ -0,0 +1,145 @@
+"""
+=========================================================
+Classification of text documents: using an MLComp dataset
+=========================================================
+
+This is an example showing how scikit-learn can be used to classify
+documents by topic using a bag-of-words approach. This example uses
+a scipy.sparse matrix to store the features instead of standard numpy arrays.
+
+The dataset used in this example is the 20 newsgroups dataset and should be
+downloaded from http://mlcomp.org (free registration required):
+
+  http://mlcomp.org/datasets/379
+
+Once downloaded, unzip the archive somewhere on your filesystem.
+For instance in::
+
+  % mkdir -p ~/data/mlcomp
+  % cd  ~/data/mlcomp
+  % unzip /path/to/dataset-379-20news-18828_XXXXX.zip
+
+You should get a folder ``~/data/mlcomp/379`` with a file named ``metadata``
+and subfolders ``raw``, ``train`` and ``test`` holding the text documents
+organized by newsgroups.
+
+Then set the ``MLCOMP_DATASETS_HOME`` environment variable to point to
+the root folder holding the uncompressed archive::
+
+  % export MLCOMP_DATASETS_HOME="~/data/mlcomp"
+
+Then you are ready to run this example using your favorite Python shell::
+
+  % ipython examples/text/mlcomp_sparse_document_classification.py
+
+"""
+
+# Author: Olivier Grisel <olivier.grisel@ensta.org>
+# License: BSD 3 clause
+
+from __future__ import print_function
+
+from time import time
+import sys
+import os
+import numpy as np
+import scipy.sparse as sp
+import pylab as pl
+
+from sklearn.datasets import load_mlcomp
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import SGDClassifier
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import classification_report
+from sklearn.naive_bayes import MultinomialNB
+
+
+print(__doc__)
+
+if 'MLCOMP_DATASETS_HOME' not in os.environ:
+    print("MLCOMP_DATASETS_HOME not set; please follow the above instructions")
+    sys.exit(0)
+
+# Load the training set
+print("Loading 20 newsgroups training set... ")
+news_train = load_mlcomp('20news-18828', 'train')
+print(news_train.DESCR)
+print("%d documents" % len(news_train.filenames))
+print("%d categories" % len(news_train.target_names))
+
+print("Extracting features from the dataset using a sparse vectorizer")
+t0 = time()
+vectorizer = TfidfVectorizer(encoding='latin1')
+X_train = vectorizer.fit_transform((open(f).read()
+                                    for f in news_train.filenames))
+print("done in %fs" % (time() - t0))
+print("n_samples: %d, n_features: %d" % X_train.shape)
+assert sp.issparse(X_train)
+y_train = news_train.target
+
+print("Loading 20 newsgroups test set... ")
+t0 = time()
+news_test = load_mlcomp('20news-18828', 'test')
+print("done in %fs" % (time() - t0))
+
+print("Predicting the labels of the test set...")
+print("%d documents" % len(news_test.filenames))
+print("%d categories" % len(news_test.target_names))
+
+print("Extracting features from the dataset using the same vectorizer")
+t0 = time()
+X_test = vectorizer.transform((open(f).read() for f in news_test.filenames))
+y_test = news_test.target
+print("done in %fs" % (time() - t0))
+print("n_samples: %d, n_features: %d" % X_test.shape)
+
+
+###############################################################################
+# Benchmark classifiers
+def benchmark(clf_class, params, name):
+    print("parameters:", params)
+    t0 = time()
+    clf = clf_class(**params).fit(X_train, y_train)
+    print("done in %fs" % (time() - t0))
+
+    if hasattr(clf, 'coef_'):
+        print("Percentage of non zeros coef: %f"
+              % (np.mean(clf.coef_ != 0) * 100))
+    print("Predicting the outcomes of the testing set")
+    t0 = time()
+    pred = clf.predict(X_test)
+    print("done in %fs" % (time() - t0))
+
+    print("Classification report on test set for classifier:")
+    print(clf)
+    print()
+    print(classification_report(y_test, pred,
+                                target_names=news_test.target_names))
+
+    cm = confusion_matrix(y_test, pred)
+    print("Confusion matrix:")
+    print(cm)
+
+    # Show confusion matrix
+    pl.matshow(cm)
+    pl.title('Confusion matrix of the %s classifier' % name)
+    pl.colorbar()
+
+
+print("Testbenching a linear classifier...")
+parameters = {
+    'loss': 'hinge',
+    'penalty': 'l2',
+    'n_iter': 50,
+    'alpha': 0.00001,
+    'fit_intercept': True,
+}
+
+benchmark(SGDClassifier, parameters, 'SGD')
+
+print("Testbenching a MultinomialNB classifier...")
+parameters = {'alpha': 0.01}
+
+benchmark(MultinomialNB, parameters, 'MultinomialNB')
+
+pl.show()
diff --git a/examples/tree/README.txt b/examples/tree/README.txt
index fca9087bf1..004ed09c06 100644
--- a/examples/tree/README.txt
+++ b/examples/tree/README.txt
@@ -3,4 +3,4 @@
 Decision Trees
 --------------
 
-Examples concerning the :mod:`sklearn.tree` package.
+Examples concerning the :mod:`sklearn.tree` module.
-- 
GitLab