From e00fd83a8312a6023d16d31b11a94dcafa0071b6 Mon Sep 17 00:00:00 2001
From: Joel Nothman <joel.nothman@gmail.com>
Date: Wed, 26 Nov 2014 09:38:35 +1100
Subject: [PATCH] DOC narrative docs for grid search's robustness to failure

---
 doc/modules/grid_search.rst          | 105 +++++++++++++++++----------
 doc/modules/kernel_approximation.rst |   2 +-
 sklearn/tests/test_grid_search.py    |   9 ++-
 3 files changed, 74 insertions(+), 42 deletions(-)

diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst
index a6a384dd87..394e8119d2 100644
--- a/doc/modules/grid_search.rst
+++ b/doc/modules/grid_search.rst
@@ -29,16 +29,14 @@ A search consists of:
 - a cross-validation scheme; and
 - a :ref:`score function <gridsearch_scoring>`.
 
+Some models allow for specialized, efficient parameter search strategies,
+:ref:`outlined below <alternative_cv>`.
 Two generic approaches to sampling search candidates are provided in
 scikit-learn: for given values, :class:`GridSearchCV` exhaustively considers
 all parameter combinations, while :class:`RandomizedSearchCV` can sample a
 given number of candidates from a parameter space with a specified
-distribution.
-
-.. seealso
-
-   :ref:`pipeline` describes building composite estimators whose
-   parameter space can be searched with these tools.
+distribution. After describing these tools, we detail
+:ref:`best practice <grid_search_tips>` applicable to both approaches.
 
 Exhaustive Grid Search
 ======================
@@ -61,36 +59,8 @@ The :class:`GridSearchCV` instance implements the usual estimator API: when
 "fitting" it on a dataset all the possible combinations of parameter values are
 evaluated and the best combination is retained.
 
-.. topic:: Model selection: development and evaluation
-
-  Model selection with ``GridSearchCV`` can be seen as a way to use the
-  labeled data to "train" the parameters of the grid.
-
-  When evaluating the resulting model it is important to do it on
-  held-out samples that were not seen during the grid search process:
-  it is recommended to split the data into a **development set** (to
-  be fed to the ``GridSearchCV`` instance) and an **evaluation set**
-  to compute performance metrics.
-
-  This can be done by using the :func:`cross_validation.train_test_split`
-  utility function.
-
 .. currentmodule:: sklearn.grid_search
 
-.. _gridsearch_scoring:
-
-Scoring functions for parameter search
---------------------------------------
-
-By default, :class:`GridSearchCV` uses the ``score`` function of the estimator
-to evaluate a parameter setting. These are the
-:func:`sklearn.metrics.accuracy_score` for classification and
-:func:`sklearn.metrics.r2_score` for regression.  For some applications, other
-scoring functions are better suited (for example in unbalanced classification,
-the accuracy score is often uninformative). An alternative scoring function
-can be specified via the ``scoring`` parameter to :class:`GridSearchCV`.  See
-:ref:`scoring_parameter` for more details.
-
 .. topic:: Examples:
 
     - See :ref:`example_model_selection_grid_search_digits.py` for an example of
@@ -102,12 +72,6 @@ can be specified via the ``scoring`` parameter to :class:`GridSearchCV`.  See
       classifier (here a linear SVM trained with SGD with either elastic
       net or L2 penalty) using a :class:`pipeline.Pipeline` instance.
 
-.. note::
-
-  Computations can be run in parallel if your OS supports it, by using
-  the keyword ``n_jobs=-1``, see function signature for more details.
-
-
 Randomized Parameter Optimization
 =================================
 While using a grid of parameter settings is currently the most widely used
@@ -159,6 +123,67 @@ increasing ``n_iter`` will always lead to a finer search.
       Random search for hyper-parameter optimization,
       The Journal of Machine Learning Research (2012)
 
+.. _grid_search_tips:
+
+Tips for parameter search
+=========================
+
+.. _gridsearch_scoring:
+
+Specifying an objective metric
+------------------------------
+
+By default, parameter search uses the ``score`` function of the estimator
+to evaluate a parameter setting. These are the
+:func:`sklearn.metrics.accuracy_score` for classification and
+:func:`sklearn.metrics.r2_score` for regression.  For some applications,
+other scoring functions are better suited (for example in unbalanced
+classification, the accuracy score is often uninformative). An alternative
+scoring function can be specified via the ``scoring`` parameter to
+:class:`GridSearchCV`, :class:`RandomizedSearchCV` and many of the
+specialized cross-validation tools described below.
+See :ref:`scoring_parameter` for more details.
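+
+A minimal sketch (the estimator, grid and dataset here are only illustrative)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.svm import LinearSVC
+    from sklearn.grid_search import GridSearchCV
+
+    # An imbalanced binary problem, where plain accuracy is not very informative
+    X, y = make_classification(weights=[0.9, 0.1], random_state=0)
+    search = GridSearchCV(LinearSVC(), {'C': [0.1, 1, 10]}, scoring='f1')
+    search.fit(X, y)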
+
+Composite estimators and parameter spaces
+-----------------------------------------
+
+:ref:`pipeline` describes building composite estimators whose
+parameter space can be searched with these tools.
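+
+For instance, parameters of the steps of a :class:`pipeline.Pipeline` are
+addressed with the ``<step name>__<parameter>`` syntax; a hypothetical sketch::
+
+    from sklearn.pipeline import Pipeline
+    from sklearn.feature_extraction.text import TfidfVectorizer
+    from sklearn.svm import LinearSVC
+    from sklearn.grid_search import GridSearchCV
+
+    pipe = Pipeline([('tfidf', TfidfVectorizer()), ('svm', LinearSVC())])
+    # Each parameter name is prefixed by the name of the pipeline step
+    param_grid = {'tfidf__ngram_range': [(1, 1), (1, 2)],
+                  'svm__C': [0.1, 1, 10]}
+    search = GridSearchCV(pipe, param_grid)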
+
+Model selection: development and evaluation
+-------------------------------------------
+
+Model selection by evaluating various parameter settings can be seen as a way
+to use the labeled data to "train" the parameters of the grid.
+
+When evaluating the resulting model it is important to do it on
+held-out samples that were not seen during the grid search process:
+it is recommended to split the data into a **development set** (to
+be fed to the ``GridSearchCV`` instance) and an **evaluation set**
+to compute performance metrics.
+
+This can be done by using the :func:`cross_validation.train_test_split`
+utility function.
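+
+A sketch of this pattern (the estimator and parameter grid are placeholders)::
+
+    from sklearn.cross_validation import train_test_split
+    from sklearn.datasets import load_digits
+    from sklearn.svm import SVC
+    from sklearn.grid_search import GridSearchCV
+
+    digits = load_digits()
+    # Hold out an evaluation set that the parameter search never sees
+    X_dev, X_eval, y_dev, y_eval = train_test_split(
+        digits.data, digits.target, test_size=0.25, random_state=0)
+    search = GridSearchCV(SVC(), {'C': [1, 10], 'gamma': [0.001, 0.01]})
+    search.fit(X_dev, y_dev)                     # search on the development set
+    final_score = search.score(X_eval, y_eval)   # report on the evaluation set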
+
+Parallelism
+-----------
+
+:class:`GridSearchCV` and :class:`RandomizedSearchCV` evaluate each parameter
+setting independently.  Computations can be run in parallel if your OS
+supports it, by using the keyword ``n_jobs=-1``. See the function signature
+for more details.
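+
+For example, reusing the names from the sketch above, all available cores can
+be requested with::
+
+    search = GridSearchCV(SVC(), {'C': [1, 10]}, n_jobs=-1)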
+
+Robustness to failure
+---------------------
+
+Some parameter settings may result in a failure to ``fit`` one or more folds
+of the data.  By default, this will cause the entire search to fail, even if
+some parameter settings could be fully evaluated. Setting ``error_score=0``
+(or ``error_score=np.NaN``) will make the procedure robust to such failure,
+issuing a warning and setting the score for that fold to 0 (or ``NaN``), but
+completing the search.
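+
+A minimal sketch, where ``clf`` and ``param_grid`` stand in for any estimator
+and parameter grid::
+
+    import numpy as np
+
+    search = GridSearchCV(clf, param_grid, error_score=np.nan)
+    search.fit(X, y)  # failed fits warn and score NaN instead of aborting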
+
+.. _alternative_cv:
 
 Alternatives to brute force parameter search
 ============================================
diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst
index 9f0cd4e439..7e33ce68df 100644
--- a/doc/modules/kernel_approximation.rst
+++ b/doc/modules/kernel_approximation.rst
@@ -25,7 +25,7 @@ In particular, the combination of kernel map approximations with
 Since there has not been much empirical work using approximate embeddings, it
 is advisable to compare results against exact kernel methods when possible.
 
-.. seealso
+.. seealso::
 
    :ref:`polynomial_regression` for an exact polynomial transformation.
 
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index ed547c7b9c..b8f2018903 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -677,7 +677,6 @@ def test_grid_search_allows_nans():
     GridSearchCV(p, {'classifier__foo_param': [1, 2, 3]}, cv=2).fit(X, y)
 
 
-
 class FailingClassifier(BaseEstimator):
     """Classifier that raises a ValueError on fit()"""
 
@@ -721,6 +720,14 @@ def test_grid_search_failing_classifier():
                if this_point.parameters['parameter'] ==
                FailingClassifier.FAILING_PARAMETER)
 
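+    # Repeat with error_score=NaN: failing fits should warn and record NaN scores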
+    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
+                      refit=False, error_score=float('nan'))
+    assert_warns(FitFailedWarning, gs.fit, X, y)
+    assert all(np.all(np.isnan(this_point.cv_validation_scores))
+               for this_point in gs.grid_scores_
+               if this_point.parameters['parameter'] ==
+               FailingClassifier.FAILING_PARAMETER)
+
 
 def test_grid_search_failing_classifier_raise():
     """GridSearchCV with on_error == 'raise' raises the error"""
-- 
GitLab