From 2df6bfad588813a19dddc72979f6f5e8322fa15a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Tue, 3 Nov 2015 17:43:46 -0500
Subject: [PATCH] DOC some fixes to the doc build.

---
 doc/datasets/rcv1.rst                     |  4 +-
 doc/modules/decomposition.rst             |  1 +
 doc/modules/feature_selection.rst         |  2 +
 doc/modules/multiclass.rst                |  8 +-
 doc/whats_new.rst                         |  7 +-
 examples/applications/face_recognition.py |  4 +-
 sklearn/cross_decomposition/pls_.py       | 23 +++---
 sklearn/datasets/descr/breast_cancer.rst  | 89 ++++++++++++-----------
 sklearn/datasets/descr/diabetes.rst       |  2 +-
 sklearn/datasets/descr/digits.rst         |  3 +-
 sklearn/datasets/descr/iris.rst           |  2 +
 sklearn/linear_model/base.py              |  2 +-
 sklearn/preprocessing/data.py             | 14 ++--
 sklearn/preprocessing/tests/test_data.py  |  8 +-
 14 files changed, 92 insertions(+), 77 deletions(-)

diff --git a/doc/datasets/rcv1.rst b/doc/datasets/rcv1.rst
index 486eeee905..ded38584ce 100644
--- a/doc/datasets/rcv1.rst
+++ b/doc/datasets/rcv1.rst
@@ -41,10 +41,10 @@ There are 103 topics, each represented by a string. Their corpus frequencies spa
     >>> rcv1.target_names[:3].tolist()  # doctest: +SKIP
     ['E11', 'ECAT', 'M11']
 
-The dataset will be downloaded from the `dataset's homepage`_ if necessary.
+The dataset will be downloaded from the `rcv1 homepage`_ if necessary.
 The compressed size is about 656 MB.
 
-.. _dataset's homepage: http://jmlr.csail.mit.edu/papers/volume5/lewis04a/
+.. _rcv1 homepage: http://jmlr.csail.mit.edu/papers/volume5/lewis04a/
 
 
 .. topic:: References
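
A minimal sketch exercising the loader this page documents; note the first
call downloads the ~656 MB archive::

    from sklearn.datasets import fetch_rcv1

    rcv1 = fetch_rcv1()
    print(rcv1.data.shape)                 # (804414, 47236), sparse CSR
    print(rcv1.target_names[:3].tolist())  # e.g. ['E11', 'ECAT', 'M11']
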
diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index 91d003ce70..f10e105664 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -776,6 +776,7 @@ a corpus with :math:`D` documents and :math:`K` topics:
   2. For each document :math:`d`, draw :math:`\theta_d \sim Dirichlet(\alpha), \: d=1...D`
 
   3. For each word :math:`i` in document :math:`d`:
+
     a. Draw a topic index :math:`z_{di} \sim Multinomial(\theta_d)`
     b. Draw the observed word :math:`w_{ij} \sim Multinomial(\beta_{z_{di}})`
 
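
The generative process above is the model behind the estimator this section
documents; a minimal sketch on toy counts (``n_topics`` is the parameter
name in this release, renamed ``n_components`` later)::

    import numpy as np
    from sklearn.decomposition import LatentDirichletAllocation

    rng = np.random.RandomState(0)
    X = rng.randint(0, 5, size=(20, 50))  # toy document-term counts
    lda = LatentDirichletAllocation(n_topics=3, random_state=0)
    theta = lda.fit_transform(X)          # per-document topic mixtures
    beta = lda.components_                # topic-word weights
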
diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst
index 88ff7d56d6..60e4d0a38f 100644
--- a/doc/modules/feature_selection.rst
+++ b/doc/modules/feature_selection.rst
@@ -153,6 +153,8 @@ For examples on how it is to be used refer to the sections below.
       most important features from the Boston dataset without knowing the
       threshold beforehand.
 
+.. _l1_feature_selection:
+
 L1-based feature selection
 --------------------------
 
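
A minimal sketch of the pattern the new label points at: an L1-penalized
linear model selects features and ``SelectFromModel`` keeps the non-zero
ones::

    from sklearn.datasets import load_iris
    from sklearn.feature_selection import SelectFromModel
    from sklearn.svm import LinearSVC

    iris = load_iris()
    lsvc = LinearSVC(C=0.01, penalty="l1", dual=False)
    lsvc.fit(iris.data, iris.target)
    X_new = SelectFromModel(lsvc, prefit=True).transform(iris.data)
    print(iris.data.shape, "->", X_new.shape)  # some features dropped
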
diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst
index 49ea0d588e..9db951f4c4 100644
--- a/doc/modules/multiclass.rst
+++ b/doc/modules/multiclass.rst
@@ -215,7 +215,7 @@ code book. The code size is the dimensionality of the aforementioned space.
 Intuitively, each class should be represented by a code as unique as
 possible and a good code book should be designed to optimize classification
 accuracy. In this implementation, we simply use a randomly-generated code
-book as advocated in [2]_ although more elaborate methods may be added in the
+book as advocated in [3]_, although more elaborate methods may be added in the
 future.
 
 At fitting time, one binary classifier per bit in the code book is fitted.
@@ -262,16 +262,16 @@ Below is an example of multiclass learning using Output-Codes::
 
 .. topic:: References:
 
-    .. [1] "Solving multiclass learning problems via error-correcting output codes",
+    .. [2] "Solving multiclass learning problems via error-correcting output codes",
         Dietterich T., Bakiri G.,
         Journal of Artificial Intelligence Research 2,
         1995.
 
-    .. [2] "The error coding method and PICTs",
+    .. [3] "The error coding method and PICTs",
         James G., Hastie T.,
         Journal of Computational and Graphical statistics 7,
         1998.
 
-    .. [3] "The Elements of Statistical Learning",
+    .. [4] "The Elements of Statistical Learning",
         Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)
         2008.
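
The randomly-generated code book described above is what
``OutputCodeClassifier`` builds internally; a short sketch with a code about
twice as long as the number of classes::

    from sklearn.datasets import load_iris
    from sklearn.multiclass import OutputCodeClassifier
    from sklearn.svm import LinearSVC

    iris = load_iris()
    clf = OutputCodeClassifier(LinearSVC(random_state=0),
                               code_size=2, random_state=0)
    print(clf.fit(iris.data, iris.target).predict(iris.data)[:5])
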
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 88b8d47512..b5cf4a93bb 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -281,7 +281,7 @@ Bug fixes
 
     - Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
       platform dependent output, and failed on `fit_transform`.
-       By `Arthur Mensch`_.
+      By `Arthur Mensch`_.
 
 API changes summary
 -------------------
@@ -3313,8 +3313,8 @@ Changelog
 
   - New :ref:`gaussian_process` module by Vincent Dubourg. This module
     also has great documentation and some very neat examples. See
-    :ref:`example_gaussian_process_plot_gp_regression.py` or
-    :ref:`example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py`
+    example_gaussian_process_plot_gp_regression.py or
+    example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
     for a taste of what can be done.
 
   - It is now possible to use liblinear’s Multi-class SVC (option
@@ -3774,3 +3774,4 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Graham Clenaghan: https://github.com/gclenaghan
 .. _Giorgio Patrini: https://github.com/giorgiop
 .. _Elvis Dohmatob: https://github.com/dohmatob
+.. _yelite: https://github.com/yelite
diff --git a/examples/applications/face_recognition.py b/examples/applications/face_recognition.py
index be466e9532..1385bdd1c7 100644
--- a/examples/applications/face_recognition.py
+++ b/examples/applications/face_recognition.py
@@ -12,8 +12,9 @@ The dataset used in this example is a preprocessed excerpt of the
 
 Expected results for the top 5 most represented people in the dataset::
 
+================== ============ ======= ========== =======
                    precision    recall  f1-score   support
-
+================== ============ ======= ========== =======
      Ariel Sharon       0.67      0.92      0.77        13
      Colin Powell       0.75      0.78      0.76        60
   Donald Rumsfeld       0.78      0.67      0.72        27
@@ -23,6 +24,7 @@ Gerhard Schroeder       0.76      0.76      0.76        25
        Tony Blair       0.81      0.69      0.75        36
 
       avg / total       0.80      0.80      0.80       322
+================== ============ ======= ========== =======
 
 """
 from __future__ import print_function
diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py
index 46961c90b3..a6f27f0810 100644
--- a/sklearn/cross_decomposition/pls_.py
+++ b/sklearn/cross_decomposition/pls_.py
@@ -521,7 +521,8 @@ class PLSRegression(_PLS):
 
     Notes
     -----
-    Matrices :
+    Matrices::
+
         T: x_scores_
         U: y_scores_
         W: x_weights_
@@ -529,16 +530,17 @@ class PLSRegression(_PLS):
         P: x_loadings_
         Q: y_loadings_
 
-    Are computed such that:
+    Are computed such that::
+
         X = T P.T + Err and Y = U Q.T + Err
         T[:, k] = Xk W[:, k] for k in range(n_components)
         U[:, k] = Yk C[:, k] for k in range(n_components)
         x_rotations_ = W (P.T W)^(-1)
         y_rotations_ = C (Q.T C)^(-1)
+
     where Xk and Yk are residual matrices at iteration k.
 
-    Slides explaining PLS
-    :ref:http://www.eigenvector.com/Docs/Wise_pls_properties.pdf
+    `Slides explaining PLS <http://www.eigenvector.com/Docs/Wise_pls_properties.pdf>`_
 
     For each component k, find weights u, v that optimize:
     ``max corr(Xk u, Yk v) * std(Xk u) std(Yk v)``, such that ``|u| = 1``
@@ -655,7 +657,8 @@ class PLSCanonical(_PLS):
 
     Notes
     -----
-    Matrices :
+    Matrices::
+
         T: x_scores_
         U: y_scores_
         W: x_weights_
@@ -663,19 +666,21 @@ class PLSCanonical(_PLS):
         P: x_loadings_
         Q: y_loadings_
 
-    Are computed such that:
+    Are computed such that::
+
         X = T P.T + Err and Y = U Q.T + Err
         T[:, k] = Xk W[:, k] for k in range(n_components)
         U[:, k] = Yk C[:, k] for k in range(n_components)
         x_rotations_ = W (P.T W)^(-1)
         y_rotations_ = C (Q.T C)^(-1)
+
     where Xk and Yk are residual matrices at iteration k.
 
-    Slides explaining PLS
-    :ref:http://www.eigenvector.com/Docs/Wise_pls_properties.pdf
+    `Slides explaining PLS <http://www.eigenvector.com/Docs/Wise_pls_properties.pdf>`_
 
     For each component k, find weights u, v that optimize::
-    max corr(Xk u, Yk v) * std(Xk u) std(Yk u), such that ``|u| = |v| = 1``
+
+        max corr(Xk u, Yk v) * std(Xk u) std(Yk v), such that |u| = |v| = 1
 
     Note that it maximizes both the correlations between the scores and the
     intra-block variances.
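
The score and rotation identities listed in these Notes can be checked
numerically; a sketch on random data, relying on ``transform`` applying
``x_rotations_`` after centering and scaling::

    import numpy as np
    from sklearn.cross_decomposition import PLSCanonical

    rng = np.random.RandomState(0)
    X, Y = rng.randn(30, 4), rng.randn(30, 3)
    pls = PLSCanonical(n_components=2).fit(X, Y)
    # x_scores_ (T) equals the projection of X onto x_rotations_
    print(np.allclose(pls.transform(X), pls.x_scores_))  # True
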
diff --git a/sklearn/datasets/descr/breast_cancer.rst b/sklearn/datasets/descr/breast_cancer.rst
index 518b2c6dff..a568db5330 100644
--- a/sklearn/datasets/descr/breast_cancer.rst
+++ b/sklearn/datasets/descr/breast_cancer.rst
@@ -18,51 +18,52 @@ Data Set Characteristics:
         - concave points (number of concave portions of the contour)
         - symmetry 
         - fractal dimension ("coastline approximation" - 1)
-		
-		The mean, standard error, and "worst" or largest (mean of the three
-		largest values) of these features were computed for each image,
-		resulting in 30 features.  For instance, field 3 is Mean Radius, field
-		13 is Radius SE, field 23 is Worst Radius.
-		
+        
+        The mean, standard error, and "worst" or largest (mean of the three
+        largest values) of these features were computed for each image,
+        resulting in 30 features.  For instance, field 3 is Mean Radius, field
+        13 is Radius SE, field 23 is Worst Radius.
+        
         - class:
                 - WDBC-Malignant
                 - WDBC-Benign
 
     :Summary Statistics:
-    ===================================== ====== ======
-										   Min    Max
-    ===================================== ====== ====== 
-    radius (mean):   					  6.981  28.11
-    texture (mean):    					  9.71   39.28
-    perimeter (mean):   				  43.79  188.5
-    area (mean):    					  143.5  2501.0
-	smoothness (mean):					  0.053  0.163
-	compactness (mean):					  0.019  0.345
-	concavity (mean):					  0.0    0.427
-	concave points (mean):				  0.0	 0.201
-	symmetry (mean): 					  0.106  0.304
-	fractal dimension (mean):			  0.05	 0.097
-    radius (standard error):   			  0.112  2.873
-    texture (standard error):    		  0.36	 4.885
-    perimeter (standard error):   		  0.757  21.98
-    area (standard error):				  6.802  542.2
-	smoothness (standard error):		  0.002	 0.031
-	compactness (standard error):		  0.002  0.135
-	concavity (standard error):			  0.0    0.396
-	concave points (standard error):	  0.0	 0.053
-	symmetry (standard error):			  0.008  0.079
-	fractal dimension (standard error):   0.001  0.03
-    radius (worst):   					  7.93	 36.04
-    texture (worst):    				  12.02  49.54
-    perimeter (worst):   				  50.41  251.2
-    area (worst):    					  185.2  4254.0
-	smoothness (worst):					  0.071  0.223
-	compactness (worst):				  0.027  1.058
-	concavity (worst):					  0.0    1.252
-	concave points (worst):				  0.0    0.291
-	symmetry (worst): 					  0.156  0.664
-	fractal dimension (worst):			  0.055	 0.208
-    ===================================== ====== ======
+
+    ===================================== ======= ========
+                                           Min     Max
+    ===================================== ======= ========
+    radius (mean):                         6.981   28.11
+    texture (mean):                        9.71    39.28
+    perimeter (mean):                      43.79   188.5
+    area (mean):                           143.5   2501.0
+    smoothness (mean):                     0.053   0.163
+    compactness (mean):                    0.019   0.345
+    concavity (mean):                      0.0     0.427
+    concave points (mean):                 0.0     0.201
+    symmetry (mean):                       0.106   0.304
+    fractal dimension (mean):              0.05    0.097
+    radius (standard error):               0.112   2.873
+    texture (standard error):              0.36    4.885
+    perimeter (standard error):            0.757   21.98
+    area (standard error):                 6.802   542.2
+    smoothness (standard error):           0.002   0.031
+    compactness (standard error):          0.002   0.135
+    concavity (standard error):            0.0     0.396
+    concave points (standard error):       0.0     0.053
+    symmetry (standard error):             0.008   0.079
+    fractal dimension (standard error):    0.001   0.03
+    radius (worst):                        7.93    36.04
+    texture (worst):                       12.02   49.54
+    perimeter (worst):                     50.41   251.2
+    area (worst):                          185.2   4254.0
+    smoothness (worst):                    0.071   0.223
+    compactness (worst):                   0.027   1.058
+    concavity (worst):                     0.0     1.252
+    concave points (worst):                0.0     0.291
+    symmetry (worst):                      0.156   0.664
+    fractal dimension (worst):             0.055   0.208
+    ===================================== ======= ========
 
     :Missing Attribute Values: None
 
@@ -107,11 +108,11 @@ References
 ----------
    - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction 
      for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on 
-	 Electronic Imaging: Science and Technology, volume 1905, pages 861-870, 
-	 San Jose, CA, 1993. 
+     Electronic Imaging: Science and Technology, volume 1905, pages 861-870, 
+     San Jose, CA, 1993. 
    - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and 
      prognosis via linear programming. Operations Research, 43(4), pages 570-577, 
-	 July-August 1995.
+     July-August 1995.
    - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques
      to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) 
-	 163-171.
\ No newline at end of file
+     163-171.
diff --git a/sklearn/datasets/descr/diabetes.rst b/sklearn/datasets/descr/diabetes.rst
index 76b0132a2e..28706ac287 100644
--- a/sklearn/datasets/descr/diabetes.rst
+++ b/sklearn/datasets/descr/diabetes.rst
@@ -28,7 +28,7 @@ Data Set Characteristics:
     :S5:
     :S6:
 
-*Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).
+Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).
 
 Source URL:
 http://www4.stat.ncsu.edu/~boos/var.select/diabetes.html
diff --git a/sklearn/datasets/descr/digits.rst b/sklearn/datasets/descr/digits.rst
index 611363f69a..a30514474f 100644
--- a/sklearn/datasets/descr/digits.rst
+++ b/sklearn/datasets/descr/digits.rst
@@ -1,4 +1,5 @@
- Optical Recognition of Handwritten Digits Data Set
+Optical Recognition of Handwritten Digits Data Set
+===================================================
 
 Notes
 -----
diff --git a/sklearn/datasets/descr/iris.rst b/sklearn/datasets/descr/iris.rst
index 0e918f8b8c..ae05779ca6 100644
--- a/sklearn/datasets/descr/iris.rst
+++ b/sklearn/datasets/descr/iris.rst
@@ -15,6 +15,7 @@ Data Set Characteristics:
                 - Iris-Versicolour
                 - Iris-Virginica
     :Summary Statistics:
+
     ============== ==== ==== ======= ===== ====================
                     Min  Max   Mean    SD   Class Correlation
     ============== ==== ==== ======= ===== ====================
@@ -23,6 +24,7 @@ Data Set Characteristics:
     petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
     petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)
     ============== ==== ==== ======= ===== ====================
+
     :Missing Attribute Values: None
     :Class Distribution: 33.3% for each of 3 classes.
     :Creator: R.A. Fisher
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index a6172e5b03..53efca1122 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -394,7 +394,7 @@ class LinearRegression(LinearModel, RegressorMixin):
         self.n_jobs = n_jobs
 
     @property
-    @deprecated("residues_ is deprecated and will be removed in 0.19")
+    @deprecated("``residues_`` is deprecated and will be removed in 0.19")
     def residues_(self):
         """Get the residues of the fitted model."""
         return self._residues
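
Accessing the property goes through ``sklearn.utils.deprecated``, so the
reworded message above surfaces as a ``DeprecationWarning``; a sketch,
assuming the 0.17-era ``residues_`` attribute::

    import warnings
    import numpy as np
    from sklearn.linear_model import LinearRegression

    lr = LinearRegression().fit(np.array([[0.], [1.], [2.]]),
                                np.array([0., 1., 2.1]))
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        lr.residues_  # emits the deprecation message
    print(caught[0].message)
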
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 22aac39f44..54ea1ed4fc 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -233,7 +233,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
         Per feature maximum seen in the data
 
     data_range_ : ndarray, shape (n_features,)
-        Per feature range (data_max_ - data_min_) seen in the data
+        Per feature range ``(data_max_ - data_min_)`` seen in the data
     """
 
     def __init__(self, feature_range=(0, 1), copy=True):
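
The relationship the docstring edit spells out is easy to verify; a small
sketch::

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    X = np.array([[1., 10.], [3., 20.], [5., 40.]])
    scaler = MinMaxScaler().fit(X)
    print(scaler.data_min_)    # [ 1. 10.]
    print(scaler.data_max_)    # [ 5. 40.]
    print(scaler.data_range_)  # [ 4. 30.], i.e. data_max_ - data_min_
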
@@ -242,13 +242,13 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
 
     @property
     @deprecated("Attribute data_range will be removed in "
-                "0.19. Use data_range_ instead")
+                "0.19. Use ``data_range_`` instead")
     def data_range(self):
         return self.data_range_
 
     @property
     @deprecated("Attribute data_min will be removed in "
-                "0.19. Use data_min_ instead")
+                "0.19. Use ``data_min_`` instead")
     def data_min(self):
         return self.data_min_
 
@@ -290,7 +290,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
 
         Parameters
         ----------
-        X : array-like, shape [n_samples_, n_features]
+        X : array-like, shape [n_samples, n_features]
             The data used to compute the mean and standard deviation
             used for later scaling along the features axis.
 
@@ -504,7 +504,7 @@ class StandardScaler(BaseEstimator, TransformerMixin):
         self.copy = copy
 
     @property
-    @deprecated("Attribute std_ will be removed in 0.19. Use scale_ instead")
+    @deprecated("Attribute ``std_`` will be removed in 0.19. Use ``scale_`` instead")
     def std_(self):
         return self.scale_
 
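
The replacement attribute named in the new message holds the per-feature
standard deviation that the deprecated ``std_`` merely aliases; a quick
sketch::

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    X = np.array([[0., 0.], [1., 2.], [2., 4.]])
    scaler = StandardScaler().fit(X)
    print(scaler.scale_)  # per-feature std; std_ is a deprecated alias
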
@@ -551,7 +551,7 @@ class StandardScaler(BaseEstimator, TransformerMixin):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape [n_samples_, n_features]
+        X : {array-like, sparse matrix}, shape [n_samples, n_features]
             The data used to compute the mean and standard deviation
             used for later scaling along the features axis.
 
@@ -742,7 +742,7 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape [n_samples_, n_features]
+        X : {array-like, sparse matrix}, shape [n_samples, n_features]
             The data used to compute the mean and standard deviation
             used for later scaling along the features axis.
 
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index b32aa6af9d..7a91d46aea 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -970,13 +970,13 @@ def test_deprecation_minmax_scaler():
     scaler = MinMaxScaler().fit(X)
 
     depr_message = ("Attribute data_range will be removed in "
-                    "0.19. Use data_range_ instead")
+                    "0.19. Use ``data_range_`` instead")
     data_range = assert_warns_message(DeprecationWarning, depr_message,
                                       getattr, scaler, "data_range")
     assert_array_equal(data_range, scaler.data_range)
 
     depr_message = ("Attribute data_min will be removed in "
-                    "0.19. Use data_min_ instead")
+                    "0.19. Use ``data_min_`` instead")
     data_min = assert_warns_message(DeprecationWarning, depr_message,
                                     getattr, scaler, "data_min")
     assert_array_equal(data_min, scaler.data_min)
@@ -1336,8 +1336,8 @@ def test_deprecation_standard_scaler():
     rng = np.random.RandomState(0)
     X = rng.random_sample((5, 4))
     scaler = StandardScaler().fit(X)
-    depr_message = ("Function std_ is deprecated; Attribute std_ will be "
-                    "removed in 0.19. Use scale_ instead")
+    depr_message = ("Function std_ is deprecated; Attribute ``std_`` will be "
+                    "removed in 0.19. Use ``scale_`` instead")
     std_ = assert_warns_message(DeprecationWarning, depr_message, getattr,
                                 scaler, "std_")
     assert_array_equal(std_, scaler.scale_)
-- 
GitLab