From 2df6bfad588813a19dddc72979f6f5e8322fa15a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Tue, 3 Nov 2015 17:43:46 -0500
Subject: [PATCH] DOC some fixes to the doc build.

---
 doc/datasets/rcv1.rst                     |  4 +-
 doc/modules/decomposition.rst             |  1 +
 doc/modules/feature_selection.rst         |  2 +
 doc/modules/multiclass.rst                |  8 +-
 doc/whats_new.rst                         |  7 +-
 examples/applications/face_recognition.py |  4 +-
 sklearn/cross_decomposition/pls_.py       | 23 +++---
 sklearn/datasets/descr/breast_cancer.rst  | 89 ++++++++++++-----------
 sklearn/datasets/descr/diabetes.rst       |  2 +-
 sklearn/datasets/descr/digits.rst         |  3 +-
 sklearn/datasets/descr/iris.rst           |  2 +
 sklearn/linear_model/base.py              |  2 +-
 sklearn/preprocessing/data.py             | 14 ++--
 sklearn/preprocessing/tests/test_data.py  |  8 +-
 14 files changed, 92 insertions(+), 77 deletions(-)

diff --git a/doc/datasets/rcv1.rst b/doc/datasets/rcv1.rst
index 486eeee905..ded38584ce 100644
--- a/doc/datasets/rcv1.rst
+++ b/doc/datasets/rcv1.rst
@@ -41,10 +41,10 @@ There are 103 topics, each represented by a string. Their corpus frequencies spa
     >>> rcv1.target_names[:3].tolist() # doctest: +SKIP
     ['E11', 'ECAT', 'M11']

-The dataset will be downloaded from the `dataset's homepage`_ if necessary.
+The dataset will be downloaded from the `rcv1 homepage`_ if necessary.
 The compressed size is about 656 MB.

-.. _dataset's homepage: http://jmlr.csail.mit.edu/papers/volume5/lewis04a/
+.. _rcv1 homepage: http://jmlr.csail.mit.edu/papers/volume5/lewis04a/

 .. topic:: References

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index 91d003ce70..f10e105664 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -776,6 +776,7 @@ a corpus with :math:`D` documents and :math:`K` topics:
 2. For each document :math:`d`, draw :math:`\theta_d \sim Dirichlet(\alpha), \: d=1...D`

 3. For each word :math:`i` in document :math:`d`:
+
    a. Draw a topic index :math:`z_{di} \sim Multinomial(\theta_d)`
    b. Draw the observed word :math:`w_{di} \sim Multinomial(\beta_{z_{di}})`

diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst
index 88ff7d56d6..60e4d0a38f 100644
--- a/doc/modules/feature_selection.rst
+++ b/doc/modules/feature_selection.rst
@@ -153,6 +153,8 @@ For examples on how it is to be used refer to the sections below.
    most important features from the Boston dataset without knowing the
    threshold beforehand.

+.. _l1_feature_selection:
+
 L1-based feature selection
 --------------------------

diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst
index 49ea0d588e..9db951f4c4 100644
--- a/doc/modules/multiclass.rst
+++ b/doc/modules/multiclass.rst
@@ -215,7 +215,7 @@ code book. The code size is the dimensionality of the aforementioned space.
 Intuitively, each class should be represented by a code as unique as
 possible and a good code book should be designed to optimize classification
 accuracy. In this implementation, we simply use a randomly-generated code
-book as advocated in [2]_ although more elaborate methods may be added in the
+book as advocated in [3]_ although more elaborate methods may be added in the
 future.

 At fitting time, one binary classifier per bit in the code book is fitted.
@@ -262,16 +262,16 @@ Below is an example of multiclass learning using Output-Codes::

 .. topic:: References:

-    .. [1] "Solving multiclass learning problems via error-correcting output codes",
+    .. [2] "Solving multiclass learning problems via error-correcting output codes",
[2] "Solving multiclass learning problems via error-correcting output codes", Dietterich T., Bakiri G., Journal of Artificial Intelligence Research 2, 1995. - .. [2] "The error coding method and PICTs", + .. [3] "The error coding method and PICTs", James G., Hastie T., Journal of Computational and Graphical statistics 7, 1998. - .. [3] "The Elements of Statistical Learning", + .. [4] "The Elements of Statistical Learning", Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) 2008. diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 88b8d47512..b5cf4a93bb 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -281,7 +281,7 @@ Bug fixes - Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and platform dependent output, and failed on `fit_transform`. - By `Arthur Mensch`_. + By `Arthur Mensch`_. API changes summary ------------------- @@ -3313,8 +3313,8 @@ Changelog - New :ref:`gaussian_process` module by Vincent Dubourg. This module also has great documentation and some very neat examples. See - :ref:`example_gaussian_process_plot_gp_regression.py` or - :ref:`example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py` + example_gaussian_process_plot_gp_regression.py or + example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py for a taste of what can be done. - It is now possible to use liblinear’s Multi-class SVC (option @@ -3774,3 +3774,4 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. .. _Graham Clenaghan: https://github.com/gclenaghan .. _Giorgio Patrini: https://github.com/giorgiop .. _Elvis Dohmatob: https://github.com/dohmatob +.. _yelite: https://github.com/yelite diff --git a/examples/applications/face_recognition.py b/examples/applications/face_recognition.py index be466e9532..1385bdd1c7 100644 --- a/examples/applications/face_recognition.py +++ b/examples/applications/face_recognition.py @@ -12,8 +12,9 @@ The dataset used in this example is a preprocessed excerpt of the Expected results for the top 5 most represented people in the dataset:: +================== ============ ======= ========== ======= precision recall f1-score support - +================== ============ ======= ========== ======= Ariel Sharon 0.67 0.92 0.77 13 Colin Powell 0.75 0.78 0.76 60 Donald Rumsfeld 0.78 0.67 0.72 27 @@ -23,6 +24,7 @@ Gerhard Schroeder 0.76 0.76 0.76 25 Tony Blair 0.81 0.69 0.75 36 avg / total 0.80 0.80 0.80 322 +================== ============ ======= ========== ======= """ from __future__ import print_function diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index 46961c90b3..a6f27f0810 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -521,7 +521,8 @@ class PLSRegression(_PLS): Notes ----- - Matrices : + Matrices:: + T: x_scores_ U: y_scores_ W: x_weights_ @@ -529,16 +530,17 @@ class PLSRegression(_PLS): P: x_loadings_ Q: y_loadings__ - Are computed such that: + Are computed such that:: + X = T P.T + Err and Y = U Q.T + Err T[:, k] = Xk W[:, k] for k in range(n_components) U[:, k] = Yk C[:, k] for k in range(n_components) x_rotations_ = W (P.T W)^(-1) y_rotations_ = C (Q.T C)^(-1) + where Xk and Yk are residual matrices at iteration k. 

-    Slides explaining PLS
-    :ref:http://www.eigenvector.com/Docs/Wise_pls_properties.pdf
+    `Slides explaining PLS <http://www.eigenvector.com/Docs/Wise_pls_properties.pdf>`_

     For each component k, find weights u, v that optimize:
     ``max corr(Xk u, Yk v) * std(Xk u) std(Yk v)``, such that ``|u| = 1``
@@ -655,7 +657,8 @@ class PLSCanonical(_PLS):

     Notes
     -----
-    Matrices :
+    Matrices::
+
         T: x_scores_
         U: y_scores_
         W: x_weights_
         C: y_weights_
         P: x_loadings_
         Q: y_loadings_

-    Are computed such that:
+    Are computed such that::
+
         X = T P.T + Err and Y = U Q.T + Err
         T[:, k] = Xk W[:, k] for k in range(n_components)
         U[:, k] = Yk C[:, k] for k in range(n_components)
         x_rotations_ = W (P.T W)^(-1)
         y_rotations_ = C (Q.T C)^(-1)
+
     where Xk and Yk are residual matrices at iteration k.

-    Slides explaining PLS
-    :ref:http://www.eigenvector.com/Docs/Wise_pls_properties.pdf
+    `Slides explaining PLS <http://www.eigenvector.com/Docs/Wise_pls_properties.pdf>`_

     For each component k, find weights u, v that optimize::
-    max corr(Xk u, Yk v) * std(Xk u) std(Yk u), such that ``|u| = |v| = 1``
+
+      max corr(Xk u, Yk v) * std(Xk u) std(Yk v), such that ``|u| = |v| = 1``

     Note that it maximizes both the correlations between the scores and the
     intra-block variances.
diff --git a/sklearn/datasets/descr/breast_cancer.rst b/sklearn/datasets/descr/breast_cancer.rst
index 518b2c6dff..a568db5330 100644
--- a/sklearn/datasets/descr/breast_cancer.rst
+++ b/sklearn/datasets/descr/breast_cancer.rst
@@ -18,51 +18,52 @@ Data Set Characteristics:
         - concave points (number of concave portions of the contour)
         - symmetry
         - fractal dimension ("coastline approximation" - 1)
-
-        The mean, standard error, and "worst" or largest (mean of the three
-        largest values) of these features were computed for each image,
-        resulting in 30 features. For instance, field 3 is Mean Radius, field
-        13 is Radius SE, field 23 is Worst Radius.
-
+
+        The mean, standard error, and "worst" or largest (mean of the three
+        largest values) of these features were computed for each image,
+        resulting in 30 features. For instance, field 3 is Mean Radius, field
+        13 is Radius SE, field 23 is Worst Radius.
+
         - class:
                 - WDBC-Malignant
                 - WDBC-Benign

     :Summary Statistics:
-    ===================================== ====== ======
-                                           Min    Max
-    ===================================== ====== ======
-    radius (mean):                        6.981  28.11
-    texture (mean):                       9.71   39.28
-    perimeter (mean):                     43.79  188.5
-    area (mean):                          143.5  2501.0
-    smoothness (mean):                    0.053  0.163
-    compactness (mean):                   0.019  0.345
-    concavity (mean):                     0.0    0.427
-    concave points (mean):                0.0    0.201
-    symmetry (mean):                      0.106  0.304
-    fractal dimension (mean):             0.05   0.097
-    radius (standard error):              0.112  2.873
-    texture (standard error):             0.36   4.885
-    perimeter (standard error):           0.757  21.98
-    area (standard error):                6.802  542.2
-    smoothness (standard error):          0.002  0.031
-    compactness (standard error):         0.002  0.135
-    concavity (standard error):           0.0    0.396
-    concave points (standard error):      0.0    0.053
-    symmetry (standard error):            0.008  0.079
-    fractal dimension (standard error):   0.001  0.03
-    radius (worst):                       7.93   36.04
-    texture (worst):                      12.02  49.54
-    perimeter (worst):                    50.41  251.2
-    area (worst):                         185.2  4254.0
-    smoothness (worst):                   0.071  0.223
-    compactness (worst):                  0.027  1.058
-    concavity (worst):                    0.0    1.252
-    concave points (worst):               0.0    0.291
-    symmetry (worst):                     0.156  0.664
-    fractal dimension (worst):            0.055  0.208
-    ===================================== ====== ======
+
+    ===================================== ======= ========
+                                            Min     Max
+    ===================================== ======= ========
+    radius (mean):                        6.981   28.11
+    texture (mean):                       9.71    39.28
+    perimeter (mean):                     43.79   188.5
+    area (mean):                          143.5   2501.0
+    smoothness (mean):                    0.053   0.163
+    compactness (mean):                   0.019   0.345
+    concavity (mean):                     0.0     0.427
+    concave points (mean):                0.0     0.201
+    symmetry (mean):                      0.106   0.304
+    fractal dimension (mean):             0.05    0.097
+    radius (standard error):              0.112   2.873
+    texture (standard error):             0.36    4.885
+    perimeter (standard error):           0.757   21.98
+    area (standard error):                6.802   542.2
+    smoothness (standard error):          0.002   0.031
+    compactness (standard error):         0.002   0.135
+    concavity (standard error):           0.0     0.396
+    concave points (standard error):      0.0     0.053
+    symmetry (standard error):            0.008   0.079
+    fractal dimension (standard error):   0.001   0.03
+    radius (worst):                       7.93    36.04
+    texture (worst):                      12.02   49.54
+    perimeter (worst):                    50.41   251.2
+    area (worst):                         185.2   4254.0
+    smoothness (worst):                   0.071   0.223
+    compactness (worst):                  0.027   1.058
+    concavity (worst):                    0.0     1.252
+    concave points (worst):               0.0     0.291
+    symmetry (worst):                     0.156   0.664
+    fractal dimension (worst):            0.055   0.208
+    ===================================== ======= ========

     :Missing Attribute Values: None
@@ -107,11 +108,11 @@ References
 ----------
    - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction
      for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on
-     Electronic Imaging: Science and Technology, volume 1905, pages 861-870,
-     San Jose, CA, 1993.
+     Electronic Imaging: Science and Technology, volume 1905, pages 861-870,
+     San Jose, CA, 1993.
    - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and
      prognosis via linear programming. Operations Research, 43(4), pages 570-577,
-     July-August 1995.
+     July-August 1995.
    - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques
      to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994)
-     163-171.
\ No newline at end of file
+     163-171.
diff --git a/sklearn/datasets/descr/diabetes.rst b/sklearn/datasets/descr/diabetes.rst
index 76b0132a2e..28706ac287 100644
--- a/sklearn/datasets/descr/diabetes.rst
+++ b/sklearn/datasets/descr/diabetes.rst
@@ -28,7 +28,7 @@ Data Set Characteristics:
       :S5:
       :S6:

-*Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).
+Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).

 Source URL:
 http://www4.stat.ncsu.edu/~boos/var.select/diabetes.html
diff --git a/sklearn/datasets/descr/digits.rst b/sklearn/datasets/descr/digits.rst
index 611363f69a..a30514474f 100644
--- a/sklearn/datasets/descr/digits.rst
+++ b/sklearn/datasets/descr/digits.rst
@@ -1,4 +1,5 @@
- Optical Recognition of Handwritten Digits Data Set
+Optical Recognition of Handwritten Digits Data Set
+===================================================

 Notes
 -----
diff --git a/sklearn/datasets/descr/iris.rst b/sklearn/datasets/descr/iris.rst
index 0e918f8b8c..ae05779ca6 100644
--- a/sklearn/datasets/descr/iris.rst
+++ b/sklearn/datasets/descr/iris.rst
@@ -15,6 +15,7 @@ Data Set Characteristics:
                 - Iris-Versicolour
                 - Iris-Virginica
     :Summary Statistics:
+
     ============== ==== ==== ======= ===== ====================
                     Min  Max   Mean    SD   Class Correlation
     ============== ==== ==== ======= ===== ====================
@@ -23,6 +24,7 @@ Data Set Characteristics:
     petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
     petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)
     ============== ==== ==== ======= ===== ====================
+
     :Missing Attribute Values: None
     :Class Distribution: 33.3% for each of 3 classes.
     :Creator: R.A. Fisher
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index a6172e5b03..53efca1122 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -394,7 +394,7 @@ class LinearRegression(LinearModel, RegressorMixin):
         self.n_jobs = n_jobs

     @property
-    @deprecated("residues_ is deprecated and will be removed in 0.19")
+    @deprecated("``residues_`` is deprecated and will be removed in 0.19")
     def residues_(self):
         """Get the residues of the fitted model."""
         return self._residues
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 22aac39f44..54ea1ed4fc 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -233,7 +233,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
         Per feature maximum seen in the data

     data_range_ : ndarray, shape (n_features,)
-        Per feature range (data_max_ - data_min_) seen in the data
+        Per feature range ``(data_max_ - data_min_)`` seen in the data
     """

     def __init__(self, feature_range=(0, 1), copy=True):
@@ -242,13 +242,13 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):

     @property
     @deprecated("Attribute data_range will be removed in "
-                "0.19. Use data_range_ instead")
+                "0.19. Use ``data_range_`` instead")
     def data_range(self):
         return self.data_range_

     @property
     @deprecated("Attribute data_min will be removed in "
-                "0.19. Use data_min_ instead")
+                "0.19. Use ``data_min_`` instead")
     def data_min(self):
         return self.data_min_

@@ -290,7 +290,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):

         Parameters
         ----------
-        X : array-like, shape [n_samples_, n_features]
+        X : array-like, shape [n_samples, n_features]
            The data used to compute the per-feature minimum and maximum
            used for later scaling along the features axis.

@@ -504,7 +504,7 @@ class StandardScaler(BaseEstimator, TransformerMixin):
         self.copy = copy

     @property
-    @deprecated("Attribute std_ will be removed in 0.19. Use scale_ instead")
+    @deprecated("Attribute ``std_`` will be removed in 0.19. Use ``scale_`` instead")
     def std_(self):
         return self.scale_

@@ -551,7 +551,7 @@ class StandardScaler(BaseEstimator, TransformerMixin):

         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape [n_samples_, n_features]
+        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

@@ -742,7 +742,7 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):

         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape [n_samples_, n_features]
+        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to compute the per-feature maximum absolute value
            used for later scaling along the features axis.

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index b32aa6af9d..7a91d46aea 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -970,13 +970,13 @@ def test_deprecation_minmax_scaler():
     scaler = MinMaxScaler().fit(X)

     depr_message = ("Attribute data_range will be removed in "
-                    "0.19. Use data_range_ instead")
+                    "0.19. Use ``data_range_`` instead")
     data_range = assert_warns_message(DeprecationWarning, depr_message,
                                       getattr, scaler, "data_range")
     assert_array_equal(data_range, scaler.data_range)

     depr_message = ("Attribute data_min will be removed in "
-                    "0.19. Use data_min_ instead")
+                    "0.19. Use ``data_min_`` instead")
     data_min = assert_warns_message(DeprecationWarning, depr_message,
                                     getattr, scaler, "data_min")
     assert_array_equal(data_min, scaler.data_min)
@@ -1336,8 +1336,8 @@ def test_deprecation_standard_scaler():
     rng = np.random.RandomState(0)
     X = rng.random_sample((5, 4))
     scaler = StandardScaler().fit(X)
-    depr_message = ("Function std_ is deprecated; Attribute std_ will be "
-                    "removed in 0.19. Use scale_ instead")
+    depr_message = ("Function std_ is deprecated; Attribute ``std_`` will be "
+                    "removed in 0.19. Use ``scale_`` instead")
     std_ = assert_warns_message(DeprecationWarning, depr_message, getattr,
                                 scaler, "std_")
     assert_array_equal(std_, scaler.scale_)
--
GitLab
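
P.S. For anyone reviewing the reworded ``Are computed such that::`` block in
pls_.py and wanting to see it hold numerically rather than just render: the
sketch below is not part of the patch. It assumes only public API that this
docstring itself documents (PLSRegression with its x_scores_ / x_loadings_
attributes) plus NumPy, and checks that the centered-and-scaled X decomposes
as T P.T with a residual Err that vanishes when n_components equals rank(X).

import numpy as np
from sklearn.cross_decomposition import PLSRegression

rng = np.random.RandomState(0)
X = rng.normal(size=(50, 5))
# Two responses driven by X plus a little noise.
Y = np.dot(X, rng.normal(size=(5, 2))) + 0.1 * rng.normal(size=(50, 2))

# With n_components == rank(X), the residual Err should be ~0.
pls = PLSRegression(n_components=5).fit(X, Y)

# fit() centers X and scales each column by its std (ddof=1) internally,
# so compare against the same preprocessing.
Xc = (X - X.mean(axis=0)) / X.std(axis=0, ddof=1)

T = pls.x_scores_    # T: x_scores_
P = pls.x_loadings_  # P: x_loadings_
print(np.allclose(Xc, np.dot(T, P.T), atol=1e-6))  # -> True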
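
The preprocessing and test hunks above only reword deprecation messages, but
the pattern they exercise is easy to miss in diff form. Here is a minimal,
self-contained sketch of that pattern using only the standard library and
NumPy -- it deliberately does not use scikit-learn's ``deprecated`` decorator
or ``assert_warns_message`` helper, and ``TinyMinMaxScaler`` is a hypothetical
stand-in: a renamed attribute keeps a deprecated read-only alias that warns
and forwards to the new name, and a test pins the exact message.

import warnings

import numpy as np


class TinyMinMaxScaler(object):
    """Minimal stand-in for MinMaxScaler with a deprecated alias."""

    def fit(self, X):
        self.data_min_ = X.min(axis=0)
        self.data_max_ = X.max(axis=0)
        # Per feature range (data_max_ - data_min_) seen in the data.
        self.data_range_ = self.data_max_ - self.data_min_
        return self

    @property
    def data_range(self):
        # Old spelling: warn, then forward to the new attribute.
        warnings.warn("Attribute data_range will be removed in "
                      "0.19. Use ``data_range_`` instead",
                      DeprecationWarning)
        return self.data_range_


def check_deprecation():
    X = np.random.RandomState(0).random_sample((5, 4))
    scaler = TinyMinMaxScaler().fit(X)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        data_range = scaler.data_range  # old spelling still works
    assert len(caught) == 1
    assert issubclass(caught[0].category, DeprecationWarning)
    assert "Use ``data_range_`` instead" in str(caught[0].message)
    np.testing.assert_array_equal(data_range, scaler.data_range_)


if __name__ == "__main__":
    check_deprecation()
    print("deprecated alias warns and returns the new attribute's value")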