From 38030a00a7f72a3528bd17f2345f34d1344d6d45 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <t3kcit@gmail.com>
Date: Tue, 27 Sep 2016 16:19:47 -0400
Subject: [PATCH] [MRG + 1] More versionadded everywhere! (#7403)

* insert versionadded and versionchanged directives in docstrings for 0.18

indicate where exception classes were moved from

* moved versionadded directives to the proper places
---
 sklearn/datasets/kddcup99.py                  |  3 +
 sklearn/decomposition/kernel_pca.py           |  6 ++
 sklearn/decomposition/pca.py                  |  2 +
 sklearn/ensemble/forest.py                    | 36 +++++++++++
 sklearn/ensemble/gradient_boosting.py         | 11 ++++
 sklearn/ensemble/iforest.py                   |  3 +
 sklearn/exceptions.py                         | 35 ++++++++++-
 sklearn/gaussian_process/gpc.py               |  5 ++
 sklearn/gaussian_process/gpr.py               |  3 +
 sklearn/gaussian_process/kernels.py           | 60 +++++++++++++++++--
 sklearn/linear_model/base.py                  |  2 +
 sklearn/metrics/classification.py             |  2 +
 sklearn/metrics/cluster/supervised.py         |  2 +
 .../neural_network/multilayer_perceptron.py   |  8 +++
 sklearn/tree/tree.py                          | 14 +++++
 15 files changed, 184 insertions(+), 8 deletions(-)

diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py
index 06b97da950..824809a80e 100644
--- a/sklearn/datasets/kddcup99.py
+++ b/sklearn/datasets/kddcup99.py
@@ -116,6 +116,8 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
     Targets               str, 'normal.' or name of the anomaly type
     ================      ==========================================
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     subset : None, 'SA', 'SF', 'http', 'smtp'
@@ -156,6 +158,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
     .. [2] A Geometric Framework for Unsupervised Anomaly Detection: Detecting
            Intrusions in Unlabeled Data (2002) by Eleazar Eskin, Andrew Arnold,
            Michael Prerau, Leonid Portnoy, Sal Stolfo
+
     """
     kddcup99 = _fetch_brute_kddcup99(shuffle=shuffle, percent10=percent10,
                                      download_if_missing=download_if_missing)
diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py
index 1b31562b46..49845e3268 100644
--- a/sklearn/decomposition/kernel_pca.py
+++ b/sklearn/decomposition/kernel_pca.py
@@ -78,15 +78,21 @@ class KernelPCA(BaseEstimator, TransformerMixin):
         A pseudo random number generator used for the initialization of the
         residuals when eigen_solver == 'arpack'.
 
+        .. versionadded:: 0.18
+
     n_jobs : int, default=1
         The number of parallel jobs to run.
         If `-1`, then the number of jobs is set to the number of CPU cores.
 
+        .. versionadded:: 0.18
+
     copy_X : boolean, default=True
         If True, input X is copied and stored by the model in the `X_fit_`
         attribute. If no further changes will be done to X, setting
         `copy_X=False` saves memory by storing a reference.
 
+        .. versionadded:: 0.18
+
     Attributes
     ----------
     lambdas_ : array, (n_components,)
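
A minimal sketch exercising the three newly documented constructor arguments
(the toy data is invented):

    import numpy as np
    from sklearn.decomposition import KernelPCA

    X = np.random.RandomState(0).rand(20, 5)
    kpca = KernelPCA(n_components=2, kernel='rbf', eigen_solver='arpack',
                     random_state=0,  # seeds the arpack residuals
                     n_jobs=1,        # parallel jobs; -1 uses all CPU cores
                     copy_X=True)     # keep a copy of X in X_fit_
    print(kpca.fit_transform(X).shape)  # (20, 2)
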
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 2a6f0dd013..4aae66fa5d 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -194,6 +194,8 @@ class PCA(_BasePCA):
     explained_variance_ : array, [n_components]
         The amount of variance explained by each of the selected components.
 
+        .. versionadded:: 0.18
+
     explained_variance_ratio_ : array, [n_components]
         Percentage of variance explained by each of the selected components.
 
diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 424b8266fe..ed0a5e0afe 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -73,6 +73,7 @@ __all__ = ["RandomForestClassifier",
 
 MAX_INT = np.iinfo(np.int32).max
 
+
 def _generate_sample_indices(random_state, n_samples):
     """Private function used to _parallel_build_trees function."""
     random_instance = check_random_state(random_state)
@@ -80,6 +81,7 @@ def _generate_sample_indices(random_state, n_samples):
 
     return sample_indices
 
+
 def _generate_unsampled_indices(random_state, n_samples):
     """Private function used to forest._set_oob_score function."""
     sample_indices = _generate_sample_indices(random_state, n_samples)
@@ -90,6 +92,7 @@ def _generate_unsampled_indices(random_state, n_samples):
 
     return unsampled_indices
 
+
 def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
                           verbose=0, class_weight=None):
     """Private function used to fit a single tree in parallel."""
@@ -181,6 +184,8 @@ class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble,
     def decision_path(self, X):
         """Return the decision path in the forest
 
+        .. versionadded:: 0.18
+
         Parameters
         ----------
         X : array-like or sparse matrix, shape = [n_samples, n_features]
@@ -197,6 +202,7 @@ class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble,
         n_nodes_ptr : array of size (n_estimators + 1, )
             The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]
             gives the indicator value for the i-th estimator.
+
         """
         X = self._validate_X_predict(X)
         indicators = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
@@ -786,6 +792,9 @@ class RandomForestClassifier(ForestClassifier):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -794,6 +803,9 @@ class RandomForestClassifier(ForestClassifier):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
@@ -991,6 +1003,9 @@ class RandomForestRegressor(ForestRegressor):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -999,6 +1014,9 @@ class RandomForestRegressor(ForestRegressor):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
@@ -1156,6 +1174,9 @@ class ExtraTreesClassifier(ForestClassifier):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -1164,6 +1185,9 @@ class ExtraTreesClassifier(ForestClassifier):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
@@ -1360,6 +1384,9 @@ class ExtraTreesRegressor(ForestRegressor):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -1368,6 +1395,9 @@ class ExtraTreesRegressor(ForestRegressor):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
@@ -1511,6 +1541,9 @@ class RandomTreesEmbedding(BaseForest):
           `ceil(min_samples_split * n_samples)` is the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -1519,6 +1552,9 @@ class RandomTreesEmbedding(BaseForest):
           `ceil(min_samples_leaf * n_samples)` is the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
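
A sketch of both documented changes for the forests, float min_samples_*
values interpreted as fractions and the decision_path method new in 0.18
(toy data invented):

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier

    rng = np.random.RandomState(0)
    X = rng.rand(100, 4)
    y = (X[:, 0] > 0.5).astype(int)

    # float min_samples_split: ceil(0.1 * 100) = 10 samples required to split
    clf = RandomForestClassifier(n_estimators=5, min_samples_split=0.1,
                                 random_state=0).fit(X, y)

    # decision_path returns a CSR indicator plus per-estimator column offsets
    indicator, n_nodes_ptr = clf.decision_path(X)
    print(indicator.shape)  # (n_samples, total node count over all trees)
    print(n_nodes_ptr)      # indicator[:, n_nodes_ptr[i]:n_nodes_ptr[i+1]] is tree i
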
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 4ea8ef8e4e..edb72c7f05 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -1315,6 +1315,9 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -1323,6 +1326,8 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
 
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
@@ -1678,6 +1683,9 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -1686,6 +1694,9 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
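
The same fraction semantics apply to the boosting estimators; a small sketch
(the dataset and hyperparameters are illustrative):

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    # ceil(0.05 * 200) = 10 samples to split, ceil(0.02 * 200) = 4 per leaf
    gbc = GradientBoostingClassifier(n_estimators=20, min_samples_split=0.05,
                                     min_samples_leaf=0.02,
                                     random_state=0).fit(X, y)
    print(gbc.score(X, y))
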
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
index c96622c95f..de00dc8958 100644
--- a/sklearn/ensemble/iforest.py
+++ b/sklearn/ensemble/iforest.py
@@ -44,6 +44,8 @@ class IsolationForest(BaseBagging):
 
     Read more in the :ref:`User Guide <isolation_forest>`.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     n_estimators : int, optional (default=100)
@@ -106,6 +108,7 @@ class IsolationForest(BaseBagging):
     .. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation-based
            anomaly detection." ACM Transactions on Knowledge Discovery from
            Data (TKDD) 6.1 (2012): 3.
+
     """
 
     def __init__(self,
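
A minimal usage sketch for the estimator marked as new (data invented):

    import numpy as np
    from sklearn.ensemble import IsolationForest

    rng = np.random.RandomState(42)
    X_train = 0.3 * rng.randn(100, 2)         # inliers around the origin
    X_new = rng.uniform(-4, 4, size=(10, 2))  # mostly outliers

    iso = IsolationForest(n_estimators=100, random_state=rng).fit(X_train)
    print(iso.predict(X_new))  # +1 for inliers, -1 for anomalies
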
diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py
index c830ef8116..70dda2a76b 100644
--- a/sklearn/exceptions.py
+++ b/sklearn/exceptions.py
@@ -30,15 +30,26 @@ class NotFittedError(ValueError, AttributeError):
     ...     print(repr(e))
     ...                        # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
     NotFittedError('This LinearSVC instance is not fitted yet',)
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.utils.validation.
     """
 
 
 class ChangedBehaviorWarning(UserWarning):
-    """Warning class used to notify the user of any change in the behavior."""
+    """Warning class used to notify the user of any change in the behavior.
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.base.
+    """
 
 
 class ConvergenceWarning(UserWarning):
-    """Custom warning to capture convergence problems"""
+    """Custom warning to capture convergence problems
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.utils.
+    """
 
 
 class DataConversionWarning(UserWarning):
@@ -53,6 +64,9 @@ class DataConversionWarning(UserWarning):
         - requests a non-copying operation, but a copy is required to meet the
           implementation's data-type expectations;
         - passes an input whose shape can be interpreted ambiguously.
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.utils.validation.
     """
 
 
@@ -64,6 +78,9 @@ class DataDimensionalityWarning(UserWarning):
     projection space, is higher than the number of features, which quantifies
     the dimensionality of the original source space, to imply that the
     dimensionality of the problem will not be reduced.
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.utils.
     """
 
 
@@ -73,6 +90,8 @@ class EfficiencyWarning(UserWarning):
     This warning notifies the user that the efficiency may not be optimal due
     to some reason which may be included as a part of the warning message.
     This may be subclassed into a more specific Warning class.
+
+    .. versionadded:: 0.18
     """
 
 
@@ -102,6 +121,9 @@ class FitFailedWarning(RuntimeWarning):
     FitFailedWarning("Classifier fit failed. The score on this train-test
     partition for these parameters will be set to 0.000000. Details:
     \\nValueError('Penalty term must be positive; got (C=-2)',)",)
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.cross_validation.
     """
 
 
@@ -110,8 +132,15 @@ class NonBLASDotWarning(EfficiencyWarning):
 
     This warning is used to notify the user that BLAS was not used for dot
     operation and hence the efficiency may be affected.
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.utils.validation and now inherits from EfficiencyWarning.
     """
 
 
 class UndefinedMetricWarning(UserWarning):
-    """Warning used when the metric is invalid"""
+    """Warning used when the metric is invalid
+
+    .. versionchanged:: 0.18
+       Moved from sklearn.base.
+    """
diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py
index 3f637393d3..f5cec7bd89 100644
--- a/sklearn/gaussian_process/gpc.py
+++ b/sklearn/gaussian_process/gpc.py
@@ -45,6 +45,8 @@ class _BinaryGaussianProcessClassifierLaplace(BaseEstimator):
     Currently, the implementation is restricted to using the logistic link
     function.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     kernel : kernel object
@@ -138,6 +140,7 @@ class _BinaryGaussianProcessClassifierLaplace(BaseEstimator):
 
     log_marginal_likelihood_value_: float
         The log-marginal-likelihood of ``self.kernel_.theta``
+
     """
     def __init__(self, kernel=None, optimizer="fmin_l_bfgs_b",
                  n_restarts_optimizer=0, max_iter_predict=100,
@@ -546,6 +549,8 @@ class GaussianProcessClassifier(BaseEstimator, ClassifierMixin):
 
     n_classes_ : int
         The number of classes in the training data
+
+    .. versionadded:: 0.18
     """
     def __init__(self, kernel=None, optimizer="fmin_l_bfgs_b",
                  n_restarts_optimizer=0, max_iter_predict=100,
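
A short sketch of the new classifier (stock dataset, kernel choice
illustrative):

    from sklearn.datasets import load_iris
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import RBF

    iris = load_iris()
    gpc = GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                    random_state=0).fit(iris.data, iris.target)
    print(gpc.n_classes_)                      # 3, via one-vs-rest binary GPCs
    print(gpc.log_marginal_likelihood_value_)  # the documented attribute
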
diff --git a/sklearn/gaussian_process/gpr.py b/sklearn/gaussian_process/gpr.py
index 24ff1b058a..4cf4cb07a7 100644
--- a/sklearn/gaussian_process/gpr.py
+++ b/sklearn/gaussian_process/gpr.py
@@ -35,6 +35,8 @@ class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
 
     Read more in the :ref:`User Guide <gaussian_process>`.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     kernel : kernel object
@@ -125,6 +127,7 @@ class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
 
     log_marginal_likelihood_value_: float
         The log-marginal-likelihood of ``self.kernel_.theta``
+
     """
     def __init__(self, kernel=None, alpha=1e-10,
                  optimizer="fmin_l_bfgs_b", n_restarts_optimizer=0,
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index 7b5aa005ad..c8466026e7 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -49,6 +49,8 @@ class Hyperparameter(namedtuple('Hyperparameter',
                                  'n_elements', 'fixed'))):
     """A kernel hyperparameter's specification in form of a namedtuple.
 
+    .. versionadded:: 0.18
+
     Attributes
     ----------
     name : string
@@ -76,6 +78,7 @@ class Hyperparameter(namedtuple('Hyperparameter',
         Whether the value of this hyperparameter is fixed, i.e., cannot be
         changed during hyperparameter tuning. If None is passed, the "fixed" is
         derived based on the given bounds.
+
     """
     # A raw namedtuple is very memory efficient as it packs the attributes
     # in a struct to get rid of the __dict__ of attributes in particular it
@@ -114,7 +117,10 @@ class Hyperparameter(namedtuple('Hyperparameter',
 
 
 class Kernel(six.with_metaclass(ABCMeta)):
-    """Base class for all kernels."""
+    """Base class for all kernels.
+
+    .. versionadded:: 0.18
+    """
 
     def get_params(self, deep=True):
         """Get parameters of this kernel.
@@ -349,7 +355,10 @@ class Kernel(six.with_metaclass(ABCMeta)):
 
 
 class NormalizedKernelMixin(object):
-    """Mixin for kernels which are normalized: k(X, X)=1."""
+    """Mixin for kernels which are normalized: k(X, X)=1.
+
+    .. versionadded:: 0.18
+    """
 
     def diag(self, X):
         """Returns the diagonal of the kernel k(X, X).
@@ -372,7 +381,10 @@ class NormalizedKernelMixin(object):
 
 
 class StationaryKernelMixin(object):
-    """Mixin for kernels which are stationary: k(X, Y)= f(X-Y)."""
+    """Mixin for kernels which are stationary: k(X, Y)= f(X-Y).
+
+    .. versionadded:: 0.18
+    """
 
     def is_stationary(self):
         """Returns whether the kernel is stationary. """
@@ -380,7 +392,10 @@ class StationaryKernelMixin(object):
 
 
 class CompoundKernel(Kernel):
-    """Kernel which is composed of a set of other kernels."""
+    """Kernel which is composed of a set of other kernels.
+
+    .. versionadded:: 0.18
+    """
 
     def __init__(self, kernels):
         self.kernels = kernels
@@ -513,7 +528,10 @@ class CompoundKernel(Kernel):
 
 
 class KernelOperator(Kernel):
-    """Base class for all kernel operators. """
+    """Base class for all kernel operators.
+
+    .. versionadded:: 0.18
+    """
 
     def __init__(self, k1, k2):
         self.k1 = k1
@@ -619,6 +637,8 @@ class Sum(KernelOperator):
     The resulting kernel is defined as
     k_sum(X, Y) = k1(X, Y) + k2(X, Y)
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     k1 : Kernel object
@@ -626,6 +646,7 @@ class Sum(KernelOperator):
 
     k2 : Kernel object
         The second base-kernel of the sum-kernel
+
     """
 
     def __call__(self, X, Y=None, eval_gradient=False):
@@ -690,6 +711,8 @@ class Product(KernelOperator):
     The resulting kernel is defined as
     k_prod(X, Y) = k1(X, Y) * k2(X, Y)
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     k1 : Kernel object
@@ -697,6 +720,7 @@ class Product(KernelOperator):
 
     k2 : Kernel object
         The second base-kernel of the product-kernel
+
     """
 
     def __call__(self, X, Y=None, eval_gradient=False):
@@ -762,6 +786,8 @@ class Exponentiation(Kernel):
     The resulting kernel is defined as
     k_exp(X, Y) = k(X, Y) ** exponent
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     kernel : Kernel object
@@ -920,6 +946,8 @@ class ConstantKernel(StationaryKernelMixin, Kernel):
 
     k(x_1, x_2) = constant_value for all x_1, x_2
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     constant_value : float, default: 1.0
@@ -928,6 +956,7 @@ class ConstantKernel(StationaryKernelMixin, Kernel):
 
     constant_value_bounds : pair of floats >= 0, default: (1e-5, 1e5)
         The lower and upper bound on constant_value
+
     """
     def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):
         self.constant_value = constant_value
@@ -1012,6 +1041,8 @@ class WhiteKernel(StationaryKernelMixin, Kernel):
 
     k(x_1, x_2) = noise_level if x_1 == x_2 else 0
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     noise_level : float, default: 1.0
@@ -1019,6 +1050,7 @@ class WhiteKernel(StationaryKernelMixin, Kernel):
 
     noise_level_bounds : pair of floats >= 0, default: (1e-5, 1e5)
         The lower and upper bound on noise_level
+
     """
     def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):
         self.noise_level = noise_level
@@ -1111,6 +1143,8 @@ class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
     kernel as covariance function have mean square derivatives of all orders,
     and are thus very smooth.
 
+    .. versionadded:: 0.18
+
     Parameters
     -----------
     length_scale : float or array with shape (n_features,), default: 1.0
@@ -1120,6 +1154,7 @@ class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
 
     length_scale_bounds : pair of floats >= 0, default: (1e-5, 1e5)
         The lower and upper bound on length_scale
+
     """
     def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
         self.length_scale = length_scale
@@ -1221,6 +1256,8 @@ class Matern(RBF):
     See Rasmussen and Williams 2006, pp84 for details regarding the
     different variants of the Matern kernel.
 
+    .. versionadded:: 0.18
+
     Parameters
     -----------
     length_scale : float or array with shape (n_features,), default: 1.0
@@ -1242,6 +1279,7 @@ class Matern(RBF):
         (appr. 10 times higher) since they require to evaluate the modified
         Bessel function. Furthermore, in contrast to l, nu is kept fixed to
         its initial value and not optimized.
+
     """
     def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5),
                  nu=1.5):
@@ -1365,6 +1403,8 @@ class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
 
     k(x_i, x_j) = (1 + d(x_i, x_j)^2 / (2*alpha * length_scale^2))^-alpha
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     length_scale : float > 0, default: 1.0
@@ -1378,6 +1418,7 @@ class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
 
     alpha_bounds : pair of floats >= 0, default: (1e-5, 1e5)
         The lower and upper bound on alpha
+
     """
     def __init__(self, length_scale=1.0, alpha=1.0,
                  length_scale_bounds=(1e-5, 1e5), alpha_bounds=(1e-5, 1e5)):
@@ -1473,6 +1514,8 @@ class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
 
     k(x_i, x_j) = exp(-2 sin(\pi / periodicity * d(x_i, x_j)) / length_scale)^2
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     length_scale : float > 0, default: 1.0
@@ -1486,6 +1529,7 @@ class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
 
     periodicity_bounds : pair of floats >= 0, default: (1e-5, 1e5)
         The lower and upper bound on periodicity
+
     """
     def __init__(self, length_scale=1.0, periodicity=1.0,
                  length_scale_bounds=(1e-5, 1e5),
@@ -1587,6 +1631,8 @@ class DotProduct(Kernel):
 
     The DotProduct kernel is commonly combined with exponentiation.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     sigma_0 : float >= 0, default: 1.0
@@ -1595,6 +1641,7 @@ class DotProduct(Kernel):
 
     sigma_0_bounds : pair of floats >= 0, default: (1e-5, 1e5)
         The lower and upper bound on l
+
     """
 
     def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):
@@ -1703,6 +1750,8 @@ class PairwiseKernel(Kernel):
           kernel parameters are set directly at initialization and are kept
           fixed.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     gamma: float >= 0, default: 1.0
@@ -1724,6 +1773,7 @@ class PairwiseKernel(Kernel):
     pairwise_kernels_kwargs : dict, default: None
         All entries of this dict (if any) are passed as keyword arguments to
         the pairwise kernel function.
+
     """
 
     def __init__(self, gamma=1.0, gamma_bounds=(1e-5, 1e5), metric="linear",
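
The kernel classes marked new above compose through operator overloading; a
sketch of Sum, Product and Exponentiation plus the Hyperparameter namedtuple:

    from sklearn.gaussian_process.kernels import (ConstantKernel, RBF,
                                                  WhiteKernel)

    # +, * and ** build Sum, Product and Exponentiation kernels
    k = ConstantKernel(1.0) * RBF(length_scale=1.0) + WhiteKernel(1e-3)
    print(k)
    for hp in k.hyperparameters:  # Hyperparameter(name, value_type, bounds, ...)
        print(hp.name, hp.bounds)
    print((RBF(1.0) ** 2).theta)  # Exponentiation via **
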
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index f713593741..004aeac140 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -459,6 +459,8 @@ class LinearRegression(LinearModel, RegressorMixin):
         array. If the target vector passed during the fit is 1-dimensional,
         this is a (1,) shape array.
 
+        .. versionadded:: 0.18
+
     intercept_ : array
         Independent term in the linear model.
 
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 7821060a95..60cdacea97 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -1459,6 +1459,8 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None,
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
+        .. versionadded:: 0.18
+
     classes : array, shape = [n_labels], optional
         (deprecated) Integer array of labels. This parameter has been
          renamed to ``labels`` in version 0.18 and will be removed in 0.20.
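
A sketch of the newly documented sample_weight argument (values invented):

    import numpy as np
    from sklearn.metrics import hamming_loss

    y_true = [0, 1, 1, 0]
    y_pred = [0, 1, 0, 0]
    print(hamming_loss(y_true, y_pred))  # 0.25: one of four labels disagrees
    # zero-weighting the mismatched sample removes its contribution
    print(hamming_loss(y_true, y_pred,
                       sample_weight=np.array([1., 1., 0., 1.])))  # 0.0
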
diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index 3fc3373558..010f8bb928 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -73,6 +73,8 @@ def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
         If True, return a sparse CSR continency matrix. If ``eps is not
         None``, and ``sparse is True``, will throw ValueError.
 
+        .. versionadded:: 0.18
+
     Returns
     -------
     contingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]
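
A sketch of the sparse option the directive refers to (labels invented):

    from sklearn.metrics.cluster import contingency_matrix

    labels_true = [0, 0, 1, 1]
    labels_pred = [1, 1, 0, 0]
    print(contingency_matrix(labels_true, labels_pred))  # dense ndarray
    print(contingency_matrix(labels_true, labels_pred,
                             sparse=True))               # scipy.sparse CSR matrix
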
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 9f7117eb1b..9f1d1320d1 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -41,6 +41,8 @@ class BaseMultilayerPerceptron(six.with_metaclass(ABCMeta, BaseEstimator)):
 
     Warning: This class should not be used directly.
     Use derived classes instead.
+
+    .. versionadded:: 0.18
     """
 
     @abstractmethod
@@ -683,6 +685,8 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
     This model optimizes the log-loss function using LBFGS or stochastic
     gradient descent.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
@@ -868,6 +872,7 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
 
     Kingma, Diederik, and Jimmy Ba. "Adam: A method for stochastic
         optimization." arXiv preprint arXiv:1412.6980 (2014).
+
     """
     def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                  solver='adam', alpha=0.0001,
@@ -1025,6 +1030,8 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
     This model optimizes the squared-loss using LBFGS or stochastic gradient
     descent.
 
+    .. versionadded:: 0.18
+
     Parameters
     ----------
     hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
@@ -1207,6 +1214,7 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
 
     Kingma, Diederik, and Jimmy Ba. "Adam: A method for stochastic
         optimization." arXiv preprint arXiv:1412.6980 (2014).
+
     """
     def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                  solver='adam', alpha=0.0001,
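
A minimal sketch of the new MLP estimators (hyperparameters illustrative; a
small max_iter can raise a ConvergenceWarning):

    from sklearn.datasets import load_digits
    from sklearn.neural_network import MLPClassifier

    digits = load_digits()
    mlp = MLPClassifier(hidden_layer_sizes=(50,), solver='adam', alpha=1e-4,
                        max_iter=200,
                        random_state=0).fit(digits.data, digits.target)
    print(mlp.score(digits.data, digits.target))
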
diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py
index 1ec6789317..76458c72b3 100644
--- a/sklearn/tree/tree.py
+++ b/sklearn/tree/tree.py
@@ -486,6 +486,8 @@ class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator,
     def decision_path(self, X, check_input=True):
         """Return the decision path in the tree
 
+        .. versionadded:: 0.18
+
         Parameters
         ----------
         X : array_like or sparse matrix, shape = [n_samples, n_features]
@@ -575,6 +577,9 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -583,6 +588,9 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
@@ -839,6 +847,9 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin):
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
 
@@ -847,6 +858,9 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin):
           `ceil(min_samples_leaf * n_samples)` are the minimum
           number of samples for each node.
 
+        .. versionchanged:: 0.18
+           Added float values for percentages.
+
     min_weight_fraction_leaf : float, optional (default=0.)
         The minimum weighted fraction of the input samples required to be at a
         leaf node.
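
A sketch of the corresponding single-tree changes, float min_samples_leaf and
the new decision_path (stock dataset):

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    iris = load_iris()
    # float min_samples_leaf: ceil(0.05 * 150) = 8 samples per leaf
    clf = DecisionTreeClassifier(min_samples_leaf=0.05,
                                 random_state=0).fit(iris.data, iris.target)

    indicator = clf.decision_path(iris.data[:2])  # CSR node-indicator matrix
    print(indicator.toarray())
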
-- 
GitLab