From 9551ec85ea79b35c8cae420ca9cab55252314051 Mon Sep 17 00:00:00 2001
From: Josh Karnofsky <jkarno@seas.upenn.edu>
Date: Wed, 5 Oct 2016 23:01:12 -0400
Subject: [PATCH] DOC add spaces before colons in docstrings (#7589)

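The numpydoc convention for a parameter entry is ``name : type``, with a
space on each side of the colon, so that the parameter name and its type
are parsed as separate fields when the docs are built. Without the space
before the colon, Sphinx can render the name and type as one unparsed
token. A minimal sketch of the corrected style (the ``center`` function
and its parameters are hypothetical, used only to illustrate the
convention; they are not taken from any file touched by this patch):

    import numpy as np

    def center(X, copy=True):
        """Center the columns of X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data to center.
        copy : boolean, optional, default True
            If True, operate on a copy of X rather than in place.
            Before this change, such a line would have read
            ``copy: boolean, ...``.
        """
        X = np.asarray(X, dtype=np.float64)
        if copy:
            X = X.copy()
        X -= X.mean(axis=0)  # subtract each column mean in place
        return X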
---
 sklearn/base.py                               |  2 +-
 sklearn/covariance/graph_lasso_.py            | 10 +++---
 sklearn/datasets/mldata.py                    | 10 +++----
 sklearn/datasets/olivetti_faces.py            |  2 +-
 sklearn/datasets/samples_generator.py         | 20 ++++++-------
 sklearn/datasets/species_distributions.py     |  2 +-
 sklearn/datasets/svmlight_format.py           | 16 +++++-----
 sklearn/datasets/twenty_newsgroups.py         | 20 ++++++-------
 sklearn/decomposition/dict_learning.py        | 30 +++++++++----------
 sklearn/feature_extraction/dict_vectorizer.py |  6 ++--
 sklearn/feature_extraction/text.py            |  4 +--
 sklearn/gaussian_process/gpc.py               |  8 ++---
 sklearn/gaussian_process/gpr.py               | 14 ++++-----
 sklearn/linear_model/ransac.py                |  4 +--
 sklearn/manifold/locally_linear.py            |  2 +-
 sklearn/metrics/classification.py             |  8 ++---
 sklearn/metrics/cluster/supervised.py         | 14 ++++-----
 sklearn/metrics/pairwise.py                   |  4 +--
 sklearn/mixture/dpgmm.py                      |  4 +--
 sklearn/mixture/gaussian_mixture.py           |  2 +-
 sklearn/neighbors/nearest_centroid.py         |  2 +-
 .../preprocessing/_function_transformer.py    |  2 +-
 sklearn/preprocessing/data.py                 |  6 ++--
 23 files changed, 95 insertions(+), 97 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index d1628f39b3..67a7c61c60 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -222,7 +222,7 @@ class BaseEstimator(object):
 
         Parameters
         ----------
-        deep: boolean, optional
+        deep : boolean, optional
             If True, will return the parameters for this estimator and
             contained subobjects that are estimators.
 
diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
index 41e50ac29f..e166cfe207 100644
--- a/sklearn/covariance/graph_lasso_.py
+++ b/sklearn/covariance/graph_lasso_.py
@@ -479,7 +479,7 @@ class GraphLassoCV(GraphLasso):
         Refer to the :ref:`User Guide <cross_validation>` for the various
         cross-validation strategies that can be used here.
 
-    tol: positive float, optional
+    tol : positive float, optional
         The tolerance to declare convergence: if the dual gap goes below
         this value, iterations are stopped.
 
@@ -489,7 +489,7 @@ class GraphLassoCV(GraphLasso):
         for a given column update, not of the overall parameter estimate. Only
         used for mode='cd'.
 
-    max_iter: integer, optional
+    max_iter : integer, optional
         Maximum number of iterations.
 
-    mode: {'cd', 'lars'}
+    mode : {'cd', 'lars'}
@@ -498,10 +498,10 @@ class GraphLassoCV(GraphLasso):
         than number of samples. Elsewhere prefer cd which is more numerically
         stable.
 
-    n_jobs: int, optional
+    n_jobs : int, optional
         number of jobs to run in parallel (default 1).
 
-    verbose: boolean, optional
+    verbose : boolean, optional
         If verbose is True, the objective function and duality gap are
         printed at each iteration.
 
diff --git a/sklearn/datasets/mldata.py b/sklearn/datasets/mldata.py
index 1ab3edea91..c15e12cc7d 100644
--- a/sklearn/datasets/mldata.py
+++ b/sklearn/datasets/mldata.py
@@ -61,21 +61,21 @@ def fetch_mldata(dataname, target_name='label', data_name='data',
     Parameters
     ----------
 
-    dataname:
+    dataname :
         Name of the data set on mldata.org,
         e.g.: "leukemia", "Whistler Daily Snowfall", etc.
         The raw name is automatically converted to an mldata.org URL.
 
-    target_name: optional, default: 'label'
+    target_name : optional, default: 'label'
         Name or index of the column containing the target values.
 
-    data_name: optional, default: 'data'
+    data_name : optional, default: 'data'
         Name or index of the column containing the data.
 
-    transpose_data: optional, default: True
+    transpose_data : optional, default: True
         If True, transpose the downloaded data array.
 
-    data_home: optional, default: None
+    data_home : optional, default: None
         Specify another download and cache folder for the data sets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 
diff --git a/sklearn/datasets/olivetti_faces.py b/sklearn/datasets/olivetti_faces.py
index ba21bba64b..e74d65d60e 100644
--- a/sklearn/datasets/olivetti_faces.py
+++ b/sklearn/datasets/olivetti_faces.py
@@ -67,7 +67,7 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         If True the order of the dataset is shuffled to avoid having
         images of the same person grouped.
 
-    download_if_missing: optional, True by default
+    download_if_missing : optional, True by default
         If False, raise an IOError if the data is not locally available
         instead of trying to download the data from the source site.
 
diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py
index 5f6d1a71b5..53ee8987ba 100644
--- a/sklearn/datasets/samples_generator.py
+++ b/sklearn/datasets/samples_generator.py
@@ -631,7 +631,7 @@ def make_moons(n_samples=100, shuffle=True, noise=None, random_state=None):
     """Make two interleaving half circles
 
     A simple toy dataset to visualize clustering and classification
-    algorithms.
+    algorithms. Read more in the :ref:`User Guide <sample_generators>`.
 
     Parameters
     ----------
@@ -644,8 +644,6 @@ def make_moons(n_samples=100, shuffle=True, noise=None, random_state=None):
     noise : double or None (default=None)
         Standard deviation of Gaussian noise added to the data.
 
-    Read more in the :ref:`User Guide <sample_generators>`.
-
     Returns
     -------
     X : array of shape [n_samples, 2]
@@ -697,10 +695,10 @@ def make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1.0,
         (default=3)
         The number of centers to generate, or the fixed center locations.
 
-    cluster_std: float or sequence of floats, optional (default=1.0)
+    cluster_std : float or sequence of floats, optional (default=1.0)
         The standard deviation of the clusters.
 
-    center_box: pair of floats (min, max), optional (default=(-10.0, 10.0))
+    center_box : pair of floats (min, max), optional (default=(-10.0, 10.0))
         The bounding box for each cluster center when centers are
         generated at random.
 
@@ -1061,18 +1059,18 @@ def make_sparse_coded_signal(n_samples, n_components, n_features,
     n_nonzero_coefs : int
         number of active (non-zero) coefficients in each sample
 
-    random_state: int or RandomState instance, optional (default=None)
+    random_state : int or RandomState instance, optional (default=None)
         seed used by the pseudo random number generator
 
     Returns
     -------
-    data: array of shape [n_features, n_samples]
+    data : array of shape [n_features, n_samples]
         The encoded signal (Y).
 
-    dictionary: array of shape [n_features, n_components]
+    dictionary : array of shape [n_features, n_components]
         The dictionary with normalized components (D).
 
-    code: array of shape [n_components, n_samples]
+    code : array of shape [n_components, n_samples]
         The sparse code such that each column of this matrix has exactly
         n_nonzero_coefs non-zero items (X).
 
@@ -1192,10 +1190,10 @@ def make_sparse_spd_matrix(dim=1, alpha=0.95, norm_diag=False,
 
     Parameters
     ----------
-    dim: integer, optional (default=1)
+    dim : integer, optional (default=1)
         The size of the random matrix to generate.
 
-    alpha: float between 0 and 1, optional (default=0.95)
+    alpha : float between 0 and 1, optional (default=0.95)
         The probability that a coefficient is zero (see notes). Larger values
         enforce more sparsity.
 
diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py
index 2720aab6e1..6af36e6745 100644
--- a/sklearn/datasets/species_distributions.py
+++ b/sklearn/datasets/species_distributions.py
@@ -141,7 +141,7 @@ def fetch_species_distributions(data_home=None,
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 
-    download_if_missing: optional, True by default
+    download_if_missing : optional, True by default
         If False, raise an IOError if the data is not locally available
         instead of trying to download the data from the source site.
 
diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index 951eb00c82..52e81da086 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -100,12 +100,12 @@ def load_svmlight_file(f, n_features=None, dtype=np.float64,
 
     Returns
     -------
-    X: scipy.sparse matrix of shape (n_samples, n_features)
+    X : scipy.sparse matrix of shape (n_samples, n_features)
 
-    y: ndarray of shape (n_samples,), or, in the multilabel a list of
+    y : ndarray of shape (n_samples,), or, in the multilabel case, a list of
         tuples of length n_samples.
 
-    query_id: array of shape (n_samples,)
+    query_id : array of shape (n_samples,)
        query_id for each sample. Only returned when query_id is set to
        True.
 
@@ -198,7 +198,7 @@ def load_svmlight_files(files, n_features=None, dtype=np.float64,
         closed by this function. File-like objects must be opened in binary
         mode.
 
-    n_features: int or None
+    n_features : int or None
         The number of features to use. If None, it will be inferred from the
         maximum column index occurring in any of the files.
 
@@ -206,11 +206,11 @@ def load_svmlight_files(files, n_features=None, dtype=np.float64,
         in any of the input files, but setting it to a lower value will cause
         an exception to be raised.
 
-    multilabel: boolean, optional
+    multilabel : boolean, optional
         Samples may have several labels each (see
         http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)
 
-    zero_based: boolean or "auto", optional
+    zero_based : boolean or "auto", optional
         Whether column indices in f are zero-based (True) or one-based
         (False). If column indices are one-based, they are transformed to
         zero-based to match Python/NumPy conventions.
@@ -219,7 +219,7 @@ def load_svmlight_files(files, n_features=None, dtype=np.float64,
         are unfortunately not self-identifying. Using "auto" or True should
         always be safe.
 
-    query_id: boolean, defaults to False
+    query_id : boolean, defaults to False
         If True, will return the query_id array for each file.
 
     dtype : numpy data type, default np.float64
@@ -374,7 +374,7 @@ def dump_svmlight_file(X, y, f,  zero_based=True, comment=None, query_id=None,
         Array containing pairwise preference constraints (qid in svmlight
         format).
 
-    multilabel: boolean, optional
+    multilabel : boolean, optional
         Samples may have several labels each (see
         http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)
 
diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py
index 524d38d6c1..128610fd28 100644
--- a/sklearn/datasets/twenty_newsgroups.py
+++ b/sklearn/datasets/twenty_newsgroups.py
@@ -161,32 +161,32 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None,
 
     Parameters
     ----------
-    subset: 'train' or 'test', 'all', optional
+    subset : 'train', 'test' or 'all', optional
         Select the dataset to load: 'train' for the training set, 'test'
         for the test set, 'all' for both, with shuffled ordering.
 
-    data_home: optional, default: None
+    data_home : optional, default: None
         Specify a download and cache folder for the datasets. If None,
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 
-    categories: None or collection of string or unicode
+    categories : None or collection of string or unicode
         If None (default), load all the categories.
         If not None, list of category names to load (other categories
         ignored).
 
-    shuffle: bool, optional
+    shuffle : bool, optional
         Whether or not to shuffle the data: might be important for models that
         make the assumption that the samples are independent and identically
         distributed (i.i.d.), such as stochastic gradient descent.
 
-    random_state: numpy random number generator or seed integer
+    random_state : numpy random number generator or seed integer
         Used to shuffle the dataset.
 
-    download_if_missing: optional, True by default
+    download_if_missing : optional, True by default
         If False, raise an IOError if the data is not locally available
         instead of trying to download the data from the source site.
 
-    remove: tuple
+    remove : tuple
         May contain any subset of ('headers', 'footers', 'quotes'). Each of
         these is a kind of text that will be detected and removed from the
         newsgroup posts, preventing classifiers from overfitting on
@@ -297,15 +297,15 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None):
     Parameters
     ----------
 
-    subset: 'train' or 'test', 'all', optional
+    subset : 'train', 'test' or 'all', optional
         Select the dataset to load: 'train' for the training set, 'test'
         for the test set, 'all' for both, with shuffled ordering.
 
-    data_home: optional, default: None
+    data_home : optional, default: None
         Specify a download and cache folder for the datasets. If None,
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 
-    remove: tuple
+    remove : tuple
         May contain any subset of ('headers', 'footers', 'quotes'). Each of
         these is a kind of text that will be detected and removed from the
         newsgroup posts, preventing classifiers from overfitting on
diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
index 990baef4c8..7e6a136f3d 100644
--- a/sklearn/decomposition/dict_learning.py
+++ b/sklearn/decomposition/dict_learning.py
@@ -394,44 +394,44 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8,
 
     Parameters
     ----------
-    X: array of shape (n_samples, n_features)
+    X : array of shape (n_samples, n_features)
         Data matrix.
 
-    n_components: int,
+    n_components : int,
         Number of dictionary atoms to extract.
 
-    alpha: int,
+    alpha : int,
         Sparsity controlling parameter.
 
-    max_iter: int,
+    max_iter : int,
         Maximum number of iterations to perform.
 
-    tol: float,
+    tol : float,
         Tolerance for the stopping condition.
 
-    method: {'lars', 'cd'}
+    method : {'lars', 'cd'}
         lars: uses the least angle regression method to solve the lasso problem
         (linear_model.lars_path)
         cd: uses the coordinate descent method to compute the
         Lasso solution (linear_model.Lasso). Lars will be faster if
         the estimated components are sparse.
 
-    n_jobs: int,
+    n_jobs : int,
         Number of parallel jobs to run, or -1 to autodetect.
 
-    dict_init: array of shape (n_components, n_features),
+    dict_init : array of shape (n_components, n_features),
         Initial value for the dictionary for warm restart scenarios.
 
-    code_init: array of shape (n_samples, n_components),
+    code_init : array of shape (n_samples, n_components),
         Initial value for the sparse code for warm restart scenarios.
 
-    callback:
+    callback :
         Callable that gets invoked every five iterations.
 
-    verbose:
+    verbose :
         Degree of output the procedure will print.
 
-    random_state: int or RandomState
+    random_state : int or RandomState
         Pseudo random number generator state used for random sampling.
 
     return_n_iter : bool
@@ -439,13 +439,13 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8,
 
     Returns
     -------
-    code: array of shape (n_samples, n_components)
+    code : array of shape (n_samples, n_components)
         The sparse code factor in the matrix factorization.
 
-    dictionary: array of shape (n_components, n_features),
+    dictionary : array of shape (n_components, n_features),
         The dictionary factor in the matrix factorization.
 
-    errors: array
+    errors : array
         Vector of errors at each iteration.
 
     n_iter : int
diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py
index 408024bce2..66390d7a2c 100644
--- a/sklearn/feature_extraction/dict_vectorizer.py
+++ b/sklearn/feature_extraction/dict_vectorizer.py
@@ -52,13 +52,13 @@ class DictVectorizer(BaseEstimator, TransformerMixin):
     dtype : callable, optional
         The type of feature values. Passed to Numpy array/scipy.sparse matrix
         constructors as the dtype argument.
-    separator: string, optional
+    separator : string, optional
         Separator string used when constructing new features for one-hot
         coding.
-    sparse: boolean, optional.
+    sparse : boolean, optional.
         Whether transform should produce scipy.sparse matrices.
         True by default.
-    sort: boolean, optional.
+    sort : boolean, optional.
         Whether ``feature_names_`` and ``vocabulary_`` should be sorted when fitting.
         True by default.
 
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index b9d0944383..856a2db060 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -397,12 +397,12 @@ class HashingVectorizer(BaseEstimator, VectorizerMixin):
     norm : 'l1', 'l2' or None, optional
         Norm used to normalize term vectors. None for no normalization.
 
-    binary: boolean, default=False.
+    binary : boolean, default=False.
         If True, all non zero counts are set to 1. This is useful for discrete
         probabilistic models that model binary events rather than integer
         counts.
 
-    dtype: type, optional
+    dtype : type, optional
         Type of the matrix returned by fit_transform() or transform().
 
     non_negative : boolean, default=False
diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py
index f5cec7bd89..f0e6c6c439 100644
--- a/sklearn/gaussian_process/gpc.py
+++ b/sklearn/gaussian_process/gpc.py
@@ -483,7 +483,7 @@ class GaussianProcessClassifier(BaseEstimator, ClassifierMixin):
 
             'fmin_l_bfgs_b'
 
-    n_restarts_optimizer: int, optional (default: 0)
+    n_restarts_optimizer : int, optional (default: 0)
         The number of restarts of the optimizer for finding the kernel's
         parameters which maximize the log-marginal likelihood. The first run
         of the optimizer is performed from the kernel's initial parameters,
@@ -492,7 +492,7 @@ class GaussianProcessClassifier(BaseEstimator, ClassifierMixin):
         must be finite. Note that n_restarts_optimizer=0 implies that one
         run is performed.
 
-    max_iter_predict: int, optional (default: 100)
+    max_iter_predict : int, optional (default: 100)
         The maximum number of iterations in Newton's method for approximating
         the posterior during predict. Smaller values will reduce computation
         time at the cost of worse results.
@@ -515,7 +515,7 @@ class GaussianProcessClassifier(BaseEstimator, ClassifierMixin):
         given, it fixes the seed. Defaults to the global numpy random
         number generator.
 
-    multi_class: string, default: "one_vs_rest"
+    multi_class : string, default: "one_vs_rest"
         Specifies how multi-class classification problems are handled.
         Supported are "one_vs_rest" and "one_vs_one". In "one_vs_rest",
         one binary Gaussian process classifier is fitted for each class, which
@@ -541,7 +541,7 @@ class GaussianProcessClassifier(BaseEstimator, ClassifierMixin):
         classification, a CompoundKernel is returned which consists of the
         different kernels used in the one-versus-rest classifiers.
 
-    log_marginal_likelihood_value_: float
+    log_marginal_likelihood_value_ : float
         The log-marginal-likelihood of ``self.kernel_.theta``
 
     classes_ : array-like, shape = (n_classes,)
diff --git a/sklearn/gaussian_process/gpr.py b/sklearn/gaussian_process/gpr.py
index 4cf4cb07a7..4f4941fe1d 100644
--- a/sklearn/gaussian_process/gpr.py
+++ b/sklearn/gaussian_process/gpr.py
@@ -79,7 +79,7 @@ class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
 
             'fmin_l_bfgs_b'
 
-    n_restarts_optimizer: int, optional (default: 0)
+    n_restarts_optimizer : int, optional (default: 0)
         The number of restarts of the optimizer for finding the kernel's
         parameters which maximize the log-marginal likelihood. The first run
         of the optimizer is performed from the kernel's initial parameters,
@@ -88,7 +88,7 @@ class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
         must be finite. Note that n_restarts_optimizer == 0 implies that one
         run is performed.
 
-    normalize_y: boolean, optional (default: False)
+    normalize_y : boolean, optional (default: False)
         Whether the target values y are normalized, i.e., the mean of the
         observed target values become zero. This parameter should be set to
         True if the target values' mean is expected to differ considerably from
@@ -112,20 +112,20 @@ class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
     X_train_ : array-like, shape = (n_samples, n_features)
         Feature values in training data (also required for prediction)
 
-    y_train_: array-like, shape = (n_samples, [n_output_dims])
+    y_train_ : array-like, shape = (n_samples, [n_output_dims])
         Target values in training data (also required for prediction)
 
-    kernel_: kernel object
+    kernel_ : kernel object
         The kernel used for prediction. The structure of the kernel is the
         same as the one passed as parameter but with optimized hyperparameters
 
-    L_: array-like, shape = (n_samples, n_samples)
+    L_ : array-like, shape = (n_samples, n_samples)
         Lower-triangular Cholesky decomposition of the kernel in ``X_train_``
 
-    alpha_: array-like, shape = (n_samples,)
+    alpha_ : array-like, shape = (n_samples,)
         Dual coefficients of training data points in kernel space
 
-    log_marginal_likelihood_value_: float
+    log_marginal_likelihood_value_ : float
         The log-marginal-likelihood of ``self.kernel_.theta``
 
     """
diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py
index 1e65358746..1c29f7fa6b 100644
--- a/sklearn/linear_model/ransac.py
+++ b/sklearn/linear_model/ransac.py
@@ -138,7 +138,7 @@ class RANSACRegressor(BaseEstimator, MetaEstimatorMixin, RegressorMixin):
         NOTE: residual_metric is deprecated from 0.18 and will be removed in 0.20
         Use ``loss`` instead.
 
-    loss: string, callable, optional, default "absolute_loss"
+    loss : string, callable, optional, default "absolute_loss"
         String inputs, "absolute_loss" and "squared_loss" are supported which
         find the absolute loss and squared loss per sample
         respectively.
@@ -206,7 +206,7 @@ class RANSACRegressor(BaseEstimator, MetaEstimatorMixin, RegressorMixin):
         y : array-like, shape = [n_samples] or [n_samples, n_targets]
             Target values.
 
-        sample_weight: array-like, shape = [n_samples]
+        sample_weight : array-like, shape = [n_samples]
             Individual weights for each sample. Raises an error if
             sample_weight is passed and the base_estimator fit method does
             not support it.
diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py
index 7de60140e8..fd9e496d09 100644
--- a/sklearn/manifold/locally_linear.py
+++ b/sklearn/manifold/locally_linear.py
@@ -139,7 +139,7 @@ def null_space(M, k, k_skip=1, eigen_solver='arpack', tol=1E-6, max_iter=100,
     max_iter : maximum number of iterations for 'arpack' method
         not used if eigen_solver=='dense'
 
-    random_state: numpy.RandomState or int, optional
+    random_state : numpy.RandomState or int, optional
         The generator or seed used to determine the starting vector for arpack
         iterations.  Defaults to numpy.random.
 
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 60cdacea97..a0b5593b08 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -941,16 +941,16 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
 
     Returns
     -------
-    precision: float (if average is not None) or array of float, shape =\
+    precision : float (if average is not None) or array of float, shape =\
         [n_unique_labels]
 
-    recall: float (if average is not None) or array of float, , shape =\
+    recall : float (if average is not None) or array of float, shape =\
         [n_unique_labels]
 
-    fbeta_score: float (if average is not None) or array of float, shape =\
+    fbeta_score : float (if average is not None) or array of float, shape =\
         [n_unique_labels]
 
-    support: int (if average is not None) or array of int, shape =\
+    support : int (if average is not None) or array of int, shape =\
         [n_unique_labels]
         The number of occurrences of each label in ``y_true``.
 
diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index 010f8bb928..59f048b6d8 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -253,13 +253,13 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred):
 
     Returns
     -------
-    homogeneity: float
+    homogeneity : float
        score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
 
-    completeness: float
+    completeness : float
        score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
 
-    v_measure: float
+    v_measure : float
         harmonic mean of the first two
 
     See also
@@ -317,7 +317,7 @@ def homogeneity_score(labels_true, labels_pred):
 
     Returns
     -------
-    homogeneity: float
+    homogeneity : float
        score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
 
     References
@@ -465,7 +465,7 @@ def v_measure_score(labels_true, labels_pred):
 
     Returns
     -------
-    v_measure: float
+    v_measure : float
        score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
 
     References
@@ -573,7 +573,7 @@ def mutual_info_score(labels_true, labels_pred, contingency=None):
 
     Returns
     -------
-    mi: float
+    mi : float
        Mutual information, a non-negative value
 
     See also
@@ -741,7 +741,7 @@ def normalized_mutual_info_score(labels_true, labels_pred):
 
     Returns
     -------
-    nmi: float
+    nmi : float
        score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
 
     See also
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index c058e20967..e8f5090cba 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -787,7 +787,7 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):
 
     Returns
     -------
-    Gram matrix: array of shape (n_samples_1, n_samples_2)
+    Gram matrix : array of shape (n_samples_1, n_samples_2)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -1354,7 +1354,7 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False,
         (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one
         are used.
 
-    filter_params: boolean
+    filter_params : boolean
         Whether to filter invalid parameters or not.
 
     `**kwds` : optional keyword parameters
diff --git a/sklearn/mixture/dpgmm.py b/sklearn/mixture/dpgmm.py
index a8a1e2d928..0b7f11affe 100644
--- a/sklearn/mixture/dpgmm.py
+++ b/sklearn/mixture/dpgmm.py
@@ -262,7 +262,7 @@ class _DPGMMBase(_GMMBase):
         -------
         logprob : array_like, shape (n_samples,)
             Log probabilities of each data point in X
-        responsibilities: array_like, shape (n_samples, n_components)
+        responsibilities : array_like, shape (n_samples, n_components)
             Posterior probabilities of each mixture component for each
             observation
         """
@@ -787,7 +787,7 @@ class VBGMM(_DPGMMBase):
         -------
         logprob : array_like, shape (n_samples,)
             Log probabilities of each data point in X
-        responsibilities: array_like, shape (n_samples, n_components)
+        responsibilities : array_like, shape (n_samples, n_components)
             Posterior probabilities of each mixture component for each
             observation
         """
diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py
index f4a182a7c9..e7c489cbb5 100644
--- a/sklearn/mixture/gaussian_mixture.py
+++ b/sklearn/mixture/gaussian_mixture.py
@@ -500,7 +500,7 @@ class GaussianMixture(BaseMixture):
             (n_components, n_features)             if 'diag',
             (n_components, n_features, n_features) if 'full'
 
-    random_state: RandomState or an int seed, defaults to None.
+    random_state : RandomState or an int seed, defaults to None.
         A random number generator instance.
 
     warm_start : bool, defaults to False.
diff --git a/sklearn/neighbors/nearest_centroid.py b/sklearn/neighbors/nearest_centroid.py
index 10eb107d3c..778605ad84 100644
--- a/sklearn/neighbors/nearest_centroid.py
+++ b/sklearn/neighbors/nearest_centroid.py
@@ -29,7 +29,7 @@ class NearestCentroid(BaseEstimator, ClassifierMixin):
 
     Parameters
     ----------
-    metric: string, or callable
+    metric : string, or callable
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string or callable, it must be one of
         the options allowed by metrics.pairwise.pairwise_distances for its
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index bf6199b004..19c0ac0d5b 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -50,7 +50,7 @@ class FunctionTransformer(BaseEstimator, TransformerMixin):
         False, this has no effect. Otherwise, if accept_sparse is false,
         sparse matrix inputs will cause an exception to be raised.
 
-    pass_y: bool, optional default=False
+    pass_y : bool, optional, default=False
         Indicate that transform should forward the y argument to the
         inner callable.
 
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 08be1c75d0..4740d18f5b 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -204,7 +204,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
 
     Parameters
     ----------
-    feature_range: tuple (min, max), default=(0, 1)
+    feature_range : tuple (min, max), default=(0, 1)
         Desired range of transformed data.
 
     copy : boolean, optional, default True
@@ -403,7 +403,7 @@ def minmax_scale(X, feature_range=(0, 1), axis=0, copy=True):
 
     Parameters
     ----------
-    feature_range: tuple (min, max), default=(0, 1)
+    feature_range : tuple (min, max), default=(0, 1)
         Desired range of transformed data.
 
     axis : int (0 by default)
@@ -1754,7 +1754,7 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
                   ``X[:, i]``. Each feature value should be
                   in ``range(n_values[i])``
 
-    categorical_features: "all" or array of indices or mask
+    categorical_features : "all" or array of indices or mask
         Specify what features are treated as categorical.
 
         - 'all' (default): All features are treated as categorical.
-- 
GitLab