diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
index 0293cc04a997a184dfb6683453bd53c406cb5a2e..7e7e76077926ceb550f31036c6c24a48ae5e17de 100644
--- a/doc/modules/linear_model.rst
+++ b/doc/modules/linear_model.rst
@@ -754,7 +754,7 @@ For large dataset, you may also consider using :class:`SGDClassifier` with 'log'
 
   * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py`
 
-  * :ref:`example_linear_model_plot_logistic_multinomial.py`
+  * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py`
 
 .. _liblinear_differences:
 
@@ -1118,7 +1118,7 @@ in the following ways.
 
 .. topic:: Examples:
 
-  * :ref:`example_linear_model_plot_huber_vs_ridge.py`
+  * :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py`
 
 .. topic:: References:
 
diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst
index 5e3c2c448de7cfe565996ef43dc5585e1661be7f..cf9c3ea7e7e5ad69f1d5e5c49838fbf29d7f691e 100644
--- a/doc/modules/mixture.rst
+++ b/doc/modules/mixture.rst
@@ -175,7 +175,7 @@ points.
 
 .. topic:: Examples:
 
-    * See :ref:`plot_bayesian_gaussian_mixture.py` for a comparaison of
+    * See :ref:`sphx_glr_auto_examples_mixture_plot_bayesian_gaussian_mixture.py` for a comparison of
       the results of the ``BayesianGaussianMixture`` for different values
       of the parameter ``dirichlet_concentration_prior``.
 
@@ -190,10 +190,10 @@ Pros
    expectation-maximization solutions.
 
 :Automatic selection: when `dirichlet_concentration_prior` is small enough and
-`n_components` is larger than what is found necessary by the model, the
-Variational Bayesian mixture model has a natural tendency to set some mixture
-weights values close to zero. This makes it possible to let the model choose a
-suitable number of effective components automatically.
+   `n_components` is larger than what is found necessary by the model, the
+   Variational Bayesian mixture model has a natural tendency to set some mixture
+   weights values close to zero. This makes it possible to let the model choose a
+   suitable number of effective components automatically.
 
 Cons
 .....
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 1abec9a49184cdfdcedfa02980c4ea81c38bf010..690e85f9150bf5e99e57d22e988d5ae465c5eb2c 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1083,7 +1083,7 @@ Here is a small example of usage of this function:::
 
 .. topic:: Example:
 
-  * See :ref:`example_calibration_plot_calibration.py`
+  * See :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py`
     for an example of Brier score loss usage to perform probability
     calibration of classifiers.
 
diff --git a/doc/testimonials/testimonials.rst b/doc/testimonials/testimonials.rst
index 0355ca36f539bed8be141fa89b43f79e60d0b57a..5c0ac1b306b597273e8813c9aaa82251405f1770 100644
--- a/doc/testimonials/testimonials.rst
+++ b/doc/testimonials/testimonials.rst
@@ -292,7 +292,9 @@ Greg Lamp, Co-founder Yhat
 .. raw:: html
 
    </span>
-------------------------------------------
+
+`Rangespan <https://www.rangespan.com>`_
+----------------------------------------
 
 .. raw:: html
 
diff --git a/doc/tutorial/statistical_inference/finding_help.rst b/doc/tutorial/statistical_inference/finding_help.rst
index 9d73929fa72d11cc4a762cae6965330926b825b5..9d2c0d48e3074d2cbbadd73b2e81c4a6f97d9065 100644
--- a/doc/tutorial/statistical_inference/finding_help.rst
+++ b/doc/tutorial/statistical_inference/finding_help.rst
@@ -19,9 +19,6 @@ Q&A communities with Machine Learning practitioners
     also features some interesting discussions:
     https://www.quora.com/topic/Machine-Learning
 
-    Have a look at the best questions section, eg: `What are some
-    good resources for learning about machine learning`_.
-	
   :Stack Exchange:
 
     The Stack Exchange family of sites hosts `multiple subdomains for Machine Learning questions`_.
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 6b588ff1a67a2cf6b24961a1acabf0902f1fb872..7ac7c5fcc8241121ac630fcc4702bd99a355b1f5 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -290,7 +290,7 @@ Enhancements
    - Added support for substituting or disabling :class:`pipeline.Pipeline`
      and :class:`pipeline.FeatureUnion` components using the ``set_params``
      interface that powers :mod:`sklearn.grid_search`.
-     See :ref:`example_plot_compare_reduction.py`. By `Joel Nothman`_ and
+     See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`. By `Joel Nothman`_ and
      `Robert McGibbon`_.
 
    - Simplification of the ``clone`` function, deprecate support for estimators
@@ -395,7 +395,7 @@ Bug fixes
       Oliveira <https://github.com/caioaao>`_.
 
     - Fix :class:`linear_model.ElasticNet` sparse decision function to match
-    output with dense in the multioutput case.
+      output with dense in the multioutput case.
 
 API changes summary
 -------------------
@@ -4468,3 +4468,5 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Mads Jensen: https://github.com/indianajensen
 
 .. _Sebastián Vanrell: https://github.com/srvanrell
+
+.. _Robert McGibbon: https://github.com/rmcgibbo
diff --git a/sklearn/datasets/descr/breast_cancer.rst b/sklearn/datasets/descr/breast_cancer.rst
index 8e12472941a667a2458cedfdb85482d3576becee..547b41021ef2f4c979696594903937a1f4e9762b 100644
--- a/sklearn/datasets/descr/breast_cancer.rst
+++ b/sklearn/datasets/descr/breast_cancer.rst
@@ -30,6 +30,7 @@ Data Set Characteristics:
                 - WDBC-Benign
 
     :Summary Statistics:
+
     ===================================== ====== ======
                                            Min    Max
     ===================================== ====== ======
diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py
index bc429f85890daf50b8fcc90fca434f2ab6d267e1..fdd1f852c6af31c4abb162bc42263c7c5db26eaf 100644
--- a/sklearn/decomposition/kernel_pca.py
+++ b/sklearn/decomposition/kernel_pca.py
@@ -100,7 +100,7 @@ class KernelPCA(BaseEstimator, TransformerMixin):
 
     dual_coef_ : array, (n_samples, n_features)
         Inverse transform matrix. If `fit_inverse_transform=False`,
-        dual_coef_ is not present.
+        ``dual_coef_`` is not present.
 
     X_transformed_fit_ : array, (n_samples, n_components)
         Projection of the fitted data on the kernel principal components.
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 881a4a593cfd2590f70d3016f151e85c96754183..aecab027b7db8d656a959a03052a5af72d2311e8 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -183,7 +183,7 @@ class PCA(_BasePCA):
     components_ : array, [n_components, n_features]
         Principal axes in feature space, representing the directions of
         maximum variance in the data. The components are sorted by
-        explained_variance_.
+        ``explained_variance_``.
 
     explained_variance_ : array, [n_components]
         The amount of variance explained by each of the selected components.
@@ -514,7 +514,7 @@ class PCA(_BasePCA):
 
 @deprecated("RandomizedPCA was deprecated in 0.18 and will be removed in 0.20. "
             "Use PCA(svd_solver='randomized') instead. The new implementation "
-            "DOES NOT store whiten components_. Apply transform to get them.")
+            "DOES NOT store whiten ``components_``. Apply transform to get them.")
 class RandomizedPCA(BaseEstimator, TransformerMixin):
     """Principal component analysis (PCA) using randomized SVD
 
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index f8393d74c3273b8e1d8a2eb4aef6f6f44a767653..e650bff25b580250dd58d383d2a7b1881f4ad42d 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -147,8 +147,8 @@ class MultiOutputRegressor(MultiOutputEstimator, RegressorMixin):
         predicts the expected value of y, disregarding the input features,
         would get a R^2 score of 0.0.
 
-        Note
-        ----
+        Notes
+        -----
         R^2 is calculated by weighting all the targets equally using
         `multioutput='uniform_average'`.
 
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 1c3d8db580272c2e60e5d55cd4d36f94df4a2f68..e7f242cdedc5da6d3389ccac244d41a60d0d6427 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -933,7 +933,7 @@ class RobustScaler(BaseEstimator, TransformerMixin):
 
     quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0
         Default: (25.0, 75.0) = (1st quantile, 3rd quantile) = IQR
-        Quantile range used to calculate scale_
+        Quantile range used to calculate ``scale_``.
 
         .. versionadded:: 0.18
 
@@ -1101,7 +1101,7 @@ def robust_scale(X, axis=0, with_centering=True, with_scaling=True,
 
     quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0
         Default: (25.0, 75.0) = (1st quantile, 3rd quantile) = IQR
-        Quantile range used to calculate scale_
+        Quantile range used to calculate ``scale_``.
 
         .. versionadded:: 0.18