diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh
index f3d12cae7d7bd85530e392d991270357e4027eb0..2e92393309aa9d61b5348c947d097e1e97b44657 100755
--- a/build_tools/circle/build_doc.sh
+++ b/build_tools/circle/build_doc.sh
@@ -74,7 +74,9 @@ fi
 
 if [[ "$CIRCLE_BRANCH" =~ ^master$|^[0-9]+\.[0-9]+\.X$ && -z "$CI_PULL_REQUEST" ]]
 then
-    MAKE_TARGET=dist  # PDF linked into HTML
+    # nonstopmode keeps LaTeX from prompting on errors, which would hang CI until timeout
+    # PDF linked into HTML
+    MAKE_TARGET="dist LATEXMKOPTS=--interaction=nonstopmode"
 elif [[ "$build_type" =~ ^QUICK ]]
 then
 	MAKE_TARGET=html-noplot
@@ -105,7 +107,7 @@ conda update --yes --quiet conda
 # Configure the conda environment and put it in the path using the
 # provided versions
 conda create -n $CONDA_ENV_NAME --yes --quiet python numpy scipy \
-  cython nose coverage matplotlib sphinx=1.5 pillow
+  cython nose coverage matplotlib sphinx=1.6.2 pillow
 source activate testenv
 
 # Build and install scikit-learn in dev mode
diff --git a/doc/about.rst b/doc/about.rst
index 1fa63a6fc331b614aa4b5b24c9bc3d6b856b0732..c4208efdc247a5717cd04374f8a89bb4138ee018 100644
--- a/doc/about.rst
+++ b/doc/about.rst
@@ -1,5 +1,3 @@
-
-
 About us
 ========
 
@@ -221,7 +219,7 @@ The 2013 Paris international sprint
    :width: 120pt
    :target: http://www.frs-fnrs.be/
 
-.. figure:: http://sites.uclouvain.be/dysco/pmwiki/uploads/Main/dysco.gif
+.. figure:: images/dysco.png
    :width: 120pt
    :target: http://sites.uclouvain.be/dysco/
 
diff --git a/doc/images/dysco.png b/doc/images/dysco.png
new file mode 100644
index 0000000000000000000000000000000000000000..4054e7f1dea37df05425b8fd8c33b859fc8aff89
Binary files /dev/null and b/doc/images/dysco.png differ
diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py
index 7adac552a5c1ed0e4c6fd39bcbff8cb8f841bd64..53c519e5d5ac8b19dc0a5a6c8861efca38233763 100644
--- a/sklearn/gaussian_process/gaussian_process.py
+++ b/sklearn/gaussian_process/gaussian_process.py
@@ -566,20 +566,15 @@ class GaussianProcess(BaseEstimator, RegressorMixin):
             A dictionary containing the requested Gaussian Process model
             parameters:
 
-                sigma2
-                        Gaussian Process variance.
-                beta
-                        Generalized least-squares regression weights for
-                        Universal Kriging or given beta0 for Ordinary
-                        Kriging.
-                gamma
-                        Gaussian Process weights.
-                C
-                        Cholesky decomposition of the correlation matrix [R].
-                Ft
-                        Solution of the linear equation system : [R] x Ft = F
-                G
-                        QR decomposition of the matrix Ft.
+            - ``sigma2`` is the Gaussian Process variance.
+            - ``beta`` holds the generalized least-squares regression weights
+              for Universal Kriging or the given beta0 for Ordinary Kriging.
+            - ``gamma`` holds the Gaussian Process weights.
+            - ``C`` is the Cholesky decomposition of the correlation
+              matrix [R].
+            - ``Ft`` is the solution of the linear equation system
+              [R] x Ft = F.
+            - ``G`` is the QR decomposition of the matrix Ft.
         """
         check_is_fitted(self, "X")
 
diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py
index e8ea17f413a59e82f6ef2506aace74af19009e5f..f1d85b1c36e2efd85351bc9d631476412da33983 100644
--- a/sklearn/preprocessing/label.py
+++ b/sklearn/preprocessing/label.py
@@ -349,10 +349,9 @@ class LabelBinarizer(BaseEstimator, TransformerMixin):
         threshold : float or None
             Threshold used in the binary and multi-label cases.
 
-            Use 0 when:
-                - Y contains the output of decision_function (classifier)
-            Use 0.5 when:
-                - Y contains the output of predict_proba
+            Use 0 when ``Y`` contains the output of decision_function
+            (classifier).
+            Use 0.5 when ``Y`` contains the output of predict_proba.
 
             If None, the threshold is assumed to be half way between
             neg_label and pos_label.