From 12a0125a3cac4691ff5cad00e50ca80daa22099f Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Sun, 11 Sep 2016 12:02:20 +0200
Subject: [PATCH] DOC fix for svd_solver in PCA docstring

---
 sklearn/decomposition/pca.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index aecab027b7..2a6f0dd013 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -108,9 +108,12 @@ class PCA(_BasePCA):
     Linear dimensionality reduction using Singular Value Decomposition of the
     data to project it to a lower dimensional space.
 
-    It uses the scipy.linalg ARPACK implementation of the SVD or a randomized
-    SVD by the method of Halko et al. 2009, depending on which is the most
-    efficient.
+    It uses the LAPACK implementation of the full SVD or a randomized truncated
+    SVD by the method of Halko et al. 2009, depending on the shape of the input
+    data and the number of components to extract.
+
+    It can also use the scipy.sparse.linalg ARPACK implementation of the
+    truncated SVD.
 
     Read more in the :ref:`User Guide <PCA>`.
 
@@ -147,10 +150,13 @@ class PCA(_BasePCA):
     svd_solver : string {'auto', 'full', 'arpack', 'randomized'}
         auto :
             the solver is selected by a default policy based on `X.shape` and
-            `n_components` which favors 'randomized' when the problem is
-            computationally demanding for 'full' PCA
+            `n_components`: if the input data is larger than 500x500 and the
+            number of components to extract is lower than 80% of the smallest
+            dimension of the data, then the more efficient 'randomized'
+            method is enabled. Otherwise the exact full SVD is computed and
+            optionally truncated afterwards.
         full :
-            run exact SVD calling ARPACK solver via
+            run exact full SVD calling the standard LAPACK solver via
             `scipy.linalg.svd` and select the components by postprocessing
         arpack :
             run SVD truncated to n_components calling ARPACK solver via
-- 
GitLab