diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index aecab027b7db8d656a959a03052a5af72d2311e8..2a6f0dd0138571627a72659f01012b71c5db511b 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -108,9 +108,12 @@ class PCA(_BasePCA): Linear dimensionality reduction using Singular Value Decomposition of the data to project it to a lower dimensional space. - It uses the scipy.linalg ARPACK implementation of the SVD or a randomized - SVD by the method of Halko et al. 2009, depending on which is the most - efficient. + It uses the LAPACK implementation of the full SVD or a randomized truncated + SVD by the method of Halko et al. 2009, depending on the shape of the input + data and the number of components to extract. + + It can also use the scipy.sparse.linalg ARPACK implementation of the + truncated SVD. Read more in the :ref:`User Guide <PCA>`. @@ -147,10 +150,13 @@ class PCA(_BasePCA): svd_solver : string {'auto', 'full', 'arpack', 'randomized'} auto : the solver is selected by a default policy based on `X.shape` and - `n_components` which favors 'randomized' when the problem is - computationally demanding for 'full' PCA + `n_components`: if the input data is larger than 500x500 and the + number of components to extract is lower than 80% of the smallest + dimension of the data, then the more efficient 'randomized' + method is enabled. Otherwise the exact full SVD is computed and + optionally truncated afterwards. full : - run exact SVD calling ARPACK solver via + run exact full SVD calling the standard LAPACK solver via `scipy.linalg.svd` and select the components by postprocessing arpack : run SVD truncated to n_components calling ARPACK solver via