From 12f4eb9047a1ec4ceb65552f1fa704d49dbf54a5 Mon Sep 17 00:00:00 2001
From: Jake Vanderplas <vanderplas@astro.washington.edu>
Date: Wed, 15 Aug 2012 21:45:37 -0700
Subject: [PATCH] @jakevdp's version of pinvh: speed up symmetric_pinv

---
 sklearn/covariance/empirical_covariance_.py |  6 +--
 sklearn/covariance/graph_lasso_.py          |  4 +-
 sklearn/covariance/robust_covariance.py     | 10 ++---
 sklearn/utils/extmath.py                    | 45 ++++++++++-----------
 sklearn/utils/tests/test_utils.py           |  6 +--
 5 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/sklearn/covariance/empirical_covariance_.py b/sklearn/covariance/empirical_covariance_.py
index d968019156..b71bfb660e 100644
--- a/sklearn/covariance/empirical_covariance_.py
+++ b/sklearn/covariance/empirical_covariance_.py
@@ -17,7 +17,7 @@ from scipy import linalg
 
 from ..base import BaseEstimator
 from ..utils import array2d
-from ..utils.extmath import fast_logdet, symmetric_pinv
+from ..utils.extmath import fast_logdet, pinvh
 
 
 def log_likelihood(emp_cov, precision):
@@ -113,7 +113,7 @@ class EmpiricalCovariance(BaseEstimator):
         self.covariance_ = covariance
         # set precision
         if self.store_precision:
-            self.precision_ = symmetric_pinv(covariance)
+            self.precision_ = pinvh(covariance)
         else:
             self.precision_ = None
@@ -129,7 +129,7 @@ class EmpiricalCovariance(BaseEstimator):
         if self.store_precision:
             precision = self.precision_
         else:
-            precision = symmetric_pinv(self.covariance_)
+            precision = pinvh(self.covariance_)
         return precision
 
     def fit(self, X):
diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
index a9134683a1..df0b89eaad 100644
--- a/sklearn/covariance/graph_lasso_.py
+++ b/sklearn/covariance/graph_lasso_.py
@@ -17,7 +17,7 @@ from .empirical_covariance_ import empirical_covariance, \
     EmpiricalCovariance, log_likelihood
 
 from ..utils import ConvergenceWarning
-from ..utils.extmath import symmetric_pinv
+from ..utils.extmath import pinvh
 from ..linear_model import lars_path
 from ..linear_model import cd_fast
 from ..cross_validation import check_cv, cross_val_score
@@ -144,7 +144,7 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
         covariance_ *= 0.95
         diagonal = emp_cov.flat[::n_features + 1]
         covariance_.flat[::n_features + 1] = diagonal
-    precision_ = symmetric_pinv(covariance_)
+    precision_ = pinvh(covariance_)
     indices = np.arange(n_features)
     costs = list()
 
diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py
index 92ec07ab27..d8eea3db35 100644
--- a/sklearn/covariance/robust_covariance.py
+++ b/sklearn/covariance/robust_covariance.py
@@ -13,7 +13,7 @@ from scipy import linalg
 from scipy.stats import chi2
 
 from . import empirical_covariance, EmpiricalCovariance
-from ..utils.extmath import fast_logdet, symmetric_pinv
+from ..utils.extmath import fast_logdet, pinvh
 from ..utils import check_random_state
 
 
@@ -85,7 +85,7 @@ def c_step(X, n_support, remaining_iterations=30, initial_estimates=None,
         location = initial_estimates[0]
         covariance = initial_estimates[1]
         # run a special iteration for that case (to get an initial support)
-        precision = symmetric_pinv(covariance)
+        precision = pinvh(covariance)
         X_centered = X - location
         dist = (np.dot(X_centered, precision) * X_centered).sum(1)
         # compute new estimates
@@ -104,7 +104,7 @@
         previous_det = det
         previous_support = support
         # compute a new support from the full data set mahalanobis distances
-        precision = symmetric_pinv(covariance)
+        precision = pinvh(covariance)
         X_centered = X - location
         dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
         # compute new estimates
@@ -344,7 +344,7 @@ def fast_mcd(X, support_fraction=None,
             covariance = np.asarray([[np.var(X[support])]])
             location = np.array([location])
             # get precision matrix in an optimized way
-            precision = symmetric_pinv(covariance)
+            precision = pinvh(covariance)
             dist = (np.dot(X_centered, precision) \
                         * (X_centered)).sum(axis=1)
 
@@ -545,7 +545,7 @@ class MinCovDet(EmpiricalCovariance):
         raw_covariance = self._nonrobust_covariance(
             X[raw_support], assume_centered=True)
         # get precision matrix in an optimized way
-        precision = symmetric_pinv(raw_covariance)
+        precision = pinvh(raw_covariance)
         raw_dist = np.sum(np.dot(X, precision) * X, 1)
         self.raw_location_ = raw_location
         self.raw_covariance_ = raw_covariance
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 73a2e84ab8..6ce09e502f 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -299,38 +299,41 @@ def weighted_mode(a, w, axis=0):
     return mostfrequent, oldcounts
 
 
-def symmetric_pinv(a, cond=None, rcond=None):
-    """Compute the (Moore-Penrose) pseudo-inverse of a matrix.
+def pinvh(a, cond=None, rcond=None, lower=True):
+    """Compute the (Moore-Penrose) pseudo-inverse of a hermitian matrix.
 
     Calculate a generalized inverse of a symmetric matrix using its
     eigenvalue decomposition and including all 'large' eigenvalues.
 
-    Inspired by ``scipy.linalg.pinv2``, credited to Pearu Peterson and Travis
-    Oliphant.
-
 
     Parameters
     ----------
     a : array, shape (N, N)
-        Symmetric matrix to be pseudo-inverted
+        Real symmetric or complex hermitian matrix to be pseudo-inverted
     cond, rcond : float or None
         Cutoff for 'small' eigenvalues.
         Singular values smaller than rcond * largest_eigenvalue are
         considered zero.
         If None or -1, suitable machine precision is used.
+    lower : boolean
+        Whether the pertinent array data is taken from the lower or upper
+        triangle of a. (Default: lower)
 
     Returns
     -------
     B : array, shape (N, N)
 
-    Raises LinAlgError if eigenvalue does not converge
+    Raises
+    ------
+    LinAlgError
+        If eigenvalue does not converge
 
     Examples
    --------
     >>> from numpy import *
     >>> a = random.randn(9, 6)
     >>> a = np.dot(a, a.T)
-    >>> B = symmetric_pinv(a)
+    >>> B = pinvh(a)
     >>> allclose(a, dot(a, dot(B, a)))
     True
     >>> allclose(B, dot(B, dot(a, B)))
@@ -338,22 +341,18 @@
     True
     """
     a = np.asarray_chkfinite(a)
-    s, u = linalg.eigh(a)
-    # eigh returns eigvals in reverse order, but this doesn't affect anything.
+    s, u = linalg.eigh(a, lower=lower)
 
-    t = u.dtype.char
     if rcond is not None:
         cond = rcond
     if cond in [None, -1]:
-        eps = np.finfo(np.float).eps
-        feps = np.finfo(np.single).eps
-        _array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1}
-        cond = {0: feps * 1e3, 1: eps * 1e6}[_array_precision[t]]
-    n = a.shape[0]
-    cutoff = cond * np.maximum.reduce(s)
-    psigma = np.zeros(n, t)
-    above_cutoff = np.where(s > cutoff)
-    psigma[above_cutoff] = 1.0 / np.conjugate(s[above_cutoff])
-    #XXX: use lapack/blas routines for dot
-    #XXX: above comment is from scipy, but I (@vene)'ll take a look
-    return np.transpose(np.conjugate(np.dot(u * psigma, u.T.conjugate())))
+        t = u.dtype.char.lower()
+        factor = {'f': 1E3, 'd': 1E6}
+        cond = factor[t] * np.finfo(t).eps
+
+    # unlike svd case, eigh can lead to negative eigenvalues
+    above_cutoff = (abs(s) > cond * np.max(abs(s)))
+    psigma_diag = np.zeros_like(s)
+    psigma_diag[above_cutoff] = 1.0 / s[above_cutoff]
+
+    return np.dot(u * psigma_diag, np.conjugate(u).T)
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index 78945c5c7e..dae08d0086 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -11,7 +11,7 @@ from sklearn.utils import check_random_state
 from sklearn.utils import deprecated
 from sklearn.utils import resample
 from sklearn.utils import safe_mask
-from sklearn.utils.extmath import symmetric_pinv
+from sklearn.utils.extmath import pinvh
 
 
 def test_make_rng():
@@ -93,7 +93,7 @@ def test_safe_mask():
     assert_equal(X_csr[mask].shape[0], 3)
 
 
-def test_symmetric_pinv():
+def test_pinvh():
     a = np.random.randn(5, 3)
     a = np.dot(a, a.T)  # symmetric singular matrix
-    assert_almost_equal(pinv2(a), symmetric_pinv(a))
+    assert_almost_equal(pinv2(a), pinvh(a))
--
GitLab
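For reference, the eigendecomposition approach this patch introduces can be exercised outside of scikit-learn. Below is a minimal standalone sketch of the patched routine; the helper name pinvh_sketch is made up for illustration and is not the sklearn or scipy API. It mirrors the new function body and the updated test: build a rank-deficient symmetric matrix, check the two Moore-Penrose identities, and compare against the SVD-based scipy.linalg.pinv2 that the test uses. This assumes a 2012-era SciPy; pinv2 was removed in later SciPy releases, which ship scipy.linalg.pinvh instead.

import numpy as np
from scipy import linalg


def pinvh_sketch(a, cond=None, lower=True):
    """Standalone, illustrative copy of the patched pinvh."""
    # Eigendecomposition of the symmetric input; `lower` selects which
    # triangle of `a` LAPACK reads, matching the new keyword argument.
    s, u = linalg.eigh(a, lower=lower)
    if cond in [None, -1]:
        # dtype-dependent default cutoff, as in the patch:
        # 1e3 * eps in single precision, 1e6 * eps in double.
        t = u.dtype.char.lower()
        cond = {'f': 1E3, 'd': 1E6}[t] * np.finfo(t).eps
    # unlike the SVD case, eigh can return negative eigenvalues,
    # so the cutoff compares absolute values
    above_cutoff = (abs(s) > cond * np.max(abs(s)))
    psigma_diag = np.zeros_like(s)
    psigma_diag[above_cutoff] = 1.0 / s[above_cutoff]
    # B = U diag(1/s) U^H, restricted to the retained eigenvalues
    return np.dot(u * psigma_diag, np.conjugate(u).T)


# Rank-deficient symmetric matrix, mirroring test_pinvh above:
# a 5x3 Gaussian matrix yields a 5x5 Gram matrix of rank at most 3.
rng = np.random.RandomState(0)
a = rng.randn(5, 3)
a = np.dot(a, a.T)

B = pinvh_sketch(a)
assert np.allclose(a, np.dot(a, np.dot(B, a)))  # a B a == a
assert np.allclose(B, np.dot(B, np.dot(a, B)))  # B a B == B
assert np.allclose(B, linalg.pinv2(a))          # agrees with the SVD route

The sketch makes the point of the patch visible: linalg.eigh exploits the symmetry that pinv2's general SVD ignores, so the same pseudo-inverse comes out of a cheaper factorization, and the abs() in the cutoff handles the negative eigenvalues that eigh, unlike an SVD, can return.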