From 9bd0aadaa30295452ba1ba1811b4086cbb2ef5f3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller <amueller@ais.uni-bonn.de> Date: Sun, 7 Oct 2012 15:13:29 +0200 Subject: [PATCH] MISC renamed n_iterations to n_iter in all other places. --- benchmarks/bench_plot_svd.py | 18 ++++++------- .../wikipedia_principal_eigenvector.py | 2 +- examples/svm/plot_svm_scale_c.py | 2 +- sklearn/cluster/k_means_.py | 14 +++++------ sklearn/decomposition/pca.py | 2 +- sklearn/utils/extmath.py | 25 +++++++++++++------ sklearn/utils/tests/test_svd.py | 14 +++++------ 7 files changed, 44 insertions(+), 33 deletions(-) diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index 9f76a23dcc..b8a551c63d 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -12,7 +12,7 @@ from sklearn.utils.extmath import randomized_svd from sklearn.datasets.samples_generator import make_low_rank_matrix -def compute_bench(samples_range, features_range, n_iterations=3, rank=50): +def compute_bench(samples_range, features_range, n_iter=3, rank=50): it = 0 @@ -36,19 +36,19 @@ def compute_bench(samples_range, features_range, n_iterations=3, rank=50): results['scipy svd'].append(time() - tstart) gc.collect() - print "benching scikit-learn randomized_svd: n_iterations=0" + print "benching scikit-learn randomized_svd: n_iter=0" tstart = time() - randomized_svd(X, rank, n_iterations=0) - results['scikit-learn randomized_svd (n_iterations=0)'].append( + randomized_svd(X, rank, n_iter=0) + results['scikit-learn randomized_svd (n_iter=0)'].append( time() - tstart) gc.collect() - print ("benching scikit-learn randomized_svd: n_iterations=%d " - % n_iterations) + print ("benching scikit-learn randomized_svd: n_iter=%d " + % n_iter) tstart = time() - randomized_svd(X, rank, n_iterations=n_iterations) - results['scikit-learn randomized_svd (n_iterations=%d)' - % n_iterations].append(time() - tstart) + randomized_svd(X, rank, n_iter=n_iter) + results['scikit-learn randomized_svd (n_iter=%d)' + % n_iter].append(time() - tstart) return results diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index 0d7bebe691..f053916641 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -172,7 +172,7 @@ names = dict((i, name) for name, i in index_map.iteritems()) print "Computing the principal singular vectors using randomized_svd" t0 = time() -U, s, V = randomized_svd(X, 5, n_iterations=3) +U, s, V = randomized_svd(X, 5, n_iter=3) print "done in %0.3fs" % (time() - t0) # print the names of the wikipedia related strongest compenents of the the diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py index 6b926c4cea..6569e4875c 100644 --- a/examples/svm/plot_svm_scale_c.py +++ b/examples/svm/plot_svm_scale_c.py @@ -128,7 +128,7 @@ for fignum, (clf, cs, X, y) in enumerate(clf_sets): # reduce the variance grid = GridSearchCV(clf, refit=False, param_grid=param_grid, cv=ShuffleSplit(n=n_samples, train_size=train_size, - n_iterations=250, random_state=1)) + n_iter=250, random_state=1)) grid.fit(X, y) scores = [x[1] for x in grid.grid_scores_] diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index 770eab588f..0f143a5fcf 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -897,7 +897,7 @@ def _mini_batch_step(X, x_squared_norms, centers, counts, return inertia, squared_diff -def _mini_batch_convergence(model, iteration_idx, n_iterations, tol, +def _mini_batch_convergence(model, iteration_idx, n_iter, tol, n_samples, centers_squared_diff, batch_inertia, context, verbose=0): """Helper function to encapsulte the early stopping logic""" @@ -926,7 +926,7 @@ def _mini_batch_convergence(model, iteration_idx, n_iterations, tol, progress_msg = ( 'Minibatch iteration %d/%d:' 'mean batch inertia: %f, ewa inertia: %f ' % ( - iteration_idx + 1, n_iterations, batch_inertia, + iteration_idx + 1, n_iter, batch_inertia, ewa_inertia)) print progress_msg @@ -935,7 +935,7 @@ def _mini_batch_convergence(model, iteration_idx, n_iterations, tol, if tol > 0.0 and ewa_diff < tol: if verbose: print 'Converged (small centers change) at iteration %d/%d' % ( - iteration_idx + 1, n_iterations) + iteration_idx + 1, n_iter) return True # Early stopping heuristic due to lack of improvement on smoothed inertia @@ -952,7 +952,7 @@ def _mini_batch_convergence(model, iteration_idx, n_iterations, tol, if verbose: print ('Converged (lack of improvement in inertia)' ' at iteration %d/%d' % ( - iteration_idx + 1, n_iterations)) + iteration_idx + 1, n_iter)) return True # update the convergence context to maintain state across sucessive calls: @@ -1102,7 +1102,7 @@ class MiniBatchKMeans(KMeans): distances = np.zeros(self.batch_size, dtype=np.float64) n_batches = int(np.ceil(float(n_samples) / self.batch_size)) - n_iterations = int(self.max_iter * n_batches) + n_iter = int(self.max_iter * n_batches) init_size = self.init_size if init_size is None: @@ -1158,7 +1158,7 @@ class MiniBatchKMeans(KMeans): # Perform the iterative optimization untill the final convergence # criterion - for iteration_idx in xrange(n_iterations): + for iteration_idx in xrange(n_iter): # Sample the minibatch from the full dataset minibatch_indices = self.random_state.random_integers( @@ -1172,7 +1172,7 @@ class MiniBatchKMeans(KMeans): # Monitor the convergence and do early stopping if necessary if _mini_batch_convergence( - self, iteration_idx, n_iterations, tol, n_samples, + self, iteration_idx, n_iter, tol, n_samples, centers_squared_diff, batch_inertia, convergence_context, verbose=self.verbose): break diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index 57c5591396..fdab4a51ca 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -485,7 +485,7 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): n_components = self.n_components U, S, V = randomized_svd(X, n_components, - n_iterations=self.iterated_power, + n_iter=self.iterated_power, random_state=self.random_state) self.explained_variance_ = exp_var = (S ** 2) / n_samples diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 6ce09e502f..6293fcbed5 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -4,6 +4,7 @@ Extended math utilities. # Authors: G. Varoquaux, A. Gramfort, A. Passos, O. Grisel # License: BSD +import warnings import numpy as np from scipy import linalg @@ -78,7 +79,8 @@ def safe_sparse_dot(a, b, dense_output=False): return np.dot(a, b) -def randomized_range_finder(A, size, n_iterations, random_state=None): +def randomized_range_finder(A, size, n_iter, random_state=None, + n_iterations=None): """Computes an orthonormal matrix whose range approximates the range of A. Parameters @@ -87,7 +89,7 @@ def randomized_range_finder(A, size, n_iterations, random_state=None): The input data matrix size: integer Size of the return array - n_iterations: integer + n_iter: integer Number of power iterations used to stabilize the result random_state: RandomState or an int seed (0 by default) A random number generator instance @@ -106,6 +108,10 @@ def randomized_range_finder(A, size, n_iterations, random_state=None): approximate matrix decompositions Halko, et al., 2009 (arXiv:909) http://arxiv.org/pdf/0909.4061 """ + if n_iterations is not None: + warnings.warn("n_iterations was renamed to n_iter for consistency " + "and will be removed in 0.16.", DeprecationWarning) + n_iter = n_iterations random_state = check_random_state(random_state) # generating random gaussian vectors r with shape: (A.shape[1], size) @@ -117,7 +123,7 @@ def randomized_range_finder(A, size, n_iterations, random_state=None): # perform power iterations with Y to further 'imprint' the top # singular vectors of A in Y - for i in xrange(n_iterations): + for i in xrange(n_iter): Y = safe_sparse_dot(A, safe_sparse_dot(A.T, Y)) # extracting an orthonormal basis of the A range samples @@ -125,8 +131,8 @@ def randomized_range_finder(A, size, n_iterations, random_state=None): return Q -def randomized_svd(M, n_components, n_oversamples=10, n_iterations=0, - transpose='auto', random_state=0): +def randomized_svd(M, n_components, n_oversamples=10, n_iter=0, + transpose='auto', random_state=0, n_iterations=None): """Computes a truncated randomized SVD Parameters @@ -142,7 +148,7 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iterations=0, to ensure proper conditioning. The total number of random vectors used to find the range of M is n_components + n_oversamples. - n_iterations: int (default is 0) + n_iter: int (default is 0) Number of power iterations (can be used to deal with very noisy problems). @@ -172,6 +178,11 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iterations=0, * A randomized algorithm for the decomposition of matrices Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert """ + if n_iterations is not None: + warnings.warn("n_iterations was renamed to n_iter for consistency " + "and will be removed in 0.16.", DeprecationWarning) + n_iter = n_iterations + random_state = check_random_state(random_state) n_random = n_components + n_oversamples n_samples, n_features = M.shape @@ -182,7 +193,7 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iterations=0, # this implementation is a bit faster with smaller shape[1] M = M.T - Q = randomized_range_finder(M, n_random, n_iterations, random_state) + Q = randomized_range_finder(M, n_random, n_iter, random_state) # project M to the (k + p) dimensional space using the basis vectors B = safe_sparse_dot(Q.T, M) diff --git a/sklearn/utils/tests/test_svd.py b/sklearn/utils/tests/test_svd.py index 6e5cf6d963..57e437d49e 100644 --- a/sklearn/utils/tests/test_svd.py +++ b/sklearn/utils/tests/test_svd.py @@ -69,14 +69,14 @@ def test_randomized_svd_low_rank_with_noise(): # compute the singular values of X using the fast approximate method # without the iterated power method - _, sa, _ = randomized_svd(X, k, n_iterations=0) + _, sa, _ = randomized_svd(X, k, n_iter=0) # the approximation does not tolerate the noise: assert_greater(np.abs(s[:k] - sa).max(), 0.05) # compute the singular values of X using the fast approximate method with # iterated power method - _, sap, _ = randomized_svd(X, k, n_iterations=5) + _, sap, _ = randomized_svd(X, k, n_iter=5) # the iterated power method is helping getting rid of the noise: assert_almost_equal(s[:k], sap, decimal=3) @@ -100,14 +100,14 @@ def test_randomized_svd_infinite_rank(): # compute the singular values of X using the fast approximate method # without the iterated power method - _, sa, _ = randomized_svd(X, k, n_iterations=0) + _, sa, _ = randomized_svd(X, k, n_iter=0) # the approximation does not tolerate the noise: assert_greater(np.abs(s[:k] - sa).max(), 0.1) # compute the singular values of X using the fast approximate method with # iterated power method - _, sap, _ = randomized_svd(X, k, n_iterations=5) + _, sap, _ = randomized_svd(X, k, n_iter=5) # the iterated power method is still managing to get most of the structure # at the requested rank @@ -125,11 +125,11 @@ def test_randomized_svd_transpose_consistency(): effective_rank=rank, tail_strength=0.5, random_state=0) assert_equal(X.shape, (n_samples, n_features)) - U1, s1, V1 = randomized_svd(X, k, n_iterations=3, transpose=False, + U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False, random_state=0) - U2, s2, V2 = randomized_svd(X, k, n_iterations=3, transpose=True, + U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True, random_state=0) - U3, s3, V3 = randomized_svd(X, k, n_iterations=3, transpose='auto', + U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto', random_state=0) U4, s4, V4 = linalg.svd(X, full_matrices=False) -- GitLab