From eeb6dcc4cc898af1a1c395a8e76eeee00d58c25f Mon Sep 17 00:00:00 2001 From: Andreas Mueller <amueller@ais.uni-bonn.de> Date: Sun, 6 May 2012 16:22:39 +0200 Subject: [PATCH] ENH use random states everywhere, never call np.random. --- sklearn/cluster/tests/test_hierarchical.py | 23 ++++++++-------- sklearn/cluster/tests/test_k_means.py | 3 ++- .../tests/test_robust_covariance.py | 4 +-- sklearn/decomposition/tests/test_fastica.py | 4 +-- sklearn/ensemble/tests/test_forest.py | 6 ++--- .../ensemble/tests/test_gradient_boosting.py | 11 ++++---- .../tests/test_feature_select.py | 9 ++++--- sklearn/hmm.py | 6 ++--- sklearn/linear_model/tests/test_logistic.py | 8 +++--- sklearn/linear_model/tests/test_ridge.py | 27 +++++++++---------- sklearn/linear_model/tests/test_sgd.py | 20 +++++++------- sklearn/manifold/tests/test_locally_linear.py | 6 ++--- sklearn/metrics/tests/test_metrics.py | 4 +-- sklearn/metrics/tests/test_pairwise.py | 2 -- sklearn/neighbors/tests/test_ball_tree.py | 14 +++++----- sklearn/neighbors/tests/test_neighbors.py | 13 +++++---- sklearn/svm/tests/test_sparse.py | 3 ++- sklearn/tests/test_kernel_approximation.py | 5 ++-- sklearn/tests/test_multiclass.py | 3 ++- sklearn/tests/test_preprocessing.py | 10 +++---- sklearn/utils/tests/test_shortest_path.py | 6 ++--- sklearn/utils/tests/test_weighted_mode.py | 8 +++--- 22 files changed, 100 insertions(+), 95 deletions(-) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 5e0da5f92b..53962a631f 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -17,9 +17,9 @@ def test_structured_ward_tree(): """ Check that we obtain the correct solution for structured ward tree. """ - np.random.seed(0) + rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) - X = np.random.randn(50, 100) + X = rnd.randn(50, 100) connectivity = grid_to_graph(*mask.shape) children, n_components, n_leaves = ward_tree(X.T, connectivity) n_nodes = 2 * X.shape[1] - 1 @@ -30,8 +30,8 @@ def test_unstructured_ward_tree(): """ Check that we obtain the correct solution for unstructured ward tree. """ - np.random.seed(0) - X = np.random.randn(50, 100) + rnd = np.random.RandomState(0) + X = rnd.randn(50, 100) children, n_nodes, n_leaves = ward_tree(X.T) n_nodes = 2 * X.shape[1] - 1 assert_true(len(children) + n_leaves == n_nodes) @@ -41,9 +41,9 @@ def test_height_ward_tree(): """ Check that the height of ward tree is sorted. """ - np.random.seed(0) + rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) - X = np.random.randn(50, 100) + X = rnd.randn(50, 100) connectivity = grid_to_graph(*mask.shape) children, n_nodes, n_leaves = ward_tree(X.T, connectivity) n_nodes = 2 * X.shape[1] - 1 @@ -54,9 +54,9 @@ def test_ward_clustering(): """ Check that we obtain the correct number of clusters with Ward clustering. """ - np.random.seed(0) + rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) - X = np.random.randn(100, 50) + X = rnd.randn(100, 50) connectivity = grid_to_graph(*mask.shape) clustering = Ward(n_clusters=10, connectivity=connectivity) clustering.fit(X) @@ -67,9 +67,9 @@ def test_ward_agglomeration(): """ Check that we obtain the correct solution in a simplistic case """ - np.random.seed(0) + rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) - X = np.random.randn(50, 100) + X = rnd.randn(50, 100) connectivity = grid_to_graph(*mask.shape) ward = WardAgglomeration(n_clusters=5, connectivity=connectivity) ward.fit(X) @@ -98,10 +98,11 @@ def test_scikit_vs_scipy(): """ from scipy.sparse import lil_matrix n, p, k = 10, 5, 3 + rnd = np.random.RandomState(0) connectivity = lil_matrix(np.ones((n, n))) for i in range(5): - X = .1 * np.random.normal(size=(n, p)) + X = .1 * rnd.normal(size=(n, p)) X -= 4 * np.arange(n)[:, np.newaxis] X -= X.mean(axis=1)[:, np.newaxis] diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index fe9226c1cc..b735ab3454 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -43,7 +43,8 @@ def test_square_norms(): def test_kmeans_dtype(): - X = np.random.normal(size=(40, 2)) + rnd = np.random.RandomState(0) + X = rnd.normal(size=(40, 2)) X = (X * 10).astype(np.uint8) km = KMeans(n_init=1).fit(X) with warnings.catch_warnings(record=True) as w: diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index b34374bb8a..74cb39047b 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -72,8 +72,8 @@ def test_outlier_detection(): """ """ - np.random.RandomState(0) - X = np.random.randn(100, 10) + rnd = np.random.RandomState(0) + X = rnd.randn(100, 10) clf = EllipticEnvelope(contamination=0.1) clf.fit(X) y_pred = clf.predict(X) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 5f8e6dc3c2..d958462ad1 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -51,7 +51,7 @@ def test_fastica(add_noise=False): """ Test the FastICA algorithm on very simple data. """ # scipy.stats uses the global RNG: - np.random.seed(0) + rng = np.random.RandomState(0) n_samples = 1000 # Generate two sources: s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1 @@ -67,7 +67,7 @@ def test_fastica(add_noise=False): m = np.dot(mixing, s) if add_noise: - m += 0.1 * np.random.randn(2, 1000) + m += 0.1 * rng.randn(2, 1000) center_and_norm(m) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 56f6f20986..0eabf44b48 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -30,15 +30,15 @@ true_result = [-1, 1, 1] # also load the iris dataset # and randomly permute it iris = datasets.load_iris() -np.random.seed([1]) -perm = np.random.permutation(iris.target.size) +rng = np.random.RandomState(0) +perm = rng.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] # also load the boston dataset # and randomly permute it boston = datasets.load_boston() -perm = np.random.permutation(boston.target.size) +perm = rng.permutation(boston.target.size) boston.data = boston.data[perm] boston.target = boston.target[perm] diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 15a641fdab..276a737f1a 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -20,17 +20,18 @@ y = [-1, -1, -1, 1, 1, 1] T = [[-1, -1], [2, 2], [3, 2]] true_result = [-1, 1, 1] +rng = np.random.RandomState(0) # also load the boston dataset # and randomly permute it boston = datasets.load_boston() -perm = np.random.permutation(boston.target.size) +perm = rng.permutation(boston.target.size) boston.data = boston.data[perm] boston.target = boston.target[perm] # also load the iris dataset # and randomly permute it iris = datasets.load_iris() -perm = np.random.permutation(iris.target.size) +perm = rng.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] @@ -248,7 +249,7 @@ def test_check_inputs_predict(): assert_raises(ValueError, clf.predict, x) clf = GradientBoostingRegressor(n_estimators=100, random_state=1) - clf.fit(X, np.random.rand(len(X))) + clf.fit(X, rng.rand(len(X))) x = np.array([1.0, 2.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x) @@ -312,6 +313,6 @@ def test_degenerate_targets(): clf = GradientBoostingRegressor(n_estimators=100, random_state=1) clf.fit(X, np.ones(len(X))) - clf.predict(np.random.rand(2)) + clf.predict(rng.rand(2)) assert_array_equal(np.ones((1,), dtype=np.float64), - clf.predict(np.random.rand(2))) + clf.predict(rng.rand(2))) diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 0975ed10d0..c579a1b13c 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -19,8 +19,9 @@ from sklearn.datasets.samples_generator import make_classification, \ def test_f_oneway_vs_scipy_stats(): """Test that our f_oneway gives the same result as scipy.stats""" - X1 = np.random.randn(10, 3) - X2 = 1 + np.random.randn(10, 3) + rng = np.random.RandomState(0) + X1 = rng.randn(10, 3) + X2 = 1 + rng.randn(10, 3) f, pv = stats.f_oneway(X1, X2) f2, pv2 = f_oneway(X1, X2) assert_true(np.allclose(f, f2)) @@ -67,8 +68,8 @@ def test_f_regression_input_dtype(): Test whether f_regression returns the same value for any numeric data_type """ - - X = np.random.rand(10, 20) + rng = np.random.RandomState(0) + X = rng.rand(10, 20) y = np.arange(10).astype(np.int) F1, pv1 = f_regression(X, y) diff --git a/sklearn/hmm.py b/sklearn/hmm.py index 605c6e7ddf..e8c2542fb1 100644 --- a/sklearn/hmm.py +++ b/sklearn/hmm.py @@ -135,7 +135,7 @@ class _BaseHMM(BaseEstimator): self._algorithm = algorithm else: self._algorithm = "viterbi" - self.random_state = random_state + self.random_state = check_random_state(random_state) def eval(self, obs): """Compute the log probability under the model and compute posteriors @@ -900,8 +900,8 @@ class MultinomialHMM(_BaseHMM): super(MultinomialHMM, self)._init(obs, params=params) if 'e' in params: - emissionprob = normalize(np.random.rand(self.n_components, - self.n_symbols), 1) + emissionprob = normalize(self.random_state.rand(self.n_components, + self.n_symbols), 1) self.emissionprob_ = emissionprob def _initialize_sufficient_statistics(self): diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 6f517580be..7946141059 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -77,7 +77,8 @@ def test_predict_iris(): def test_inconsistent_input(): """Test that an exception is raised on inconsistent input""" - X_ = np.random.random((5, 10)) + rng = np.random.RandomState(0) + X_ = rng.random_sample((5, 10)) y_ = np.ones(X_.shape[0]) clf = logistic.LogisticRegression() @@ -87,9 +88,8 @@ def test_inconsistent_input(): assert_raises(ValueError, clf.fit, X, y_wrong) # Wrong dimensions for test data - assert_raises(ValueError, - clf.fit(X_, y_).predict, - np.random.random((3, 12))) + assert_raises(ValueError, clf.fit(X_, y_).predict, + rng.random_sample((3, 12))) @raises(ValueError) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index e7ba5b33cd..6b5cec7293 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -17,11 +17,11 @@ from sklearn.linear_model.ridge import RidgeClassifierCV from sklearn.cross_validation import KFold +rng = np.random.RandomState(0) diabetes = datasets.load_diabetes() - X_diabetes, y_diabetes = diabetes.data, diabetes.target ind = np.arange(X_diabetes.shape[0]) -np.random.shuffle(ind) +rng.shuffle(ind) ind = ind[:200] X_diabetes, y_diabetes = X_diabetes[ind], y_diabetes[ind] @@ -30,8 +30,6 @@ iris = datasets.load_iris() X_iris = sp.csr_matrix(iris.data) y_iris = iris.target -np.random.seed(0) - DENSE_FILTER = lambda X: X SPARSE_FILTER = lambda X: sp.csr_matrix(X) @@ -46,8 +44,8 @@ def test_ridge(): # With more samples than features n_samples, n_features = 6, 5 - y = np.random.randn(n_samples) - X = np.random.randn(n_samples, n_features) + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) @@ -59,8 +57,8 @@ def test_ridge(): # With more features than samples n_samples, n_features = 5, 10 - y = np.random.randn(n_samples) - X = np.random.randn(n_samples, n_features) + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert_greater(ridge.score(X, y), .9) @@ -73,8 +71,8 @@ def test_ridge_shapes(): """Test shape of coef_ and intercept_ """ n_samples, n_features = 5, 10 - X = np.random.randn(n_samples, n_features) - y = np.random.randn(n_samples) + X = rng.randn(n_samples, n_features) + y = rng.randn(n_samples) Y1 = y[:, np.newaxis] Y = np.c_[y, 1 + y] @@ -97,8 +95,8 @@ def test_ridge_intercept(): """Test intercept with multiple targets GH issue #708 """ n_samples, n_features = 5, 10 - X = np.random.randn(n_samples, n_features) - y = np.random.randn(n_samples) + X = rng.randn(n_samples, n_features) + y = rng.randn(n_samples) Y = np.c_[y, 1. + y] ridge = Ridge() @@ -140,9 +138,8 @@ def test_ridge_vs_lstsq(): # we need more samples than features n_samples, n_features = 5, 4 - np.random.seed(0) - y = np.random.randn(n_samples) - X = np.random.randn(n_samples, n_features) + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=0., fit_intercept=False) ols = LinearRegression(fit_intercept=False) diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index fbd26e4cdf..53ae5f3b3c 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -279,9 +279,9 @@ class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): def test_sgd_l1(self): """Test L1 regularization""" n = len(X4) - np.random.seed(13) + rng = np.random.RandomState(13) idx = np.arange(n) - np.random.shuffle(idx) + rng.shuffle(idx) X = X4[idx, :] Y = Y4[idx, :] @@ -350,8 +350,8 @@ class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): X, y = iris.data, iris.target X = preprocessing.scale(X) idx = np.arange(X.shape[0]) - np.random.seed(13) - np.random.shuffle(idx) + rng = np.random.RandomState(0) + rng.shuffle(idx) X = X[idx] y = y[idx] clf = self.factory(alpha=0.0001, n_iter=1000, @@ -528,6 +528,7 @@ class DenseSGDRegressorTestCase(unittest.TestCase): def test_sgd_least_squares_fit(self): xmin, xmax = -5, 5 n_samples = 100 + rng = np.random.RandomState(0) X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1) # simple linear function without noise @@ -540,8 +541,7 @@ class DenseSGDRegressorTestCase(unittest.TestCase): assert_greater(score, 0.99) # simple linear function with noise - y = 0.5 * X.ravel() \ - + np.random.randn(n_samples, 1).ravel() + y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() clf = self.factory(loss='squared_loss', alpha=0.1, n_iter=20, fit_intercept=False) @@ -552,6 +552,7 @@ class DenseSGDRegressorTestCase(unittest.TestCase): def test_sgd_huber_fit(self): xmin, xmax = -5, 5 n_samples = 100 + rng = np.random.RandomState(0) X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1) # simple linear function without noise @@ -564,8 +565,7 @@ class DenseSGDRegressorTestCase(unittest.TestCase): assert_greater(score, 0.99) # simple linear function with noise - y = 0.5 * X.ravel() \ - + np.random.randn(n_samples, 1).ravel() + y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() clf = self.factory(loss="huber", p=0.1, alpha=0.1, n_iter=20, fit_intercept=False) @@ -577,11 +577,11 @@ class DenseSGDRegressorTestCase(unittest.TestCase): """Check that the SGD ouput is consistent with coordinate descent""" n_samples, n_features = 1000, 5 - np.random.seed(0) + rng = np.random.RandomState(0) X = np.random.randn(n_samples, n_features) # ground_truth linear model that generate y from X and to which the # models should converge if the regularizer would be set to 0.0 - ground_truth_coef = np.random.randn(n_features) + ground_truth_coef = rng.randn(n_features) y = np.dot(X, ground_truth_coef) # XXX: alpha = 0.1 seems to cause convergence problems diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index a45c8addb8..3e246640a4 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -65,14 +65,14 @@ def test_lle_simple_grid(): def test_lle_manifold(): + rng = np.random.RandomState(0) # similar test on a slightly more complex manifold X = np.array(list(product(range(20), repeat=2))) X = np.c_[X, X[:, 0] ** 2 / 20] - X = X + 1e-10 * np.random.uniform(size=X.shape) + X = X + 1e-10 * rng.uniform(size=X.shape) n_components = 2 clf = manifold.LocallyLinearEmbedding(n_neighbors=5, - n_components=n_components, - random_state=0) + n_components=n_components, random_state=0) tol = 1.5 N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py index e37213bdf4..d409866d12 100644 --- a/sklearn/metrics/tests/test_metrics.py +++ b/sklearn/metrics/tests/test_metrics.py @@ -53,8 +53,8 @@ def make_prediction(dataset=None, binary=False): half = int(n_samples / 2) # add noisy features to make the problem harder and avoid perfect results - np.random.seed(0) - X = np.c_[X, np.random.randn(n_samples, 200 * n_features)] + rng = np.random.RandomState(0) + X = np.c_[X, rng.randn(n_samples, 200 * n_features)] # run classifier, get class probabilities and label predictions clf = svm.SVC(kernel='linear', probability=True) diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 29109c1ba7..43c0ee25e3 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -17,8 +17,6 @@ from ..pairwise import pairwise_kernel_functions from ..pairwise import check_pairwise_arrays from ..pairwise import _parallel_pairwise -np.random.seed(0) - def test_pairwise_distances(): """ Test the pairwise_distance helper function. """ diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index 4480475e0d..891f80960d 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -8,11 +8,13 @@ from sklearn import neighbors # Note: simple tests of BallTree.query() and BallTree.query_radius() # are contained within the tests of test_neighbors.py +rng = np.random.RandomState(0) + def test_warning_flag(n_samples=100, n_features=3, k=3): """test that discarding identical distances triggers warning flag""" - X = np.random.random(size=(n_samples, n_features)) - q = np.random.random(size=n_features) + X = rng.random_sample(size=(n_samples, n_features)) + q = rng.random_sample(size=n_features) bt = neighbors.BallTree(X[:-1], leaf_size=5) dist, ind = bt.query(q, k=k) @@ -38,7 +40,7 @@ def test_warning_flag(n_samples=100, n_features=3, k=3): def test_ball_tree_query_radius(n_samples=100, n_features=10): - X = 2 * np.random.random(size=(n_samples, n_features)) - 1 + X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1 query_pt = np.zeros(n_features, dtype=float) eps = 1E-15 # roundoff error can cause test to fail @@ -56,7 +58,7 @@ def test_ball_tree_query_radius(n_samples=100, n_features=10): def test_ball_tree_query_radius_distance(n_samples=100, n_features=10): - X = 2 * np.random.random(size=(n_samples, n_features)) - 1 + X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1 query_pt = np.zeros(n_features, dtype=float) eps = 1E-15 # roundoff error can cause test to fail @@ -76,7 +78,7 @@ def test_ball_tree_query_radius_distance(n_samples=100, n_features=10): def test_ball_tree_pickle(): import pickle - X = np.random.random(size=(10, 3)) + X = rng.random_sample(size=(10, 3)) bt1 = neighbors.BallTree(X, leaf_size=1) ind1, dist1 = bt1.query(X) for protocol in (0, 1, 2): @@ -88,7 +90,7 @@ def test_ball_tree_pickle(): def test_ball_tree_p_distance(): - X = np.random.random(size=(100, 5)) + X = rng.random_sample(size=(100, 5)) for p in (1, 2, 3, 4, np.inf): bt = neighbors.BallTree(X, leaf_size=10, p=p) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 19a56da28b..6481645e79 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -11,9 +11,10 @@ from scipy.spatial import cKDTree from sklearn import neighbors, datasets +rng = np.random.RandomState(0) # load and shuffle iris dataset iris = datasets.load_iris() -perm = np.random.permutation(iris.target.size) +perm = rng.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] @@ -39,8 +40,8 @@ def _weight_func(dist): def test_warn_on_equidistant(n_samples=100, n_features=3, k=3): """test the production of a warning if equidistant points are discarded""" - X = np.random.random(size=(n_samples, n_features)) - q = np.random.random(size=n_features) + X = rng.random_sample(size=(n_samples, n_features)) + q = rng.random_sample(size=n_features) neigh = neighbors.NearestNeighbors(n_neighbors=k) neigh.fit(X[:-1]) @@ -75,8 +76,6 @@ def test_unsupervised_kneighbors(n_samples=20, n_features=5, n_query_pts=2, n_neighbors=5, random_state=0): """Test unsupervised neighbors methods""" - rng = np.random.RandomState(random_state) - X = rng.rand(n_samples, n_features) test = rng.rand(n_query_pts, n_features) @@ -103,7 +102,7 @@ def test_unsupervised_kneighbors(n_samples=20, n_features=5, def test_unsupervised_inputs(): """test the types of valid input into NearestNeighbors""" - X = np.random.random((10, 3)) + X = rng.random_sample((10, 3)) nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1) nbrs_fid.fit(X) @@ -462,7 +461,7 @@ def test_neighbors_badargs(): neighbors.NearestNeighbors, algorithm='blah') - X = np.random.random((10, 2)) + X = rng.random_sample((10, 2)) for cls in (neighbors.KNeighborsClassifier, neighbors.RadiusNeighborsClassifier, diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 38bd4d8d16..c6fd3cb913 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -26,7 +26,8 @@ true_result2 = [1, 2, 3] iris = datasets.load_iris() # permute -perm = np.random.permutation(iris.target.size) +rng = np.random.RandomState(0) +perm = rng.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] # sparsify diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index 0f177fae2a..6868fe0835 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -7,8 +7,9 @@ from sklearn.kernel_approximation import SkewedChi2Sampler from sklearn.metrics.pairwise import rbf_kernel # generate data -X = np.random.uniform(size=(300, 50)) -Y = np.random.uniform(size=(300, 50)) +rng = np.random.RandomState(0) +X = rng.random_sample(size=(300, 50)) +Y = rng.random_sample(size=(300, 50)) X /= X.sum(axis=1)[:, np.newaxis] Y /= Y.sum(axis=1)[:, np.newaxis] diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 62f05ad5bd..6256f8eb81 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -20,7 +20,8 @@ from sklearn import svm from sklearn import datasets iris = datasets.load_iris() -perm = np.random.permutation(iris.target.size) +rng = np.random.RandomState(0) +perm = rng.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] n_classes = 3 diff --git a/sklearn/tests/test_preprocessing.py b/sklearn/tests/test_preprocessing.py index cf156bd385..5a16a6f9ca 100644 --- a/sklearn/tests/test_preprocessing.py +++ b/sklearn/tests/test_preprocessing.py @@ -21,8 +21,6 @@ from sklearn.preprocessing import scale from sklearn import datasets from sklearn.linear_model.stochastic_gradient import SGDClassifier -np.random.seed(0) - iris = datasets.load_iris() @@ -440,7 +438,8 @@ def test_label_binarizer_multilabel_unlabeled(): def test_center_kernel(): """Test that KernelCenterer is equivalent to Scaler in feature space""" - X_fit = np.random.random((5, 4)) + rng = np.random.RandomState(0) + X_fit = rng.random_sample((5, 4)) scaler = Scaler(with_std=False) scaler.fit(X_fit) X_fit_centered = scaler.transform(X_fit) @@ -453,7 +452,7 @@ def test_center_kernel(): assert_array_almost_equal(K_fit_centered, K_fit_centered2) # center predict time matrix - X_pred = np.random.random((2, 4)) + X_pred = rng.random_sample((2, 4)) K_pred = np.dot(X_pred, X_fit.T) X_pred_centered = scaler.transform(X_pred) K_pred_centered = np.dot(X_pred_centered, X_fit_centered.T) @@ -462,7 +461,8 @@ def test_center_kernel(): def test_fit_transform(): - X = np.random.random((5, 4)) + rng = np.random.RandomState(0) + X = rng.random_sample((5, 4)) for obj in ((Scaler(), Normalizer(), Binarizer())): X_transformed = obj.fit(X).transform(X) X_transformed2 = obj.fit_transform(X) diff --git a/sklearn/utils/tests/test_shortest_path.py b/sklearn/utils/tests/test_shortest_path.py index a6fa27eff0..7d876a000d 100644 --- a/sklearn/utils/tests/test_shortest_path.py +++ b/sklearn/utils/tests/test_shortest_path.py @@ -30,14 +30,14 @@ def floyd_warshall_slow(graph, directed=False): def generate_graph(N=20): #sparse grid of distances - dist_matrix = np.random.random((N, N)) + rng = np.random.RandomState(0) + dist_matrix = rng.random_sample((N, N)) #make symmetric: distances are not direction-dependent dist_matrix += dist_matrix.T #make graph sparse - i = (np.random.randint(N, size=N * N / 2), - np.random.randint(N, size=N * N / 2)) + i = (rng.randint(N, size=N * N / 2), rng.randint(N, size=N * N / 2)) dist_matrix[i] = 0 #set diagonal to zero diff --git a/sklearn/utils/tests/test_weighted_mode.py b/sklearn/utils/tests/test_weighted_mode.py index e58e31c524..85ef08fae2 100644 --- a/sklearn/utils/tests/test_weighted_mode.py +++ b/sklearn/utils/tests/test_weighted_mode.py @@ -7,7 +7,8 @@ from scipy import stats def test_uniform_weights(): # with uniform weights, results should be identical to stats.mode - x = np.random.randint(10, size=(10, 5)) + rng = np.random.RandomState(0) + x = rng.randint(10, size=(10, 5)) weights = np.ones(x.shape) for axis in (None, 0, 1): @@ -23,8 +24,9 @@ def test_random_weights(): # with a score that is easily reproduced mode_result = 6 - x = np.random.randint(mode_result, size=(100, 10)) - w = np.random.random(x.shape) + rng = np.random.RandomState(0) + x = rng.randint(mode_result, size=(100, 10)) + w = rng.random_sample(x.shape) x[:, :5] = mode_result w[:, :5] += 1 -- GitLab