diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py index c8779d9c93557a3a6ab6b665ba24c3d06187000d..2b866eb09b9cfd4e71b6e373ea7b270ce1193cb2 100644 --- a/benchmarks/bench_glmnet.py +++ b/benchmarks/bench_glmnet.py @@ -63,7 +63,7 @@ if __name__ == '__main__': print '==================' X, Y, coef_ = make_regression( - n_samples=(i * step) + n_test_samples, n_features=n_features, + n_samples=(i * step) + n_test_samples, n_features=n_features, noise=0.1, n_informative=n_informative, coef=True) X_test = X[-n_test_samples:] @@ -103,7 +103,7 @@ if __name__ == '__main__': n_informative = n_features / 10 X, Y, coef_ = make_regression( - n_samples=(i * step) + n_test_samples, n_features=n_features, + n_samples=(i * step) + n_test_samples, n_features=n_features, noise=0.1, n_informative=n_informative, coef=True) X_test = X[-n_test_samples:] diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index f8f1f24e75d6b35aa65ec866b13822444a053005..b54e6bdeb1e239fdcc927aa82719cb91958644f0 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -34,8 +34,8 @@ def compute_bench(alpha, n_samples, n_features, precompute): len(n_features))) print '==================' n_informative = nf // 10 - X, Y, coef_ = make_regression(n_samples=ns, n_features=nf, - n_informative=n_informative, + X, Y, coef_ = make_regression(n_samples=ns, n_features=nf, + n_informative=n_informative, noise=0.1, coef=True) X /= np.sqrt(np.sum(X**2, axis=0)) # Normalize data diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py index 3e5259096cdc8e1b86bf1d027eb1e27769c56c7e..df84c2d1aa90455fbad69a393705779c4ce83c7c 100644 --- a/benchmarks/bench_sgd_regression.py +++ b/benchmarks/bench_sgd_regression.py @@ -33,9 +33,9 @@ if __name__ == "__main__": for i, n_train in enumerate(list_n_samples): for j, n_features in enumerate(list_n_features): X, y, coef = make_regression( - n_samples=n_train + n_test, n_features=n_features, + n_samples=n_train + n_test, n_features=n_features, noise=noise, coef=True) - + X_train = X[:n_train] y_train = y[:n_train] X_test = X[n_train:] diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index e3aebb64ea29b96feb498786aa8582dccb55b05a..397b865df700a94b4e708b1293278031d396da4e 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -69,7 +69,7 @@ estimators = [ True, False), ('Non-negative components - NMF', - decomposition.NMF(n_components=n_components, init='nndsvda', beta=5.0, + decomposition.NMF(n_components=n_components, init='nndsvda', beta=5.0, tol=5e-3, sparseness='components'), False, False), @@ -78,7 +78,7 @@ estimators = [ True, True), ('Sparse comp. 
- MiniBatchSparsePCA', - decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=1e-3, + decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=1e-3, n_iter=100, chunk_size=3), True, False), diff --git a/examples/feature_selection_pipeline.py b/examples/feature_selection_pipeline.py index 025e2c4d671feeddb392c33f293a1db7254ef3c3..8c5454e13d89d1acba42f19646c12b1af2e13ef8 100644 --- a/examples/feature_selection_pipeline.py +++ b/examples/feature_selection_pipeline.py @@ -15,7 +15,7 @@ from sklearn.pipeline import Pipeline # import some data to play with X, y = samples_generator.make_classification( - n_features=20, n_informative=3, n_redundant=0, + n_features=20, n_informative=3, n_redundant=0, n_classes=4, n_clusters_per_class=2) # ANOVA SVM-C diff --git a/examples/linear_model/plot_bayesian_ridge.py b/examples/linear_model/plot_bayesian_ridge.py index 7296bb02a21df8baf9ae34148aad3bf4057a5952..e3fad8daa5bb283ca4696468a5a344d2b47c5f8e 100644 --- a/examples/linear_model/plot_bayesian_ridge.py +++ b/examples/linear_model/plot_bayesian_ridge.py @@ -62,7 +62,7 @@ pl.legend(loc="best", prop=dict(size=12)) pl.figure(figsize=(6, 5)) pl.title("Histogram of the weights") pl.hist(clf.coef_, bins=n_features, log=True) -pl.plot(clf.coef_[relevant_features], 5*np.ones(len(relevant_features)), +pl.plot(clf.coef_[relevant_features], 5*np.ones(len(relevant_features)), 'ro', label="Relevant features") pl.ylabel("Features") pl.xlabel("Values of the weights") diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 133cfa67114cb0a24e8530f154c85cbdb062cbd5..3a10e2563d939283876ee26679088ee493839a4b 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -82,7 +82,7 @@ def plot_ic_criterion(model, name, color): alpha_ = model.alpha_ alphas_ = model.alphas_ criterion_ = model.criterion_ - pl.plot(-np.log10(alphas_), criterion_, '--', color=color, + pl.plot(-np.log10(alphas_), criterion_, '--', color=color, linewidth=3, label='%s criterion' % name) pl.axvline(-np.log10(alpha_), color=color, linewidth=3, label='alpha: %s estimate' % name) diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index 8be3de5d40323cf48638e948f0d122e100e50d0c..0d4c3b2e4247daa7cc47484b0751c28f0b5f1142 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -41,7 +41,7 @@ except: pl.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral) methods = ['standard', 'ltsa', 'hessian', 'modified'] -labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] +labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] for i, method in enumerate(methods): t0 = time() diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index c0e7eb230d2187d6822a37d6e1555468a82ea4b2..9e4298b5c411902c732313e156bfd8c669f602f4 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -88,8 +88,8 @@ plot_embedding(X_projected, "Random Projection of the digits") print "Computing PCA projection" t0 = time() X_pca = decomposition.RandomizedPCA(n_components=2).fit_transform(X) -plot_embedding(X_pca, - "Principal Components projection of the digits (time %.2fs)" % +plot_embedding(X_pca, + "Principal Components projection of the digits (time %.2fs)" % (time() - t0)) #---------------------------------------------------------------------- @@ -100,8 +100,8 @@ X2 = 
X.copy() X2.flat[::X.shape[1] + 1] += 0.01 # Make X invertible t0 = time() X_lda = lda.LDA(n_components=2).fit_transform(X2, y) -plot_embedding(X_lda, - "Linear Discriminant projection of the digits (time %.2fs)" % +plot_embedding(X_lda, + "Linear Discriminant projection of the digits (time %.2fs)" % (time() - t0)) @@ -111,8 +111,8 @@ print "Computing Isomap embedding" t0 = time() X_iso = manifold.Isomap(n_neighbors, out_dim=2).fit_transform(X) print "Done." -plot_embedding(X_iso, - "Isomap projection of the digits (time %.2fs)" % +plot_embedding(X_iso, + "Isomap projection of the digits (time %.2fs)" % (time() - t0)) @@ -124,8 +124,8 @@ clf = manifold.LocallyLinearEmbedding(n_neighbors, out_dim=2, t0 = time() X_lle = clf.fit_transform(X) print "Done. Reconstruction error: %g" % clf.reconstruction_error_ -plot_embedding(X_lle, - "Locally Linear Embedding of the digits (time %.2fs)" % +plot_embedding(X_lle, + "Locally Linear Embedding of the digits (time %.2fs)" % (time() - t0)) @@ -137,8 +137,8 @@ clf = manifold.LocallyLinearEmbedding(n_neighbors, out_dim=2, t0 = time() X_mlle = clf.fit_transform(X) print "Done. Reconstruction error: %g" % clf.reconstruction_error_ -plot_embedding(X_mlle, - "Modified Locally Linear Embedding of the digits (time %.2fs)" % +plot_embedding(X_mlle, + "Modified Locally Linear Embedding of the digits (time %.2fs)" % (time() - t0)) @@ -150,8 +150,8 @@ clf = manifold.LocallyLinearEmbedding(n_neighbors, out_dim=2, t0 = time() X_hlle = clf.fit_transform(X) print "Done. Reconstruction error: %g" % clf.reconstruction_error_ -plot_embedding(X_hlle, - "Hessian Locally Linear Embedding of the digits (time %.2fs)" % +plot_embedding(X_hlle, + "Hessian Locally Linear Embedding of the digits (time %.2fs)" % (time() - t0)) @@ -164,7 +164,7 @@ t0 = time() X_ltsa = clf.fit_transform(X) print "Done. Reconstruction error: %g" % clf.reconstruction_error_ plot_embedding(X_ltsa, - "Local Tangent Space Alignment of the digits (time %.2fs)" % + "Local Tangent Space Alignment of the digits (time %.2fs)" % (time() - t0)) pl.show() diff --git a/examples/mlcomp_sparse_document_classification.py b/examples/mlcomp_sparse_document_classification.py index 48b9d6125c6cda858862019865dd9fa08160c20a..4ac6839538d3b21915d6e8cc200338cec63980c9 100644 --- a/examples/mlcomp_sparse_document_classification.py +++ b/examples/mlcomp_sparse_document_classification.py @@ -105,22 +105,22 @@ def benchmark(clf_class, params, name): t0 = time() pred = clf.predict(X_test) print "done in %fs" % (time() - t0) - + print "Classification report on test set for classifier:" print clf print print classification_report(y_test, pred, target_names=news_test.target_names) - + cm = confusion_matrix(y_test, pred) print "Confusion matrix:" print cm - + # Show confusion matrix pl.matshow(cm) pl.title('Confusion matrix of the %s classifier' % name) pl.colorbar() - - + + print "Testbenching a linear classifier..." parameters = { 'loss': 'hinge', diff --git a/examples/plot_digits_classification.py b/examples/plot_digits_classification.py index 67da225d19b4f773d2f1725408f4f516298dc1f5..e1fc6e300d46abcf65f4b9d3d6b314c1736158c2 100644 --- a/examples/plot_digits_classification.py +++ b/examples/plot_digits_classification.py @@ -6,7 +6,7 @@ Recognizing hand-written digits An example showing how the scikit-learn can be used to recognize images of hand-written digits. -This example is commented in the +This example is commented in the :ref:`tutorial section of the user manual <getting_started>`. 
""" @@ -27,7 +27,7 @@ digits = datasets.load_digits() # The data that we are interested in is made of 8x8 images of digits, # let's have a look at the first 3 images, stored in the `images` # attribute of the dataset. If we were working from image files, we -# could load them using pylab.imread. For these images know which +# could load them using pylab.imread. For these images know which # digit they represent: it is given in the 'target' of the dataset. for index, (image, label) in enumerate(zip(digits.images, digits.target)[:4]): pl.subplot(2, 4, index+1) diff --git a/sklearn/base.py b/sklearn/base.py index 7f85b2fb475b37f1bf5e086916e1c205af6ea891..5248a3f9b34278e63776387831be3d523baf21d3 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -52,12 +52,12 @@ def clone(estimator, safe=True): param2 = params_set[name] if isinstance(param1, np.ndarray): # For ndarrays, we do not test for complete equality - equality_test = (param1.shape == param2.shape - and param1.dtype == param2.dtype - and param1[0] == param2[0] + equality_test = (param1.shape == param2.shape + and param1.dtype == param2.dtype + and param1[0] == param2[0] and param1[-1] == param2[-1]) elif sparse.issparse(param1): - # For sparse matrices equality doesn't work + # For sparse matrices equality doesn't work equality_test = (param1.__class__ == param2.__class__ and param1.data[0] == param2.data[0] and param1.data[-1] == param2.data[-1] diff --git a/sklearn/check_build/setup.py b/sklearn/check_build/setup.py index 685d375401769f0b8179d250830b3b02793fa4f3..41fe65fd286d2d9ed5d79b9a7579991a3f455ab6 100644 --- a/sklearn/check_build/setup.py +++ b/sklearn/check_build/setup.py @@ -11,7 +11,7 @@ def configuration(parent_package='', top_path=None): config.add_extension('_check_build', sources=['_check_build.c'], include_dirs=[numpy.get_include()]) - + return config if __name__ == '__main__': diff --git a/sklearn/cluster/setup.py b/sklearn/cluster/setup.py index afbf73daad4d7874cee8cd96d321eaa9aa92e3b0..b7c3e75cacbeb0c2ade270ff7e6a3d8f395c1653 100644 --- a/sklearn/cluster/setup.py +++ b/sklearn/cluster/setup.py @@ -15,7 +15,7 @@ def configuration(parent_package='', top_path=None): sources=['_k_means.c'], include_dirs=[numpy.get_include()] ) - + return config if __name__ == '__main__': diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index d725a0e86bffe8b3628f3e0931687e836c6ab29b..3d3b972831265599b1ac1833c0546c939c67c69d 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -43,13 +43,13 @@ def test_bin_seeds(): # Data is just 6 points in the plane X = np.array([[1., 1.], [1.5, 1.5], [1.8, 1.2], [2., 1.], [2.1, 1.1], [0., 0.]]) - + # With a bin coarseness of 1.0 and min_bin_freq of 1, 3 bins should be found ground_truth = set([(1.,1.), (2.,1.), (0.,0.)]) test_bins = get_bin_seeds(X, 1, 1) test_result = set([tuple(p) for p in test_bins]) assert_true(len(ground_truth.symmetric_difference(test_result)) == 0) - + # With a bin coarseness of 1.0 and min_bin_freq of 2, 2 bins should be found ground_truth = set([(1.,1.), (2.,1.)]) test_bins = get_bin_seeds(X, 1, 2) @@ -61,4 +61,4 @@ def test_bin_seeds(): test_result = set([tuple(p) for p in test_bins]) assert_true(len(test_result) == 6) - + diff --git a/sklearn/covariance/empirical_covariance_.py b/sklearn/covariance/empirical_covariance_.py index ff488a9ce85fb3ec692c1c1f5b3fe9b003ab63af..fc270be2576e15fe4ff8aacfe46f9275b7dade4c 100644 --- a/sklearn/covariance/empirical_covariance_.py +++ 
b/sklearn/covariance/empirical_covariance_.py @@ -210,5 +210,5 @@ class EmpiricalCovariance(BaseEstimator): result = squared_norm else: result = np.sqrt(squared_norm) - + return result diff --git a/sklearn/datasets/olivetti_faces.py b/sklearn/datasets/olivetti_faces.py index ed33eb15d23437ee3e9f9c4e79ee9d3d2a7868ba..6ce33ebfd19d5c40b4d65f4d4826f6b75f46547d 100644 --- a/sklearn/datasets/olivetti_faces.py +++ b/sklearn/datasets/olivetti_faces.py @@ -52,7 +52,7 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0, all scikit learn data is stored in '~/scikit_learn_data' subfolders. shuffle : boolean, optional - If True the order of the dataset is shuffled to avoid having + If True the order of the dataset is shuffled to avoid having images of the same person grouped. download_if_missing: optional, True by default diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index a103750fb8a491854abf1489cc347e1ea6621317..cd06b6aafb832d6c042a61108d76e31b5630725f 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -142,7 +142,7 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, cache_path=cache_path) else: raise IOError('20Newsgroups dataset not found') - + if subset in ('train', 'test'): data = cache[subset] elif subset == 'all': diff --git a/sklearn/externals/joblib/__init__.py b/sklearn/externals/joblib/__init__.py index 581a4d506378782f56962c6d7893226008efcdc1..a6117d133e1ae2dc55af66d0d3316209a027c1e2 100755 --- a/sklearn/externals/joblib/__init__.py +++ b/sklearn/externals/joblib/__init__.py @@ -15,9 +15,9 @@ data and has specific optimizations for `numpy` arrays. It is ============================== ============================================== **User documentation**: http://packages.python.org/joblib - + **Download packages**: http://pypi.python.org/pypi/joblib#downloads - + **Source code**: http://github.com/joblib/joblib **Report issues**: http://github.com/joblib/joblib/issues @@ -36,7 +36,7 @@ solution. over, for instance when prototyping computational-heavy jobs (as in scientific development), but hand-crafted solution to aleviate this issue is error-prone and often leads to unreproducible results - + * **Persist to disk transparently**: persisting in an efficient way arbitrary objects containing large data is hard. In addition, hand-written persistence does not link easily the file on disk to the @@ -46,7 +46,7 @@ solution. It strives to address these problems while **leaving your code and your flow control as unmodified as possible** (no framework, no new -paradigms). +paradigms). Main features ------------------ @@ -75,7 +75,7 @@ Main features >>> c = square(a) >>> # The above call did not trigger an evaluation -2) **Embarrassingly parallel helper:** to make is easy to write readable +2) **Embarrassingly parallel helper:** to make it easy to write readable parallel code and debug it quickly: >>> from sklearn.externals.joblib import Parallel, delayed @@ -88,10 +88,10 @@ Main features progressively acquire better logging mechanism to help track what has been ran, and capture I/O easily. In addition, Joblib will provide a few I/O primitives, to easily define define logging and - display streams, and provide a way of compiling a report. + display streams, and provide a way of compiling a report. We want to be able to quickly inspect what has been run. -.. +.. 
>>> import shutil ; shutil.rmtree('/tmp/joblib/') """ diff --git a/sklearn/externals/joblib/disk.py b/sklearn/externals/joblib/disk.py index 79f6797715b912549deb1a8f35919c5e395a55f9..8b026af05ee74b8887d82a79dd831fe99add5899 100755 --- a/sklearn/externals/joblib/disk.py +++ b/sklearn/externals/joblib/disk.py @@ -2,7 +2,7 @@ Disk management utilities. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2010 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -13,7 +13,7 @@ import shutil import time def disk_used(path): - """ Return the disk usage in a directory. + """ Return the disk usage in a directory. """ size = 0 for file in os.listdir(path) + ['.']: @@ -49,7 +49,7 @@ RM_SUBDIRS_RETRY_TIME = 0.1 def rm_subdirs(path, onerror=None): """Remove all subdirectories in this path. - + The directory indicated by `path` is left in place, and its subdirectories are erased. diff --git a/sklearn/externals/joblib/format_stack.py b/sklearn/externals/joblib/format_stack.py index 246db3fb1fb38a01d6a6b8341423746bc0c68fa1..c01795e6f27bf361aeb7d5479554a159139e15a4 100755 --- a/sklearn/externals/joblib/format_stack.py +++ b/sklearn/externals/joblib/format_stack.py @@ -63,7 +63,7 @@ def safe_repr(value): except: return 'UNRECOVERABLE REPR FAILURE' -def eq_repr(value, repr=safe_repr): +def eq_repr(value, repr=safe_repr): return '=%s' % repr(value) @@ -92,7 +92,7 @@ def uniq_stable(elems): ################################################################################ def fix_frame_records_filenames(records): """Try to fix the filenames in each record from inspect.getinnerframes(). - + Particularly, modules loaded from within zip files have useless filenames attached to their code object, and inspect.getinnerframes() just uses it. """ @@ -106,7 +106,7 @@ def fix_frame_records_filenames(records): # __file__. It might also be None if the error occurred during # import. filename = better_fn - fixed_records.append((frame, filename, line_no, func_name, lines, index)) + fixed_records.append((frame, filename, line_no, func_name, lines, index)) return fixed_records @@ -158,7 +158,7 @@ def _format_traceback_lines(lnum, index, lines, lvals=None): if pad >= 3: marker = '-'*(pad-3) + '-> ' elif pad == 2: - marker = '> ' + marker = '> ' elif pad == 1: marker = '>' else: @@ -196,7 +196,7 @@ def format_records(records): #, print_globals=False): # able to remove this try/except when 2.4 becomes a # requirement. Bug details at http://python.org/sf/1005466 print "\nJoblib's exception reporting continues...\n" - + if func == '?': call = '' else: @@ -228,7 +228,7 @@ def format_records(records): #, print_globals=False): there is no way to disambguate partial dotted structures until the full list is known. 
The caller is responsible for pruning the final list of duplicates before using it.""" - + # build composite names if token == '.': try: @@ -275,7 +275,7 @@ def format_records(records): #, print_globals=False): print ("An unexpected error occurred while tokenizing input\n" "The following traceback may be corrupted or invalid\n" "The error message is: %s\n" % msg) - + # prune names list of duplicates, but keep the right order unique_names = uniq_stable(names) @@ -315,14 +315,14 @@ def format_records(records): #, print_globals=False): else: frames.append('%s%s' % (level,''.join( _format_traceback_lines(lnum, index, lines, lvals)))) - + return frames ################################################################################ def format_exc(etype, evalue, etb, context=5, tb_offset=0): """ Return a nice text document describing the traceback. - + Parameters ----------- etype, evalue, etb: as returned by sys.exc_info @@ -340,7 +340,7 @@ def format_exc(etype, evalue, etb, context=5, tb_offset=0): pyver = 'Python ' + string.split(sys.version)[0] + ': ' + sys.executable date = time.ctime(time.time()) pid = 'PID: %i' % os.getpid() - + head = '%s%s%s\n%s%s%s' % (etype, ' '*(75-len(str(etype))-len(date)), date, pid, ' '*(75-len(str(pid))-len(pyver)), pyver) @@ -407,7 +407,7 @@ def format_outer_frames(context=5, stack_start=None, stack_end=None, filename = filename[:-4] + '.py' if ignore_ipython: # Hack to avoid printing the interals of IPython - if (os.path.basename(filename) == 'iplib.py' + if (os.path.basename(filename) == 'iplib.py' and func_name in ('safe_execfile', 'runcode')): break maybeStart = line_no -1 - context//2 diff --git a/sklearn/externals/joblib/func_inspect.py b/sklearn/externals/joblib/func_inspect.py index 7899e14d620335b4e3c14903aac0098ed04d029b..c6d0a9214504cabe51099524fe32e6011d3c29eb 100755 --- a/sklearn/externals/joblib/func_inspect.py +++ b/sklearn/externals/joblib/func_inspect.py @@ -2,7 +2,7 @@ My own variation on function-specific inspect-like features. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -13,7 +13,7 @@ import os def get_func_code(func): """ Attempts to retrieve a reliable function code hash. - + The reason we don't use inspect.getsource is that it caches the source, whereas we want this to be modified on the fly when the function is modified. @@ -54,7 +54,7 @@ def get_func_code(func): return repr(func), source_file, -1 -def get_func_name(func, resolv_alias=True, win_characters=True): +def get_func_name(func, resolv_alias=True, win_characters=True): """ Return the function import path (as a list of module names), and a name for the function. 
@@ -94,7 +94,7 @@ def get_func_name(func, resolv_alias=True, win_characters=True): module = module.split('.') if hasattr(func, 'func_name'): name = func.func_name - elif hasattr(func, '__name__'): + elif hasattr(func, '__name__'): name = func.__name__ else: name = 'unknown' @@ -105,7 +105,7 @@ def get_func_name(func, resolv_alias=True, win_characters=True): if not func.func_globals[name] is func: name = '%s-alias' % name if inspect.ismethod(func): - # We need to add the name of the class + # We need to add the name of the class if hasattr(func, 'im_class'): klass = func.im_class module.append(klass.__name__) @@ -126,7 +126,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): func: callable Function giving the argument specification ignore_lst: list of strings - List of arguments to ignore (either a name of an argument + List of arguments to ignore (either a name of an argument in the function spec, or '*', or '**') *args: list Positional arguments passed to the function. @@ -146,7 +146,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): raise ValueError('ignore_lst must be a list of parameters to ignore ' '%s (type %s) was given' % (ignore_lst, type(ignore_lst))) # Special case for functools.partial objects - if (not inspect.ismethod(func) and not inspect.isfunction(func)): + if (not inspect.ismethod(func) and not inspect.isfunction(func)): if ignore_lst: warnings.warn('Cannot inspect object %s, ignore list will ' 'not work.' % func, stacklevel=2) @@ -165,7 +165,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): # First argument is 'self', it has been removed by Python # we need to add it back: args = [func.im_self, ] + args - # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such + # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such # as on ndarrays. _, name = get_func_name(func, resolv_alias=False) @@ -186,11 +186,11 @@ def filter_args(func, ignore_lst, *args, **kwargs): # Missing argument raise ValueError('Wrong number of arguments for %s%s:\n' ' %s(%s, %s) was called.' - % (name, + % (name, inspect.formatargspec(*inspect.getargspec(func)), name, repr(args)[1:-1], - ', '.join('%s=%s' % (k, v) + ', '.join('%s=%s' % (k, v) for k, v in kwargs.iteritems()) ) ) @@ -218,7 +218,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): arg_dict.pop(item) else: raise ValueError("Ignore list: argument '%s' is not defined for " - "function %s%s" % + "function %s%s" % (item, name, inspect.formatargspec(arg_names, arg_varargs, @@ -226,5 +226,5 @@ def filter_args(func, ignore_lst, *args, **kwargs): arg_defaults, ))) # XXX: Return a sorted list of pairs? - return arg_dict + return arg_dict diff --git a/sklearn/externals/joblib/hashing.py b/sklearn/externals/joblib/hashing.py index e00a70768105f43c9bfb109631b0637acefa5332..8183382ce2d0b1427684d084e86066a04e62b9db 100755 --- a/sklearn/externals/joblib/hashing.py +++ b/sklearn/externals/joblib/hashing.py @@ -1,9 +1,9 @@ """ -Fast cryptographic hash of Python objects, with a special case for fast +Fast cryptographic hash of Python objects, with a special case for fast hashing of numpy arrays. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. 
@@ -76,7 +76,7 @@ class NumpyHasher(Hasher): # XXX: There might be a more efficient way of doing this self._hash.update(self.np.getbuffer(obj.flatten())) - # We store the class, to be able to distinguish between + # We store the class, to be able to distinguish between # Objects with the same binary content, but different # classes. if self.coerce_mmap and isinstance(obj, self.np.memmap): @@ -86,7 +86,7 @@ class NumpyHasher(Hasher): klass = self.np.ndarray else: klass = obj.__class__ - # We also return the dtype and the shape, to distinguish + # We also return the dtype and the shape, to distinguish # different views on the same data with different dtypes. # The object will be pickled by the pickler hashed at the end. @@ -95,14 +95,14 @@ class NumpyHasher(Hasher): def hash(obj, hash_name='md5', coerce_mmap=False): - """ Quick calculation of a hash to identify uniquely Python objects + """ Quick calculation of a hash to identify uniquely Python objects containing numpy arrays. - + Parameters ----------- hash_name: 'md5' or 'sha1' - Hashing algorithm used. sha1 is supposedly safer, but md5 is + Hashing algorithm used. sha1 is supposedly safer, but md5 is faster. coerce_mmap: boolean Make no difference between np.memmap and np.ndarray diff --git a/sklearn/externals/joblib/logger.py b/sklearn/externals/joblib/logger.py index eed6baeb76c0075f1d6a613e7070a93d08668bae..368e52a188fe85cc33272fb1b164e398c55869e6 100755 --- a/sklearn/externals/joblib/logger.py +++ b/sklearn/externals/joblib/logger.py @@ -4,7 +4,7 @@ Helpers for logging. This module needs much love to become useful. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2008 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -43,7 +43,7 @@ def short_format_time(t): class Logger(object): """ Base class for logging messages. """ - + def __init__(self, depth=3): """ Parameters @@ -99,7 +99,7 @@ class PrintTime(object): for i in range(1, 9): if os.path.exists(logfile+'.%i' % i): try: - shutil.move(logfile+'.%i' % i, + shutil.move(logfile+'.%i' % i, logfile+'.%i' % (i+1)) except: "No reason failing here" diff --git a/sklearn/externals/joblib/memory.py b/sklearn/externals/joblib/memory.py index d7c22eb404298bc5ede54d822687147acefa3c7d..7486ccc4b830f62536cdd30429a4d0cb15a1f97b 100755 --- a/sklearn/externals/joblib/memory.py +++ b/sklearn/externals/joblib/memory.py @@ -4,7 +4,7 @@ is called with the same input arguments. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -45,10 +45,10 @@ FIRST_LINE_TEXT = "# first line:" # object, and the interface to persist and query should be separated in # the data store. # -# This would enable creating 'Memory' objects with a different logic for +# This would enable creating 'Memory' objects with a different logic for # pickling that would simply span a MemorizedFunc with the same # store (or do we want to copy it to avoid cross-talks?), for instance to -# implement HDF5 pickling. +# implement HDF5 pickling. # TODO: Same remark for the logger, and probably use the Python logging # mechanism. 
@@ -76,9 +76,9 @@ class JobLibCollisionWarning(UserWarning): # class `MemorizedFunc` ################################################################################ class MemorizedFunc(Logger): - """ Callable object decorating a function for caching its return value + """ Callable object decorating a function for caching its return value each time it is called. - + All values are cached on the filesystem, in a deep directory structure. Methods are provided to inspect the cache or clean it. @@ -97,14 +97,14 @@ class MemorizedFunc(Logger): arguments. Only used if save_npy was true when the cache was created. verbose: int, optional - The verbosity flag, controls messages that are issued as + The verbosity flag, controls messages that are issued as the function is revaluated. """ #------------------------------------------------------------------------- # Public interface #------------------------------------------------------------------------- - - def __init__(self, func, cachedir, ignore=None, save_npy=True, + + def __init__(self, func, cachedir, ignore=None, save_npy=True, mmap_mode=None, verbose=1, timestamp=None): """ Parameters @@ -124,7 +124,7 @@ class MemorizedFunc(Logger): arguments. Only used if save_npy was true when the cache was created. verbose: int, optional - Verbosity flag, controls the debug messages that are issued + Verbosity flag, controls the debug messages that are issued as functions are revaluated. The higher, the more verbose timestamp: float, optional The reference time from which times in tracing messages @@ -162,7 +162,7 @@ class MemorizedFunc(Logger): # function code has changed output_dir, _ = self.get_output_dir(*args, **kwargs) # FIXME: The statements below should be try/excepted - if not (self._check_previous_func_code(stacklevel=3) and + if not (self._check_previous_func_code(stacklevel=3) and os.path.exists(output_dir)): return self.call(*args, **kwargs) else: @@ -192,13 +192,13 @@ class MemorizedFunc(Logger): depending from it. In addition, when unpickling, we run the __init__ """ - return (self.__class__, (self.func, self.cachedir, self.ignore, + return (self.__class__, (self.func, self.cachedir, self.ignore, self.save_npy, self.mmap_mode, self._verbose)) #------------------------------------------------------------------------- # Private interface #------------------------------------------------------------------------- - + def _get_func_dir(self, mkdir=True): """ Get the directory corresponding to the cache for the function. @@ -210,7 +210,7 @@ class MemorizedFunc(Logger): try: os.makedirs(func_dir) except OSError: - """ Dir exists: we have a race condition here, when using + """ Dir exists: we have a race condition here, when using multiprocessing. """ # XXX: Ugly @@ -225,12 +225,12 @@ class MemorizedFunc(Logger): """ coerce_mmap = (self.mmap_mode is not None) argument_hash = hash(filter_args(self.func, self.ignore, - *args, **kwargs), + *args, **kwargs), coerce_mmap=coerce_mmap) output_dir = os.path.join(self._get_func_dir(self.func), argument_hash) return output_dir, argument_hash - + def _write_func_code(self, filename, func_code, first_line): """ Write the function code and the filename to a file. @@ -240,7 +240,7 @@ class MemorizedFunc(Logger): def _check_previous_func_code(self, stacklevel=2): - """ + """ stacklevel is the depth a which this function is called, to issue useful warnings to the user. 
""" @@ -252,7 +252,7 @@ class MemorizedFunc(Logger): func_code_file = os.path.join(func_dir, 'func_code.py') try: - if not os.path.exists(func_code_file): + if not os.path.exists(func_code_file): raise IOError old_func_code, old_first_line = \ extract_first_line(file(func_code_file).read()) @@ -263,14 +263,14 @@ class MemorizedFunc(Logger): return True # We have differing code, is this because we are refering to - # differing functions, or because the function we are refering as + # differing functions, or because the function we are refering as # changed? if old_first_line == first_line == -1: _, func_name = get_func_name(self.func, resolv_alias=False, win_characters=False) if not first_line == -1: - func_description = '%s (%s:%i)' % (func_name, + func_description = '%s (%s:%i)' % (func_name, source_file, first_line) else: func_description = func_name @@ -282,7 +282,7 @@ class MemorizedFunc(Logger): # same than the code store, we have a collision: the code in the # file has not changed, but the name we have is pointing to a new # code block. - if (not old_first_line == first_line + if (not old_first_line == first_line and source_file is not None and os.path.exists(source_file)): _, func_name = get_func_name(self.func, resolv_alias=False) @@ -294,7 +294,7 @@ class MemorizedFunc(Logger): warnings.warn(JobLibCollisionWarning( 'Possible name collisions between functions ' "'%s' (%s:%i) and '%s' (%s:%i)" % - (func_name, source_file, old_first_line, + (func_name, source_file, old_first_line, func_name, source_file, first_line)), stacklevel=stacklevel) @@ -305,7 +305,7 @@ class MemorizedFunc(Logger): def clear(self, warn=True): - """ Empty the function's cache. + """ Empty the function's cache. """ func_dir = self._get_func_dir(mkdir=False) if self._verbose and warn: @@ -323,7 +323,7 @@ class MemorizedFunc(Logger): def call(self, *args, **kwargs): - """ Force the execution of the function with the given arguments and + """ Force the execution of the function with the given arguments and persist the output values. """ start_time = time.time() @@ -342,7 +342,7 @@ class MemorizedFunc(Logger): def format_call(self, *args, **kwds): - """ Returns a nicely formatted statement displaying the function + """ Returns a nicely formatted statement displaying the function call with the given arguments. """ path, signature = self.format_signature(self.func, *args, @@ -390,7 +390,7 @@ class MemorizedFunc(Logger): filename = os.path.join(dir, 'output.pkl') if 'numpy' in sys.modules and self.save_npy: - numpy_pickle.dump(output, filename) + numpy_pickle.dump(output, filename) else: output_file = file(filename, 'w') pickle.dump(output, output_file, protocol=2) @@ -433,7 +433,7 @@ class MemorizedFunc(Logger): ) filename = os.path.join(output_dir, 'output.pkl') if self.save_npy: - return numpy_pickle.load(filename, + return numpy_pickle.load(filename, mmap_mode=self.mmap_mode) else: output_file = file(filename, 'r') @@ -444,7 +444,7 @@ class MemorizedFunc(Logger): #------------------------------------------------------------------------- # Private `object` interface #------------------------------------------------------------------------- - + def __repr__(self): return '%s(func=%s, cachedir=%s)' % ( self.__class__.__name__, @@ -460,7 +460,7 @@ class MemorizedFunc(Logger): class Memory(Logger): """ A context object for caching a function's return value each time it is called with the same input arguments. - + All values are cached on the filesystem, in a deep directory structure. 
@@ -469,7 +469,7 @@ class Memory(Logger): #------------------------------------------------------------------------- # Public interface #------------------------------------------------------------------------- - + def __init__(self, cachedir, save_npy=True, mmap_mode=None, verbose=1): """ @@ -488,7 +488,7 @@ class Memory(Logger): arguments. Only used if save_npy was true when the cache was created. verbose: int, optional - Verbosity flag, controls the debug messages that are issued + Verbosity flag, controls the debug messages that are issued as functions are revaluated. """ # XXX: Bad explaination of the None value of cachedir @@ -527,13 +527,13 @@ class Memory(Logger): Returns ------- decorated_func: MemorizedFunc object - The returned object is a MemorizedFunc object, that is + The returned object is a MemorizedFunc object, that is callable (behaves like a function), but offers extra methods for cache lookup and management. See the documentation for :class:`joblib.memory.MemorizedFunc`. """ if func is None: - # Partial application, to be able to specify extra keyword + # Partial application, to be able to specify extra keyword # arguments in decorators return functools.partial(self.cache, ignore=ignore) if self.cachedir is None: @@ -576,7 +576,7 @@ class Memory(Logger): #------------------------------------------------------------------------- # Private `object` interface #------------------------------------------------------------------------- - + def __repr__(self): return '%s(cachedir=%s)' % ( self.__class__.__name__, @@ -590,7 +590,7 @@ class Memory(Logger): In addition, when unpickling, we run the __init__ """ # We need to remove 'joblib' from the end of cachedir - return (self.__class__, (self.cachedir[:-7], + return (self.__class__, (self.cachedir[:-7], self.save_npy, self.mmap_mode, self._verbose)) diff --git a/sklearn/externals/joblib/my_exceptions.py b/sklearn/externals/joblib/my_exceptions.py index 9f7c9edac8956e10df939dd0494d4c9e08775fa5..f81b149b05de89ec70061569a5c0322313357870 100755 --- a/sklearn/externals/joblib/my_exceptions.py +++ b/sklearn/externals/joblib/my_exceptions.py @@ -32,7 +32,7 @@ class TransportableException(JoblibException): """ An exception containing all the info to wrap an original exception and recreate it. """ - + def __init__(self, message, etype): self.message = message self.etype = etype @@ -41,7 +41,7 @@ class TransportableException(JoblibException): # For pickling return self.__class__, (self.message, self.etype), {} - + _exception_mapping = dict() @@ -55,7 +55,7 @@ def _mk_exception(exception, name=None): # Avoid creating twice the same exception this_exception = _exception_mapping[this_name] else: - this_exception = type(this_name, (exception, JoblibException), + this_exception = type(this_name, (exception, JoblibException), dict(__repr__=JoblibException.__repr__, __str__=JoblibException.__str__), ) @@ -88,7 +88,7 @@ def _mk_common_exceptions(): return namespace -# Updating module locals so that the exceptions pickle right. AFAIK this +# Updating module locals so that the exceptions pickle right. AFAIK this # works only at module-creation time locals().update(_mk_common_exceptions()) diff --git a/sklearn/externals/joblib/numpy_pickle.py b/sklearn/externals/joblib/numpy_pickle.py index 54bc938d26ae08cbc24eea354cc006f5431a4d33..111b90d52ec219cc7b45ead494ab8ec8dd7d65b4 100755 --- a/sklearn/externals/joblib/numpy_pickle.py +++ b/sklearn/externals/joblib/numpy_pickle.py @@ -2,7 +2,7 @@ A pickler to save numpy arrays in separate .npy files. 
""" -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -50,7 +50,7 @@ class NumpyPickler(pickle.Pickler): def save(self, obj): """ Subclass the save method, to save ndarray subclasses in npy - files, rather than pickling them. Off course, this is a + files, rather than pickling them. Off course, this is a total abuse of the Pickler class. """ if isinstance(obj, self.np.ndarray): @@ -88,8 +88,8 @@ class NumpyUnpickler(Unpickler): def load_build(self): """ This method is called to set the state of a knewly created - object. - + object. + We capture it to replace our place-holder objects, NDArrayWrapper, by the array we are interested in. We replace directly in the stack of pickler. @@ -116,7 +116,7 @@ class NumpyUnpickler(Unpickler): # Utility functions def dump(value, filename): - """ Persist an arbitrary Python object into a filename, with numpy arrays + """ Persist an arbitrary Python object into a filename, with numpy arrays saved as separate .npy files. See Also @@ -134,7 +134,7 @@ def dump(value, filename): def load(filename, mmap_mode=None): - """ Reconstruct a Python object and the numpy arrays it contains from + """ Reconstruct a Python object and the numpy arrays it contains from a persisted file. This function loads the numpy array files saved separately. If diff --git a/sklearn/externals/joblib/parallel.py b/sklearn/externals/joblib/parallel.py index 312f4ee9a8d6da82843db71151125b7f0c8d2471..5a66abff078a674293de771d959332a6b32d255e 100755 --- a/sklearn/externals/joblib/parallel.py +++ b/sklearn/externals/joblib/parallel.py @@ -54,7 +54,7 @@ class WorkerInterrupt(Exception): class SafeFunction(object): """ Wraps a function to make it exception with full traceback in their representation. - Useful for parallel computing with multiprocessing, for which + Useful for parallel computing with multiprocessing, for which exceptions cannot be captured. """ @@ -109,7 +109,7 @@ class ImmediateApply(object): ################################################################################ class CallBack(object): - """ Callback used by parallel: it is used for progress reporting, and + """ Callback used by parallel: it is used for progress reporting, and to add data to be processed """ def __init__(self, index, parallel): @@ -145,8 +145,8 @@ class CallBack(object): writer = sys.stdout.write writer('[%s]: Done %3i out of %s |elapsed: %s remaining: %s\n' % (self.parallel, - self.index+1, - total, + self.index+1, + total, short_format_time(elapsed_time), short_format_time(remaining_time), )) @@ -214,7 +214,7 @@ class Parallel(Logger): (0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5) >>> i (0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0) - + The progress meter:: >>> from time import sleep @@ -236,7 +236,7 @@ class Parallel(Logger): >>> Parallel(n_jobs=2)(delayed(atoi)(n) for n in ('1', '300', 30)) #doctest: +SKIP #... --------------------------------------------------------------------------- - Sub-process traceback: + Sub-process traceback: --------------------------------------------------------------------------- TypeError Fri Jul 2 20:32:05 2010 PID: 4151 Python 2.6.5: /usr/bin/python @@ -245,14 +245,14 @@ class Parallel(Logger): 398 is chosen from the leading characters of s, 0 for octal, 0x or 399 0X for hexadecimal. If base is 16, a preceding 0x or 0X is 400 accepted. 
- 401 + 401 402 """ --> 403 return _int(s, base) - 404 - 405 + 404 + 405 406 # Convert string to long integer 407 def atol(s, base=10): - + TypeError: int() can't convert non-string with explicit base ___________________________________________________________________________ @@ -269,7 +269,7 @@ class Parallel(Logger): ... for i in range(6): ... print 'Produced %s' % i ... yield i - + >>> out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')( ... delayed(sqrt)(i) for i in producer()) #doctest: +SKIP Produced 0 @@ -294,7 +294,7 @@ class Parallel(Logger): def dispatch(self, func, args, kwargs): - """ Queue the function for computing, with or without multiprocessing + """ Queue the function for computing, with or without multiprocessing """ if self._pool is None: job = ImmediateApply(func, args, kwargs) @@ -325,7 +325,7 @@ class Parallel(Logger): self._dispatch_amount += 1 while self._dispatch_amount: try: - # XXX: possible race condition shuffling the order of + # XXX: possible race condition shuffling the order of # dispatchs in the next two lines. func, args, kwargs = self._iterable.next() self.dispatch(func, args, kwargs) @@ -352,7 +352,7 @@ class Parallel(Logger): try: self._output.append(job.get()) except tuple(self.exceptions), exception: - if isinstance(exception, + if isinstance(exception, (KeyboardInterrupt, WorkerInterrupt)): # We have captured a user interruption, clean up # everything @@ -361,7 +361,7 @@ class Parallel(Logger): self._pool.terminate() raise exception elif isinstance(exception, TransportableException): - # Capture exception to add information on + # Capture exception to add information on # the local stack in addition to the distant # stack this_report = format_outer_frames( @@ -371,7 +371,7 @@ class Parallel(Logger): report = """Multiprocessing exception: %s --------------------------------------------------------------------------- -Sub-process traceback: +Sub-process traceback: --------------------------------------------------------------------------- %s""" % ( this_report, @@ -418,7 +418,7 @@ Sub-process traceback: try: for function, args, kwargs in iterable: self.dispatch(function, args, kwargs) - + self.retrieve() finally: if n_jobs > 1: diff --git a/sklearn/externals/joblib/test/common.py b/sklearn/externals/joblib/test/common.py index de6050d2d19d7f626737a70752e2c3b0494e705e..53894e3177b558a50dbd308f5786277fa467c7dd 100755 --- a/sklearn/externals/joblib/test/common.py +++ b/sklearn/externals/joblib/test/common.py @@ -11,7 +11,7 @@ try: """ return func -except ImportError: +except ImportError: def with_numpy(func): """ A decorator to skip tests requiring numpy. """ diff --git a/sklearn/externals/joblib/test/test_format_stack.py b/sklearn/externals/joblib/test/test_format_stack.py index 39ee599b0e7910061204e8468f47d7d7a99bbbae..31292ac69c66c476a0ce2f7f6fe7ce48d91398f5 100755 --- a/sklearn/externals/joblib/test/test_format_stack.py +++ b/sklearn/externals/joblib/test/test_format_stack.py @@ -1,8 +1,8 @@ """ -Unit tests for the stack formatting utilities +Unit tests for the stack formatting utilities """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2010 Gael Varoquaux # License: BSD Style, 3 clauses. 
diff --git a/sklearn/externals/joblib/test/test_func_inspect.py b/sklearn/externals/joblib/test/test_func_inspect.py index a52adf51f2878aa01aca1df4762da3eafa8f2fad..12872ace0108901452af5338cd610626bf7bff4b 100755 --- a/sklearn/externals/joblib/test/test_func_inspect.py +++ b/sklearn/externals/joblib/test/test_func_inspect.py @@ -2,7 +2,7 @@ Test the func_inspect module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -14,7 +14,7 @@ from ..func_inspect import filter_args, get_func_name, get_func_code from ..memory import Memory ################################################################################ -# Module-level functions, for tests +# Module-level functions, for tests def f(x, y=0): pass @@ -65,7 +65,7 @@ def test_filter_args(): def test_filter_args_method(): obj = Klass() - nose.tools.assert_equal(filter_args(obj.f, [], 1), + nose.tools.assert_equal(filter_args(obj.f, [], 1), {'x': 1, 'self': obj}) @@ -81,24 +81,24 @@ def test_filter_varargs(): def test_filter_kwargs(): - nose.tools.assert_equal(filter_args(k, [], 1, 2, ee=2), + nose.tools.assert_equal(filter_args(k, [], 1, 2, ee=2), {'*': [1, 2], '**':{'ee':2}}) - nose.tools.assert_equal(filter_args(k, [], 3, 4), + nose.tools.assert_equal(filter_args(k, [], 3, 4), {'*': [3, 4], '**':{}}) def test_filter_args_2(): - nose.tools.assert_equal(filter_args(j, [], 1, 2, ee=2), + nose.tools.assert_equal(filter_args(j, [], 1, 2, ee=2), {'x': 1, 'y': 2, '**':{'ee':2}}) - + nose.tools.assert_raises(ValueError, filter_args, f, 'a', None) # Check that we capture an undefined argument nose.tools.assert_raises(ValueError, filter_args, f, ['a'], None) ff = functools.partial(f, 1) # filter_args has to special-case partial - nose.tools.assert_equal(filter_args(ff, [], 1), + nose.tools.assert_equal(filter_args(ff, [], 1), {'*': [1], '**':{}}) - nose.tools.assert_equal(filter_args(ff, ['y'], 1), + nose.tools.assert_equal(filter_args(ff, ['y'], 1), {'*': [1], '**':{}}) @@ -114,15 +114,15 @@ def test_func_inspect_errors(): nose.tools.assert_equal(get_func_name('a'.lower)[-1], 'lower') nose.tools.assert_equal(get_func_code('a'.lower)[1:], (None, -1)) ff = lambda x: x - nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1], + nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1], '<lambda>') - nose.tools.assert_equal(get_func_code(ff)[1], + nose.tools.assert_equal(get_func_code(ff)[1], __file__.replace('.pyc', '.py')) # Simulate a function defined in __main__ ff.__module__ = '__main__' - nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1], + nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1], '<lambda>') - nose.tools.assert_equal(get_func_code(ff)[1], + nose.tools.assert_equal(get_func_code(ff)[1], __file__.replace('.pyc', '.py')) @@ -133,7 +133,7 @@ def test_bound_methods(): """ a = Klass() b = Klass() - nose.tools.assert_not_equal(filter_args(a.f, [], 1), + nose.tools.assert_not_equal(filter_args(a.f, [], 1), filter_args(b.f, [], 1)) diff --git a/sklearn/externals/joblib/test/test_logger.py b/sklearn/externals/joblib/test/test_logger.py index ac85a89d14a5cbabafe00c84192014d898300611..187066360dacba8a895770bd3d1a6caae62e7fea 100755 --- a/sklearn/externals/joblib/test/test_logger.py +++ b/sklearn/externals/joblib/test/test_logger.py @@ -2,7 +2,7 @@ Test the logger module. 
""" -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -28,7 +28,7 @@ def setup(): if os.path.exists(cachedir): shutil.rmtree(cachedir) env['dir'] = cachedir - + def teardown(): """ Test teardown. @@ -50,14 +50,14 @@ def test_print_time(): # Create a second time, to smoke test log rotation. print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log')) print_time('Foo') - # And a third time + # And a third time print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log')) print_time('Foo') printed_text = sys.stderr.getvalue() # Use regexps to be robust to time variations match = r"Foo: 0\..s, 0\.0min\nFoo: 0\..s, 0.0min\nFoo: .\..s, 0.0min\n" if not re.match(match, printed_text): - raise AssertionError('Excepted %s, got %s' % + raise AssertionError('Excepted %s, got %s' % (match, printed_text)) finally: sys.stderr = orig_stderr diff --git a/sklearn/externals/joblib/test/test_memory.py b/sklearn/externals/joblib/test/test_memory.py index 335ab45341d535c7877c504209d1c00c1cd03bc7..14fc3b1b5bdcc69e32ced74f5aca5e0f16f84be1 100755 --- a/sklearn/externals/joblib/test/test_memory.py +++ b/sklearn/externals/joblib/test/test_memory.py @@ -2,7 +2,7 @@ Test the memory module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -44,7 +44,7 @@ def setup_module(): print 80*'_' print 'test_memory setup' print 80*'_' - + def _rmtree_onerror(func, path, excinfo): print '!'*79 print 'os function failed:', repr(func) @@ -169,7 +169,7 @@ def test_memory_name_collision(): @memory.cache def name_collision(x): - """ A first function called name_collision + """ A first function called name_collision """ return x @@ -177,7 +177,7 @@ def test_memory_name_collision(): @memory.cache def name_collision(x): - """ A second function called name_collision + """ A second function called name_collision """ return x @@ -221,7 +221,7 @@ def test_memory_warning_lambda_collisions(): def test_memory_warning_collision_detection(): - """ Check that collisions impossible to detect will raise appropriate + """ Check that collisions impossible to detect will raise appropriate warnings. """ memory = Memory(cachedir=env['dir'], verbose=0) @@ -272,7 +272,7 @@ def test_memory_eval(): def count_and_append(x=[]): - """ A function with a side effect in its arguments. + """ A function with a side effect in its arguments. Return the lenght of its argument and append one element. """ @@ -316,7 +316,7 @@ def test_memory_numpy(): def test_memory_exception(): - """ Smoketest the exception handling of Memory. + """ Smoketest the exception handling of Memory. """ memory = Memory(cachedir=env['dir'], verbose=0) class MyException(Exception): diff --git a/sklearn/externals/joblib/test/test_my_exceptions.py b/sklearn/externals/joblib/test/test_my_exceptions.py index 0728a599babe738729f256e173b88dd9a3be84d1..3c9202e57d9db1ed8f8c3245c005b3087d12552d 100755 --- a/sklearn/externals/joblib/test/test_my_exceptions.py +++ b/sklearn/externals/joblib/test/test_my_exceptions.py @@ -7,7 +7,7 @@ from .. 
import my_exceptions def test_inheritance(): assert_true(isinstance(my_exceptions.JoblibNameError(), NameError)) - assert_true(isinstance(my_exceptions.JoblibNameError(), + assert_true(isinstance(my_exceptions.JoblibNameError(), my_exceptions.JoblibException)) assert_true(my_exceptions.JoblibNameError is my_exceptions._mk_exception(NameError)[0]) diff --git a/sklearn/externals/joblib/test/test_numpy_pickle.py b/sklearn/externals/joblib/test/test_numpy_pickle.py index 6182c89adc99b4b200f26d8ca700604464bd76ff..0d8175c5105fb40abb0dd3ad7ab9f16a895f7412 100755 --- a/sklearn/externals/joblib/test/test_numpy_pickle.py +++ b/sklearn/externals/joblib/test/test_numpy_pickle.py @@ -113,7 +113,7 @@ def test_numpy_persistence(): for item in obj_: yield nose.tools.assert_true, isinstance(item, np.ndarray) # And finally, check that all the values are equal. - yield nose.tools.assert_true, np.all(np.array(obj) == + yield nose.tools.assert_true, np.all(np.array(obj) == np.array(obj_)) diff --git a/sklearn/externals/joblib/test/test_parallel.py b/sklearn/externals/joblib/test/test_parallel.py index 834d7273cb9cfd6525d0cd35b6b45e2819a2cf9f..2599acb393439c3a178716305fcaa0591e02449e 100755 --- a/sklearn/externals/joblib/test/test_parallel.py +++ b/sklearn/externals/joblib/test/test_parallel.py @@ -2,7 +2,7 @@ Test the parallel module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2010-2011 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -61,12 +61,12 @@ def test_parallel_kwargs(): """ lst = range(10) for n_jobs in (1, 4): - yield (nose.tools.assert_equal, - [f(x, y=1) for x in lst], + yield (nose.tools.assert_equal, + [f(x, y=1) for x in lst], Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst) ) - + def test_parallel_pickling(): """ Check that pmap captures the errors when it is passed an object that cannot be pickled. 
@@ -74,7 +74,7 @@ def test_parallel_pickling():
     def g(x):
         return x**2
     nose.tools.assert_raises(PickleError,
-                             Parallel(), 
+                             Parallel(),
                              (delayed(g)(x) for x in range(10))
                             )
 
@@ -135,12 +135,12 @@ def test_dispatch_one_job():
             yield i
 
     Parallel(n_jobs=1)(delayed(consumer)(queue, x) for x in producer())
-    nose.tools.assert_equal(queue, 
-                            ['Produced 0', 'Consumed 0', 
-                             'Produced 1', 'Consumed 1', 
-                             'Produced 2', 'Consumed 2', 
-                             'Produced 3', 'Consumed 3', 
-                             'Produced 4', 'Consumed 4', 
+    nose.tools.assert_equal(queue,
+                            ['Produced 0', 'Consumed 0',
+                             'Produced 1', 'Consumed 1',
+                             'Produced 2', 'Consumed 2',
+                             'Produced 3', 'Consumed 3',
+                             'Produced 4', 'Consumed 4',
                              'Produced 5', 'Consumed 5']
                            )
     nose.tools.assert_equal(len(queue), 12)
@@ -161,8 +161,8 @@ def test_dispatch_multiprocessing():
 
     Parallel(n_jobs=2, pre_dispatch=3)(delayed(consumer)(queue, i)
                                        for i in producer())
-    nose.tools.assert_equal(list(queue)[:4], 
-                            ['Produced 0', 'Produced 1', 'Produced 2', 
+    nose.tools.assert_equal(list(queue)[:4],
+                            ['Produced 0', 'Produced 1', 'Produced 2',
                              'Consumed 0', ])
     nose.tools.assert_equal(len(queue), 12)
diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py
index 4d4771665af1b3e6832b28719bdeff3f022a699c..4214225e1cf8a6965a7c52fff3f1de666b375254 100644
--- a/sklearn/feature_selection/tests/test_feature_select.py
+++ b/sklearn/feature_selection/tests/test_feature_select.py
@@ -30,9 +30,9 @@ def test_f_classif():
     Test whether the F test yields meaningful results
     on a simple simulated classification problem
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -49,7 +49,7 @@ def test_f_regression():
     Test whether the F test yields meaningful results
     on a simple simulated regression problem
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     F, pv = f_regression(X, Y)
@@ -65,9 +65,9 @@ def test_f_classif_multi_class():
     Test whether the F test yields meaningful results
     on a simple simulated classification problem
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -85,9 +85,9 @@ def test_select_percentile_classif():
     gets the correct items in a simple classification problem
     with the percentile heuristic
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -110,9 +110,9 @@ def test_select_kbest_classif():
     gets the correct items in a simple classification problem
     with the k best heuristic
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -133,9 +133,9 @@ def test_select_fpr_classif():
     gets the correct items in a simple classification problem
     with the fpr heuristic
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -156,9 +156,9 @@ def test_select_fdr_classif():
     gets the correct items in a simple classification problem
     with the fpr heuristic
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -179,9 +179,9 @@ def test_select_fwe_classif():
     gets the correct items in a simple classification problem
     with the fpr heuristic
     """
-    X, Y = make_classification(n_samples=200, n_features=20, 
-                               n_informative=3, n_redundant=2, 
-                               n_repeated=0, n_classes=8, 
+    X, Y = make_classification(n_samples=200, n_features=20,
+                               n_informative=3, n_redundant=2,
+                               n_repeated=0, n_classes=8,
                                n_clusters_per_class=1, flip_y=0.0,
                                class_sep=10, shuffle=False, random_state=0)
 
@@ -205,7 +205,7 @@ def test_select_percentile_regression():
     gets the correct items in a simple regression problem
     with the percentile heuristic
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     univariate_filter = SelectPercentile(f_regression, percentile=25)
@@ -227,7 +227,7 @@ def test_select_percentile_regression_full():
     Test whether the relative univariate feature selection
    selects all features when '100%' is asked.
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     univariate_filter = SelectPercentile(f_regression, percentile=100)
@@ -246,7 +246,7 @@ def test_select_kbest_regression():
     gets the correct items in a simple regression problem
     with the k best heuristic
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     univariate_filter = SelectKBest(f_regression, k=5)
@@ -266,7 +266,7 @@ def test_select_fpr_regression():
     gets the correct items in a simple regression problem
     with the fpr heuristic
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     univariate_filter = SelectFpr(f_regression, alpha=0.01)
@@ -287,7 +287,7 @@ def test_select_fdr_regression():
     gets the correct items in a simple regression problem
     with the fdr heuristic
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     univariate_filter = SelectFdr(f_regression, alpha=0.01)
@@ -307,7 +307,7 @@ def test_select_fwe_regression():
     gets the correct items in a simple regression problem
     with the fwe heuristic
     """
-    X, Y = make_regression(n_samples=200, n_features=20, 
+    X, Y = make_regression(n_samples=200, n_features=20,
                            n_informative=5, shuffle=False, random_state=0)
 
     univariate_filter = SelectFwe(f_regression, alpha=0.01)
diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
index 3350795ea2f90c9269dcc5e981e5daedd7002d85..2b9c9ea6def8a32584f5dc7a6eccb0c84b23970f 100644
--- a/sklearn/feature_selection/univariate_selection.py
+++ b/sklearn/feature_selection/univariate_selection.py
@@ -379,7 +379,7 @@ class SelectFdr(_AbstractUnivariateFilter):
 
 class SelectFwe(_AbstractUnivariateFilter):
     """
-    Filter : Select the p-values corresponding to Family-wise error rate: a 
+    Filter : Select the p-values corresponding to Family-wise error rate: a
     corrected p-value of alpha
     """
 
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index 1c92572cb5273c4cdeaa65cdd90bf15898d033ea..c4a756ca2eac9a9b9f549443991156f4d36f7e6e 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -182,7 +182,7 @@ class GridSearchCV(BaseEstimator):
     cv : integer or crossvalidation generator, optional
         If an integer is passed, it is the number of fold (default 3).
-        Specific crossvalidation objects can be passed, see 
+        Specific crossvalidation objects can be passed, see
         sklearn.cross_validation module for the list of possible objects
 
     refit: boolean
diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py
index 374f6fcf31374376c48b115ccf464ab57b070001..ba2b2f157c5c9e37da1da4b49e7ec5962bca30d1 100644
--- a/sklearn/linear_model/bayes.py
+++ b/sklearn/linear_model/bayes.py
@@ -120,7 +120,7 @@ class BayesianRidge(LinearModel):
                  fit_intercept=True, normalize=False, overwrite_X=False,
                  verbose=False):
         self.n_iter = n_iter
-        self.tol = tol 
+        self.tol = tol
         self.alpha_1 = alpha_1
         self.alpha_2 = alpha_2
         self.lambda_1 = lambda_1
diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py
index 1a89516d623458f657edf623006265f9e8e45ec2..9344bff69aed37a3fa72979c64a7df151c1718e0 100644
--- a/sklearn/manifold/tests/test_locally_linear.py
+++ b/sklearn/manifold/tests/test_locally_linear.py
@@ -62,7 +62,7 @@ def test_lle_manifold():
     assert_lower(reconstruction_error, tol)
 
     for solver in eigen_solvers:
-        clf.set_params(eigen_solver=solver) 
+        clf.set_params(eigen_solver=solver)
         clf.fit(X)
         assert clf.embedding_.shape[1] == out_dim
         reconstruction_error = np.linalg.norm(
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 667ec7533cafc1e4483f39e32ed05faadc4396a3..2c513bbdb86dfb261d0f15e1298d92ec75e29d7d 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -155,8 +155,8 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
     distances = np.maximum(distances, 0)
 
     if X is Y:
-        # Ensure that distances between vectors and themselves are set to 0.0. 
-        # This may not be the case due to floating point rounding errors. 
+        # Ensure that distances between vectors and themselves are set to 0.0.
+        # This may not be the case due to floating point rounding errors.
         distances.flat[::distances.shape[0] + 1] = 0.0
 
     return distances if squared else np.sqrt(distances)
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index fd5f3335e8881da7e2b81fa8ce310c4a7c352786..57ed8655358ae8e426de73ef8627337c2b063eb8 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -133,7 +133,7 @@ def test_weight():
     Test class weights
     """
 
-    X_, y_ = make_classification(n_samples=200, n_features=100, 
+    X_, y_ = make_classification(n_samples=200, n_features=100,
                                  weights=[0.833, 0.167], random_state=0)
 
     X_ = scipy.sparse.csr_matrix(X_)
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 2f9d42bd7f05c104f3389aafa790b724da3b818f..17813589a5e62209663d0ded3ad0a9e5cc3b26e7 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -254,9 +254,9 @@ def test_weight():
     # so all predicted values belong to class 2
     assert_array_almost_equal(clf.predict(X), [2] * 6)
 
-    X_, y_ = make_classification(n_samples=200, n_features=100, 
+    X_, y_ = make_classification(n_samples=200, n_features=100,
                                  weights=[0.833, 0.167], random_state=0)
- 
+
     for clf in (linear_model.LogisticRegression(), svm.LinearSVC(), svm.SVC()):
         clf.fit(X_[: 180], y_[: 180], class_weight={0: 5})
         y_pred = clf.predict(X_[180:])
diff --git a/sklearn/utils/setup.py b/sklearn/utils/setup.py
index be9d635020f20f818358fbc765e701774f6f526b..da5909ffa2cf33fe513f82da5eeb378e59137d9f 100644
--- a/sklearn/utils/setup.py
+++ b/sklearn/utils/setup.py
@@ -31,7 +31,7 @@ def configuration(parent_package='', top_path=None):
                          **blas_info
                          )
 
- 
+
     config.add_extension('graph_shortest_path',
                          sources=['graph_shortest_path.c'],
                          include_dirs=[numpy.get_include()])
diff --git a/sklearn/utils/tests/test_svd.py b/sklearn/utils/tests/test_svd.py
index 2c10e5c43de519a464ddb482dfb55197d688dd13..41f26e4f30252456bbd801edcebcc066df50c69b 100644
--- a/sklearn/utils/tests/test_svd.py
+++ b/sklearn/utils/tests/test_svd.py
@@ -21,7 +21,7 @@ def test_fast_svd_low_rank():
 
     # generate a matrix X of approximate effective rank `rank` and no noise
     # component (very structured signal):
-    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, 
+    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                              effective_rank=rank, tail_strength=0.0,
                              random_state=0)
     assert_equal(X.shape, (n_samples, n_features))
@@ -58,7 +58,7 @@ def test_fast_svd_low_rank_with_noise():
 
     # generate a matrix X wity structure approximate rank `rank` and an
     # important noisy component
-    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, 
+    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                              effective_rank=rank, tail_strength=0.5,
                              random_state=0)
     assert_equal(X.shape, (n_samples, n_features))
@@ -89,7 +89,7 @@ def test_fast_svd_infinite_rank():
 
     # let us try again without 'low_rank component': just regularly but slowly
     # decreasing singular values: the rank of the data matrix is infinite
-    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, 
+    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                              effective_rank=rank, tail_strength=1.0,
                              random_state=0)
     assert_equal(X.shape, (n_samples, n_features))
@@ -119,7 +119,7 @@ def test_fast_svd_transpose_consistency():
     rank = 4
     k = 10
 
-    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, 
+    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                              effective_rank=rank, tail_strength=0.5,
                              random_state=0)
     assert_equal(X.shape, (n_samples, n_features))
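For readers skimming the test_parallel.py hunks above, the Parallel/delayed idiom they exercise is easy to try in isolation. Below is a minimal sketch, not part of the patch, assuming the vendored joblib import path used throughout this tree (a standalone joblib install exposes the same names):

    from math import sqrt
    from sklearn.externals.joblib import Parallel, delayed

    # delayed(sqrt) packs the function and its arguments into a picklable
    # (function, args, kwargs) triple; Parallel dispatches those calls over
    # n_jobs workers and returns the results in input order.
    print(Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10)))

The tests above rely on exactly this ordering guarantee when they compare the parallel results against a plain list comprehension.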
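The pairwise.py hunk touches a line worth unpacking: for a square (n, n) array, flat[::n + 1] addresses exactly the main diagonal. A small self-contained sketch of the idea follows; the variable names are illustrative, not taken from sklearn:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.rand(5, 3)

    # Squared distances via the ||x||^2 + ||y||^2 - 2 <x, y> expansion;
    # rounding can leave tiny nonzero values where x and y are the same row.
    sq_norms = (X ** 2).sum(axis=1)
    distances = (sq_norms[:, np.newaxis] + sq_norms[np.newaxis, :]
                 - 2 * np.dot(X, X.T))
    distances = np.maximum(distances, 0)

    # flat[::n + 1] hits flat indices 0, n + 1, 2 * (n + 1), ..., which are
    # the diagonal entries of an (n, n) array, forcing d(x_i, x_i) to 0.0.
    distances.flat[::distances.shape[0] + 1] = 0.0
    print(np.sqrt(distances))

This is why the patched comment stresses floating point rounding: the expansion above is fast but not exact, so the diagonal is cleared explicitly.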