From 9d5e16b6668db32f6f2346d809f27af2077a381a Mon Sep 17 00:00:00 2001 From: Olivier Grisel <olivier.grisel@ensta.org> Date: Sat, 6 Nov 2010 15:42:55 +0100 Subject: [PATCH] trailing spaces holocaust --- doc/conf.py | 2 +- doc/sphinxext/gen_rst.py | 14 ++-- doc/sphinxext/numpy_ext_old/docscrape.py | 10 +-- doc/sphinxext/numpy_ext_old/numpydoc.py | 2 +- examples/cluster/plot_segmentation_toy.py | 4 +- examples/feature_selection_pipeline.py | 2 +- examples/gmm/plot_gmm_classifier.py | 6 +- examples/gmm/plot_gmm_pdf.py | 4 +- .../mlcomp_sparse_document_classification.py | 2 +- examples/plot_classification_probability.py | 4 +- examples/plot_lda_qda.py | 2 +- examples/sgd/plot_loss_functions.py | 2 +- examples/sgd/plot_separating_hyperplane.py | 2 +- examples/svm/plot_custom_kernel.py | 4 +- scikits/learn/__init__.py | 6 +- scikits/learn/base.py | 14 ++-- scikits/learn/benchmarks/bench_balltree.py | 24 +++---- .../learn/benchmarks/bench_plot_balltree.py | 2 +- .../learn/cluster/affinity_propagation_.py | 4 +- scikits/learn/cluster/mean_shift_.py | 4 +- scikits/learn/cluster/spectral.py | 22 +++---- scikits/learn/cluster/tests/common.py | 6 +- scikits/learn/datasets/base.py | 5 +- scikits/learn/datasets/samples_generator.py | 2 +- scikits/learn/externals/joblib/__init__.py | 14 ++-- scikits/learn/externals/joblib/disk.py | 6 +- .../learn/externals/joblib/format_stack.py | 22 +++---- .../learn/externals/joblib/func_inspect.py | 26 ++++---- scikits/learn/externals/joblib/hashing.py | 14 ++-- scikits/learn/externals/joblib/logger.py | 6 +- scikits/learn/externals/joblib/memory.py | 64 +++++++++---------- .../learn/externals/joblib/my_exceptions.py | 4 +- .../learn/externals/joblib/numpy_pickle.py | 12 ++-- scikits/learn/externals/joblib/parallel.py | 38 +++++------ scikits/learn/externals/joblib/test/common.py | 2 +- .../joblib/test/test_format_stack.py | 4 +- .../joblib/test/test_func_inspect.py | 10 +-- .../externals/joblib/test/test_hashing.py | 10 +-- .../externals/joblib/test/test_logger.py | 6 +- .../externals/joblib/test/test_memory.py | 14 ++-- .../joblib/test/test_numpy_pickle.py | 2 +- .../externals/joblib/test/test_parallel.py | 10 +-- scikits/learn/feature_extraction/image.py | 6 +- .../feature_extraction/tests/test_image.py | 2 +- .../tests/test_feature_select.py | 24 +++---- scikits/learn/glm/bayes.py | 2 +- scikits/learn/glm/benchmarks/bench_bayes.py | 4 +- scikits/learn/glm/benchmarks/bench_glm.py | 4 +- scikits/learn/glm/tests/test_ridge.py | 8 +-- scikits/learn/hmm.py | 8 +-- scikits/learn/lda.py | 2 +- scikits/learn/neighbors.py | 4 +- scikits/learn/pca.py | 10 +-- scikits/learn/sgd/base.py | 4 +- scikits/learn/sgd/setup.py | 2 +- scikits/learn/sgd/sparse/sgd.py | 12 ++-- scikits/learn/sgd/tests/test_sparse.py | 4 +- scikits/learn/svm/sparse/libsvm.py | 4 +- scikits/learn/tests/test_base.py | 14 ++-- scikits/learn/tests/test_cross_val.py | 2 +- scikits/learn/tests/test_fastica.py | 12 ++-- scikits/learn/tests/test_gmm.py | 2 +- scikits/learn/tests/test_hmm.py | 2 +- scikits/learn/tests/test_neighbors.py | 2 +- scikits/learn/tests/test_pipeline.py | 6 +- scikits/learn/utils/_csgraph.py | 6 +- scikits/learn/utils/bench.py | 2 +- scikits/learn/utils/fixes.py | 6 +- scikits/learn/utils/graph.py | 12 ++-- scikits/learn/utils/sparsetools/setup.py | 2 +- setup.py | 4 +- 71 files changed, 292 insertions(+), 293 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 13767f4afa..ceb2f7adc1 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -22,7 +22,7 @@ sys.path.insert(0, 
os.path.abspath('sphinxext')) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.pngmath', 'gen_rst'] try: diff --git a/doc/sphinxext/gen_rst.py b/doc/sphinxext/gen_rst.py index 3deadb9a2b..b575bf8d87 100644 --- a/doc/sphinxext/gen_rst.py +++ b/doc/sphinxext/gen_rst.py @@ -16,7 +16,7 @@ fileList = [] import matplotlib matplotlib.use('Agg') -import token, tokenize +import token, tokenize rst_template = """ @@ -59,14 +59,14 @@ def extract_docstring(filename): first_par = '' tokens = tokenize.generate_tokens(lines.__iter__().next) for tok_type, tok_content, _, (erow, _), _ in tokens: - tok_type = token.tok_name[tok_type] + tok_type = token.tok_name[tok_type] if tok_type in ('NEWLINE', 'COMMENT', 'NL', 'INDENT', 'DEDENT'): continue elif tok_type == 'STRING': docstring = eval(tok_content) # If the docstring is formatted with several paragraphs, extract # the first one: - paragraphs = '\n'.join(line.rstrip() + paragraphs = '\n'.join(line.rstrip() for line in docstring.split('\n')).split('\n\n') if len(paragraphs) > 0: first_par = paragraphs[0] @@ -77,7 +77,7 @@ def extract_docstring(filename): def generate_example_rst(app): """ Generate the list of examples, as well as the contents of examples. - """ + """ root_dir = os.path.join(app.builder.srcdir, 'auto_examples') example_dir = os.path.abspath(app.builder.srcdir + '/../' + 'examples') if not os.path.exists(example_dir): @@ -114,7 +114,7 @@ def generate_dir_rst(dir, fhindex, example_dir, root_dir): target_dir = os.path.join(root_dir, dir) src_dir = os.path.join(example_dir, dir) if not os.path.exists(os.path.join(src_dir, 'README.txt')): - raise IOError('Example directory %s does not have a README.txt file' + raise IOError('Example directory %s does not have a README.txt file' % src_dir) fhindex.write(""" @@ -126,7 +126,7 @@ def generate_dir_rst(dir, fhindex, example_dir, root_dir): if not os.path.exists(target_dir): os.makedirs(target_dir) for fname in sorted(os.listdir(src_dir)): - if fname.endswith('py'): + if fname.endswith('py'): generate_file_rst(fname, target_dir, src_dir) fhindex.write(' %s\n' % (os.path.join(dir, fname[:-3]))) @@ -154,7 +154,7 @@ def generate_file_rst(fname, target_dir, src_dir): os.makedirs(os.path.join(target_dir, 'images')) image_file = os.path.join(target_dir, 'images', image_name) if (not os.path.exists(image_file) or - os.stat(image_file).st_mtime <= + os.stat(image_file).st_mtime <= os.stat(src_file).st_mtime): print 'plotting %s' % fname import matplotlib.pyplot as plt diff --git a/doc/sphinxext/numpy_ext_old/docscrape.py b/doc/sphinxext/numpy_ext_old/docscrape.py index 904270a52a..a4b7c21bc8 100644 --- a/doc/sphinxext/numpy_ext_old/docscrape.py +++ b/doc/sphinxext/numpy_ext_old/docscrape.py @@ -183,7 +183,7 @@ class NumpyDocString(object): return params - + _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|" r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X) def _parse_see_also(self, content): @@ -216,7 +216,7 @@ class NumpyDocString(object): current_func = None rest = [] - + for line in content: if not line.strip(): continue @@ -258,7 +258,7 @@ class NumpyDocString(object): if len(line) > 2: out[line[1]] = strip_each_in(line[2].split(',')) return out - + def _parse_summary(self): """Grab signature (if given) and summary""" if self._is_at_section(): @@ -275,7 +275,7 
@@ class NumpyDocString(object): if not self._is_at_section(): self['Extended Summary'] = self._read_to_next_section() - + def _parse(self): self._doc.reset() self._parse_summary() @@ -438,7 +438,7 @@ class FunctionDoc(NumpyDocString): else: func = self._f return func, func_name - + def __str__(self): out = '' diff --git a/doc/sphinxext/numpy_ext_old/numpydoc.py b/doc/sphinxext/numpy_ext_old/numpydoc.py index 2ea41fbb7a..5e979ea9c8 100644 --- a/doc/sphinxext/numpy_ext_old/numpydoc.py +++ b/doc/sphinxext/numpy_ext_old/numpydoc.py @@ -83,7 +83,7 @@ def initialize(app): def setup(app, get_doc_object_=get_doc_object): global get_doc_object get_doc_object = get_doc_object_ - + app.connect('autodoc-process-docstring', mangle_docstrings) app.connect('builder-inited', initialize) app.add_config_value('numpydoc_edit_link', None, True) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 26eefccd73..20f98ef79d 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -4,12 +4,12 @@ Spectral clustering for image segmentation =========================================== In this example, an image with connected circles is generated and -spectral clustering is used to separate the circles. +spectral clustering is used to separate the circles. In these settings, the spectral clustering approach solves the problem know as 'normalized graph cuts': the image is seen as a graph of connected voxels, and the spectral clustering algorithm amounts to -choosing graph cuts defining regions while minimizing the ratio of the +choosing graph cuts defining regions while minimizing the ratio of the gradient along the cut, and the volume of the region. As the algorithm tries to balance the volume (ie balance the region diff --git a/examples/feature_selection_pipeline.py b/examples/feature_selection_pipeline.py index 8daf0b82c4..46aeedd174 100644 --- a/examples/feature_selection_pipeline.py +++ b/examples/feature_selection_pipeline.py @@ -17,7 +17,7 @@ from scikits.learn.pipeline import Pipeline X, y = samples_generator.test_dataset_classif(k=5) # ANOVA SVM-C -# 1) anova filter, take 5 best ranked features +# 1) anova filter, take 5 best ranked features anova_filter = SelectKBest(f_regression, k=5) # 2) svm clf = svm.SVC(kernel='linear') diff --git a/examples/gmm/plot_gmm_classifier.py b/examples/gmm/plot_gmm_classifier.py index cab5894faa..edf7d12e2d 100644 --- a/examples/gmm/plot_gmm_classifier.py +++ b/examples/gmm/plot_gmm_classifier.py @@ -15,7 +15,7 @@ overfitting on small datasets and does not generalize well to held out test data. On the plots, train data is shown as dots, while test data is shown as -crosses. The iris dataset is four-dimensional. Only the first two +crosses. The iris dataset is four-dimensional. Only the first two dimensions are shown here, and thus some points are separated in other dimensions. 
""" @@ -88,7 +88,7 @@ for index, (name, classifier) in enumerate(classifiers.iteritems()): for n, color in enumerate('rgb'): data = iris.data[iris.target == n] - pl.scatter(data[:,0], data[:, 1], 0.8, color=color, + pl.scatter(data[:,0], data[:, 1], 0.8, color=color, label=iris.target_names[n]) # Plot the test data with crosses for n, color in enumerate('rgb'): @@ -110,6 +110,6 @@ for index, (name, classifier) in enumerate(classifiers.iteritems()): pl.title(name) pl.legend(loc='lower right', prop=dict(size=12)) - + pl.show() diff --git a/examples/gmm/plot_gmm_pdf.py b/examples/gmm/plot_gmm_pdf.py index 8ef4ea041d..cec1ec0df7 100644 --- a/examples/gmm/plot_gmm_pdf.py +++ b/examples/gmm/plot_gmm_pdf.py @@ -23,8 +23,8 @@ X_train = np.r_[np.dot(np.random.randn(n_samples, 2), C), clf = gmm.GMM(n_states=2, cvtype='full') clf.fit(X_train) -x = np.linspace(-20.0, 30.0) -y = np.linspace(-20.0, 40.0) +x = np.linspace(-20.0, 30.0) +y = np.linspace(-20.0, 40.0) X, Y = np.meshgrid(x, y) XX = np.c_[X.ravel(), Y.ravel()] Z = np.log(-clf.eval(XX)[0]) diff --git a/examples/mlcomp_sparse_document_classification.py b/examples/mlcomp_sparse_document_classification.py index b10570bdf8..4413eded26 100644 --- a/examples/mlcomp_sparse_document_classification.py +++ b/examples/mlcomp_sparse_document_classification.py @@ -19,7 +19,7 @@ Once downloaded unzip the arhive somewhere on your filesystem. For instance in:: % unzip /path/to/dataset-379-20news-18828_XXXXX.zip You should get a folder ``~/data/mlcomp/379`` with a file named ``metadata`` and -subfolders ``raw``, ``train`` and ``test`` holding the text documents organized +subfolders ``raw``, ``train`` and ``test`` holding the text documents organized by newsgroups. Then set the ``MLCOMP_DATASETS_HOME`` environment variable pointing to diff --git a/examples/plot_classification_probability.py b/examples/plot_classification_probability.py index 9688166153..fffac1ff79 100644 --- a/examples/plot_classification_probability.py +++ b/examples/plot_classification_probability.py @@ -62,12 +62,12 @@ for index, (name, classifier) in enumerate(classifiers.iteritems()): pl.title("Class %d" % k) if k == 0: pl.ylabel(name) - imshow_handle = pl.imshow(probas[:, k].reshape((100, 100)), + imshow_handle = pl.imshow(probas[:, k].reshape((100, 100)), extent=(3, 9, 1, 5), origin='lower') pl.xticks(()) pl.yticks(()) idx = (y_pred == k) - if idx.any(): + if idx.any(): pl.scatter(X[idx, 0], X[idx, 1], marker='o', c='k') ax = pl.axes([0.15, 0.04, 0.7, 0.05]) diff --git a/examples/plot_lda_qda.py b/examples/plot_lda_qda.py index a054b39181..9e035bb3ab 100644 --- a/examples/plot_lda_qda.py +++ b/examples/plot_lda_qda.py @@ -68,7 +68,7 @@ def plot_data(lda, X, y, y_pred, fig_index): xmin, xmax = X[:, 0].min(), X[:, 0].max() ymin, ymax = X[:, 1].min(), X[:, 1].max() - # class 0: dots + # class 0: dots pl.plot(X0_tp[:, 0], X0_tp[:, 1], 'o', color='red') pl.plot(X0_fp[:, 0], X0_fp[:, 1], '.', color='#990000') # dark red diff --git a/examples/sgd/plot_loss_functions.py b/examples/sgd/plot_loss_functions.py index 3cdd603434..814fa84217 100644 --- a/examples/sgd/plot_loss_functions.py +++ b/examples/sgd/plot_loss_functions.py @@ -3,7 +3,7 @@ SGD: Convex Loss Functions ========================== -Plot the convex loss functions supported by `scikits.learn.sgd`. +Plot the convex loss functions supported by `scikits.learn.sgd`. 
""" print __doc__ diff --git a/examples/sgd/plot_separating_hyperplane.py b/examples/sgd/plot_separating_hyperplane.py index 0c540d4b56..56d0dd2cdc 100644 --- a/examples/sgd/plot_separating_hyperplane.py +++ b/examples/sgd/plot_separating_hyperplane.py @@ -9,7 +9,7 @@ trained using SGD. NOTE: Due to the decreased learning rate of the intercept the hyperplane is forced to run through the origin, thus, -the plotted HP is not the max margin HP. +the plotted HP is not the max margin HP. """ print __doc__ diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py index 872d7e32e9..a33670f581 100644 --- a/examples/svm/plot_custom_kernel.py +++ b/examples/svm/plot_custom_kernel.py @@ -30,11 +30,11 @@ def my_kernel(x, y): """ M = np.array([[2, 0], [0, 1.0]]) return np.dot(np.dot(x, M), y.T) - + h=.02 # step size in the mesh -# we create an instance of SVM and fit out data. +# we create an instance of SVM and fit out data. clf = svm.SVC(kernel=my_kernel) clf.fit(X, Y) diff --git a/scikits/learn/__init__.py b/scikits/learn/__init__.py index 34ff317e21..cd4b9d65b9 100644 --- a/scikits/learn/__init__.py +++ b/scikits/learn/__init__.py @@ -32,19 +32,19 @@ try: class NoseTester(nosetester.NoseTester): """ Subclass numpy's NoseTester to add doctests by default """ - def test(self, label='fast', verbose=1, extra_argv=['--exe'], + def test(self, label='fast', verbose=1, extra_argv=['--exe'], doctests=True, coverage=False): return super(NoseTester, self).test(label=label, verbose=verbose, extra_argv=extra_argv, doctests=doctests, coverage=coverage) - + test = NoseTester().test del nosetester except: pass __all__ = ['cross_val', 'ball_tree', 'cluster', 'covariance', 'datasets', 'gmm', 'glm', - 'logistic', 'lda', 'metrics', 'svm', 'features', 'clone', + 'logistic', 'lda', 'metrics', 'svm', 'features', 'clone', 'test', 'sgd'] __version__ = '0.6.git' diff --git a/scikits/learn/base.py b/scikits/learn/base.py index 58db218cf4..bf2219f754 100644 --- a/scikits/learn/base.py +++ b/scikits/learn/base.py @@ -46,7 +46,7 @@ def clone(estimator, safe=True): for name, param in new_object_params.iteritems(): new_object_params[name] = clone(param, safe=False) new_object = klass(**new_object_params) - + return new_object @@ -69,7 +69,7 @@ def _pprint(params, offset=0, printer=repr): np.set_printoptions(precision=5, threshold=64, edgeitems=2) params_list = list() this_line_length = offset - line_sep = ',\n' + (1+offset/2)*' ' + line_sep = ',\n' + (1 + offset / 2) * ' ' for i, (k, v) in enumerate(params.iteritems()): if type(v) is float: # use str for representing floating point numbers @@ -79,7 +79,7 @@ def _pprint(params, offset=0, printer=repr): else: # use repr of the rest this_repr = '%s=%s' % (k, printer(v)) - if i > 0: + if i > 0: if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): params_list.append(line_sep) @@ -94,7 +94,7 @@ def _pprint(params, offset=0, printer=repr): lines = ''.join(params_list) # Strip trailing space to avoid nightmare in doctests lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) - return lines + return lines ################################################################################ @@ -109,7 +109,7 @@ class BaseEstimator(object): """ - @classmethod + @classmethod def _get_param_names(cls): """ Get parameter names for the estimator """ @@ -156,7 +156,7 @@ class BaseEstimator(object): """ if not params: # Simple optimisation to gain speed (inspect is slow) - return + return valid_params = self._get_params(deep=True) for key, value in 
params.iteritems(): split = key.split('__', 1) @@ -263,6 +263,6 @@ def _get_sub_estimator(estimator): def is_classifier(estimator): """ Returns True if the given estimator is (probably) a classifier. """ - estimator = _get_sub_estimator(estimator) + estimator = _get_sub_estimator(estimator) return isinstance(estimator, ClassifierMixin) diff --git a/scikits/learn/benchmarks/bench_balltree.py b/scikits/learn/benchmarks/bench_balltree.py index edc02a559f..100d3e28b0 100644 --- a/scikits/learn/benchmarks/bench_balltree.py +++ b/scikits/learn/benchmarks/bench_balltree.py @@ -1,5 +1,5 @@ """ -This script compares the performance of the Ball Tree code with +This script compares the performance of the Ball Tree code with scipy.spatial.cKDTree. Then run the simple timings script: @@ -28,7 +28,7 @@ def compare_nbrs(nbrs1,nbrs2): elif(nbrs1.ndim == 1): N = len(nbrs1) return numpy.all(nbrs1 == nbrs2) - + def test_time(N=1000, D=100, ls=1, k=20): M = numpy.random.random([N,D]) @@ -37,7 +37,7 @@ def test_time(N=1000, D=100, ls=1, k=20): print "%i neighbors of %i points in %i dimensions:" % (k,N,D) print " (leaf size = %i)" % ls print " -------------" - + t0 = time() BT = BallTree(M,ls) print " Ball Tree construction : %.3g sec" % ( time()-t0 ) @@ -45,14 +45,14 @@ def test_time(N=1000, D=100, ls=1, k=20): print " total (construction+query) : %.3g sec" % ( time()-t0 ) print " -------------" - + t0 = time() KDT = cKDTree(M,ls) print " KD tree construction : %.3g sec" % ( time()-t0 ) d,nbrs2 = KDT.query(M,k) print " total (construction+query) : %.3g sec" % ( time()-t0 ) print " -------------" - + print " neighbors match: ", print ( compare_nbrs(nbrs1,nbrs2) ) print " -------------" @@ -62,20 +62,20 @@ if __name__ == '__main__': N,D = map(int,sys.argv[1:]) ls = 20 k = min(20,N) - + elif len(sys.argv)==4: N,D,ls = map(int,sys.argv[1:]) k = min(20,N) elif len(sys.argv)==5: N,D,ls,k = map(int,sys.argv[1:]) - + else: print "usage: bench_balltree.py N D [leafsize=20], [k=20]" exit() - - + + test_time(N,D,ls,k) - - - + + + diff --git a/scikits/learn/benchmarks/bench_plot_balltree.py b/scikits/learn/benchmarks/bench_plot_balltree.py index 4fcf56cc71..e0a54ba03d 100644 --- a/scikits/learn/benchmarks/bench_plot_balltree.py +++ b/scikits/learn/benchmarks/bench_plot_balltree.py @@ -55,4 +55,4 @@ pl.xlabel('number of dimensions') pl.ylabel('time (seconds)') pl.legend() pl.show() - + diff --git a/scikits/learn/cluster/affinity_propagation_.py b/scikits/learn/cluster/affinity_propagation_.py index 53035a1980..aede3a09ad 100644 --- a/scikits/learn/cluster/affinity_propagation_.py +++ b/scikits/learn/cluster/affinity_propagation_.py @@ -1,4 +1,4 @@ -""" Algorithms for clustering : Meanshift, Affinity propagation and spectral +""" Algorithms for clustering : Meanshift, Affinity propagation and spectral clustering. """ @@ -198,7 +198,7 @@ class AffinityPropagation(BaseEstimator): Between Data Points", Science Feb. 2007 The algorithmic complexity of affinity propagation is quadratic - in the number of points. + in the number of points. """ def __init__(self, damping=.5, maxit=200, convit=30, copy=True): diff --git a/scikits/learn/cluster/mean_shift_.py b/scikits/learn/cluster/mean_shift_.py index 6088b66f88..1b9934d098 100644 --- a/scikits/learn/cluster/mean_shift_.py +++ b/scikits/learn/cluster/mean_shift_.py @@ -1,4 +1,4 @@ -""" Algorithms for clustering : Meanshift, Affinity propagation and spectral +""" Algorithms for clustering : Meanshift, Affinity propagation and spectral clustering. 
Author: Alexandre Gramfort alexandre.gramfort@inria.fr @@ -223,7 +223,7 @@ class MeanShift(BaseEstimator): def fit(self, X, **params): """ Compute MeanShift - + Parameters ----------- X : array [n_samples, n_features] diff --git a/scikits/learn/cluster/spectral.py b/scikits/learn/cluster/spectral.py index e484b1fe61..920c2ac5fc 100644 --- a/scikits/learn/cluster/spectral.py +++ b/scikits/learn/cluster/spectral.py @@ -15,7 +15,7 @@ from .k_means_ import k_means def spectral_embedding(adjacency, k=8, mode=None): """ Spectral embedding: project the sample on the k first - eigen vectors of the graph laplacian. + eigen vectors of the graph laplacian. Parameters ----------- @@ -46,7 +46,7 @@ def spectral_embedding(adjacency, k=8, mode=None): from pyamg import smoothed_aggregation_solver amg_loaded = True except ImportError: - amg_loaded = False + amg_loaded = False n_nodes = adjacency.shape[0] # XXX: Should we check that the matrices given is symmetric @@ -56,9 +56,9 @@ def spectral_embedding(adjacency, k=8, mode=None): mode = ('amg' if amg_loaded else 'arpack') laplacian, dd = graph_laplacian(adjacency, normed=True, return_diag=True) - if (mode == 'arpack' + if (mode == 'arpack' or not sparse.isspmatrix(laplacian) - or n_nodes < 5*k # This is the threshold under which lobpcg has bugs + or n_nodes < 5*k # This is the threshold under which lobpcg has bugs ): # We need to put the diagonal at zero if not sparse.isspmatrix(laplacian): @@ -88,7 +88,7 @@ def spectral_embedding(adjacency, k=8, mode=None): X = np.random.rand(laplacian.shape[0], k) X[:, 0] = 1. / dd.ravel() M = ml.aspreconditioner() - lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12, + lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12, largest=False) embedding = diffusion_map.T * dd if embedding.shape[0] == 1: raise ValueError @@ -98,7 +98,7 @@ def spectral_embedding(adjacency, k=8, mode=None): def spectral_clustering(adjacency, k=8, mode=None): - """ Spectral clustering: apply k-means to a projection of the + """ Spectral clustering: apply k-means to a projection of the graph laplacian, finds normalized graph cuts. Parameters @@ -132,7 +132,7 @@ def spectral_clustering(adjacency, k=8, mode=None): ################################################################################ class SpectralClustering(BaseEstimator): - """ Spectral clustering: apply k-means to a projection of the + """ Spectral clustering: apply k-means to a projection of the graph laplacian, finds normalized graph cuts. Parameters @@ -148,7 +148,7 @@ class SpectralClustering(BaseEstimator): ------- fit(X): - Compute spectral clustering + Compute spectral clustering Attributes ---------- @@ -163,11 +163,11 @@ class SpectralClustering(BaseEstimator): self.k = k self.mode = mode - + def fit(self, X, **params): """ Compute the spectral clustering from the adjacency matrix of the graph. - + Parameters ----------- X: array-like or sparse matrix, shape: (p, p) @@ -179,7 +179,7 @@ class SpectralClustering(BaseEstimator): greatly speeds up computation. 
""" self._set_params(**params) - self.labels_ = spectral_clustering(X, + self.labels_ = spectral_clustering(X, k=self.k, mode=self.mode) return self diff --git a/scikits/learn/cluster/tests/common.py b/scikits/learn/cluster/tests/common.py index f64458737c..ba6bac04f5 100644 --- a/scikits/learn/cluster/tests/common.py +++ b/scikits/learn/cluster/tests/common.py @@ -13,15 +13,15 @@ def generate_clustered_data(seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=.4): prng = np.random.RandomState(seed) - means = np.array([[ 1, 1, 1, 0], - [-1, -1, 0, 1], + means = np.array([[ 1, 1, 1, 0], + [-1, -1, 0, 1], [ 1, -1, 1, 1], [ -1, 1, 1, 0], ]) X = np.empty((0, n_features)) for i in range(n_clusters): - X = np.r_[X, means[i][:n_features] + X = np.r_[X, means[i][:n_features] + std*prng.randn(n_samples_per_cluster, n_features)] return X diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py index a87a65cbe6..74570e2b8f 100644 --- a/scikits/learn/datasets/base.py +++ b/scikits/learn/datasets/base.py @@ -124,9 +124,8 @@ def load_iris(): >>> data = load_iris() >>> data.target[[10, 25, 50]] array([0, 0, 1]) - >>> data.target_names - array(['setosa', 'versicolor', 'virginica'], - dtype='|S10') + >>> list(data.target_names) + ['setosa', 'versicolor', 'virginica'] """ diff --git a/scikits/learn/datasets/samples_generator.py b/scikits/learn/datasets/samples_generator.py index 8eae1ed7f4..dcac19f7af 100644 --- a/scikits/learn/datasets/samples_generator.py +++ b/scikits/learn/datasets/samples_generator.py @@ -138,7 +138,7 @@ def sparse_uncorrelated(nb_samples=100, nb_features=10): def friedman(nb_samples=100, nb_features=10,noise_std=1): """ - Function creating simulated data with non linearities + Function creating simulated data with non linearities (cf.Friedman 1993) X = NR.normal(0,1) Y = 10*sin(X[:,0]*X[:,1]) + 20*(X[:,2]-0.5)**2 + 10*X[:,3] + 5*X[:,4] diff --git a/scikits/learn/externals/joblib/__init__.py b/scikits/learn/externals/joblib/__init__.py index d415fb9502..66a971ea53 100644 --- a/scikits/learn/externals/joblib/__init__.py +++ b/scikits/learn/externals/joblib/__init__.py @@ -15,9 +15,9 @@ data and has specific optimizations for `numpy` arrays. It is ============================== ============================================== **User documentation**: http://packages.python.org/joblib - + **Download packages**: http://pypi.python.org/pypi/joblib#downloads - + **Source code**: http://github.com/joblib/joblib **Report issues**: http://github.com/joblib/joblib/issues @@ -36,7 +36,7 @@ solution. over, for instance when prototyping computational-heavy jobs (as in scientific development), but hand-crafted solution to aleviate this issue is error-prone and often leads to unreproducible results - + * **Persist to disk transparently**: persisting in an efficient way arbitrary objects containing large data is hard. In addition, hand-written persistence does not link easily the file on disk to the @@ -46,7 +46,7 @@ solution. It strives to address these problems while **leaving your code and your flow control as unmodified as possible** (no framework, no new -paradigms). +paradigms). 
Main features ------------------ @@ -75,7 +75,7 @@ Main features >>> c = square(a) >>> # The above call did not trigger an evaluation -2) **Embarrassingly parallel helper:** to make is easy to write readable +2) **Embarrassingly parallel helper:** to make is easy to write readable parallel code and debug it quickly: >>> from joblib import Parallel, delayed @@ -88,10 +88,10 @@ Main features progressively acquire better logging mechanism to help track what has been ran, and capture I/O easily. In addition, Joblib will provide a few I/O primitives, to easily define define logging and - display streams, and provide a way of compiling a report. + display streams, and provide a way of compiling a report. We want to be able to quickly inspect what has been run. -.. +.. >>> import shutil ; shutil.rmtree('/tmp/joblib/') """ diff --git a/scikits/learn/externals/joblib/disk.py b/scikits/learn/externals/joblib/disk.py index d2b267f444..b7b7846a6e 100644 --- a/scikits/learn/externals/joblib/disk.py +++ b/scikits/learn/externals/joblib/disk.py @@ -2,7 +2,7 @@ Disk management utilities. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2010 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -12,7 +12,7 @@ import os import shutil def disk_used(path): - """ Return the disk usage in a directory. + """ Return the disk usage in a directory. """ size = 0 for file in os.listdir(path) + ['.']: @@ -60,7 +60,7 @@ def rm_subdirs(path, onerror=None): names = os.listdir(path) except os.error, err: onerror(os.listdir, path, sys.exc_info()) - + for name in names: fullname = os.path.join(path, name) if os.path.isdir(fullname): diff --git a/scikits/learn/externals/joblib/format_stack.py b/scikits/learn/externals/joblib/format_stack.py index 246db3fb1f..c01795e6f2 100644 --- a/scikits/learn/externals/joblib/format_stack.py +++ b/scikits/learn/externals/joblib/format_stack.py @@ -63,7 +63,7 @@ def safe_repr(value): except: return 'UNRECOVERABLE REPR FAILURE' -def eq_repr(value, repr=safe_repr): +def eq_repr(value, repr=safe_repr): return '=%s' % repr(value) @@ -92,7 +92,7 @@ def uniq_stable(elems): ################################################################################ def fix_frame_records_filenames(records): """Try to fix the filenames in each record from inspect.getinnerframes(). - + Particularly, modules loaded from within zip files have useless filenames attached to their code object, and inspect.getinnerframes() just uses it. """ @@ -106,7 +106,7 @@ def fix_frame_records_filenames(records): # __file__. It might also be None if the error occurred during # import. filename = better_fn - fixed_records.append((frame, filename, line_no, func_name, lines, index)) + fixed_records.append((frame, filename, line_no, func_name, lines, index)) return fixed_records @@ -158,7 +158,7 @@ def _format_traceback_lines(lnum, index, lines, lvals=None): if pad >= 3: marker = '-'*(pad-3) + '-> ' elif pad == 2: - marker = '> ' + marker = '> ' elif pad == 1: marker = '>' else: @@ -196,7 +196,7 @@ def format_records(records): #, print_globals=False): # able to remove this try/except when 2.4 becomes a # requirement. Bug details at http://python.org/sf/1005466 print "\nJoblib's exception reporting continues...\n" - + if func == '?': call = '' else: @@ -228,7 +228,7 @@ def format_records(records): #, print_globals=False): there is no way to disambguate partial dotted structures until the full list is known. 
The caller is responsible for pruning the final list of duplicates before using it.""" - + # build composite names if token == '.': try: @@ -275,7 +275,7 @@ def format_records(records): #, print_globals=False): print ("An unexpected error occurred while tokenizing input\n" "The following traceback may be corrupted or invalid\n" "The error message is: %s\n" % msg) - + # prune names list of duplicates, but keep the right order unique_names = uniq_stable(names) @@ -315,14 +315,14 @@ def format_records(records): #, print_globals=False): else: frames.append('%s%s' % (level,''.join( _format_traceback_lines(lnum, index, lines, lvals)))) - + return frames ################################################################################ def format_exc(etype, evalue, etb, context=5, tb_offset=0): """ Return a nice text document describing the traceback. - + Parameters ----------- etype, evalue, etb: as returned by sys.exc_info @@ -340,7 +340,7 @@ def format_exc(etype, evalue, etb, context=5, tb_offset=0): pyver = 'Python ' + string.split(sys.version)[0] + ': ' + sys.executable date = time.ctime(time.time()) pid = 'PID: %i' % os.getpid() - + head = '%s%s%s\n%s%s%s' % (etype, ' '*(75-len(str(etype))-len(date)), date, pid, ' '*(75-len(str(pid))-len(pyver)), pyver) @@ -407,7 +407,7 @@ def format_outer_frames(context=5, stack_start=None, stack_end=None, filename = filename[:-4] + '.py' if ignore_ipython: # Hack to avoid printing the interals of IPython - if (os.path.basename(filename) == 'iplib.py' + if (os.path.basename(filename) == 'iplib.py' and func_name in ('safe_execfile', 'runcode')): break maybeStart = line_no -1 - context//2 diff --git a/scikits/learn/externals/joblib/func_inspect.py b/scikits/learn/externals/joblib/func_inspect.py index 5ea13f9442..af0ff95daf 100644 --- a/scikits/learn/externals/joblib/func_inspect.py +++ b/scikits/learn/externals/joblib/func_inspect.py @@ -2,7 +2,7 @@ My own variation on function-specific inspect-like features. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -13,7 +13,7 @@ import os def get_func_code(func): """ Attempts to retrieve a reliable function code hash. - + The reason we don't use inspect.getsource is that it caches the source, whereas we want this to be modified on the fly when the function is modified. @@ -54,7 +54,7 @@ def get_func_code(func): return repr(func), source_file, -1 -def get_func_name(func, resolv_alias=True, win_characters=True): +def get_func_name(func, resolv_alias=True, win_characters=True): """ Return the function import path (as a list of module names), and a name for the function. 
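A minimal sketch of what the func_inspect helpers patched here actually do — not something this patch adds, and the expected outputs in the comments are my reading of the argument-binding logic in the filter_args hunks just below; the import path assumes the vendored joblib copy in this tree:

import pprint
from scikits.learn.externals.joblib.func_inspect import filter_args

def f(x, y=0):
    pass

# filter_args normalizes a call into a canonical {argument: value} dict,
# with defaults filled in and ignored names dropped; joblib hashes this
# dict to build its cache keys.
pprint.pprint(filter_args(f, [], 1))      # {'x': 1, 'y': 0}
pprint.pprint(filter_args(f, ['y'], 1))   # {'x': 1}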
@@ -94,7 +94,7 @@ def get_func_name(func, resolv_alias=True, win_characters=True): module = module.split('.') if hasattr(func, 'func_name'): name = func.func_name - elif hasattr(func, '__name__'): + elif hasattr(func, '__name__'): name = func.__name__ else: name = 'unknown' @@ -105,7 +105,7 @@ def get_func_name(func, resolv_alias=True, win_characters=True): if not func.func_globals[name] is func: name = '%s-alias' % name if inspect.ismethod(func): - # We need to add the name of the class + # We need to add the name of the class if hasattr(func, 'im_class'): klass = func.im_class module.append(klass.__name__) @@ -126,7 +126,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): func: callable Function giving the argument specification ignore_lst: list of strings - List of arguments to ignore (either a name of an argument + List of arguments to ignore (either a name of an argument in the function spec, or '*', or '**') *args: list Positional arguments passed to the function. @@ -146,7 +146,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): raise ValueError('ignore_lst must be a list of parameters to ignore ' '%s (type %s) was given' % (ignore_lst, type(ignore_lst))) # Special case for functools.partial objects - if (not inspect.ismethod(func) and not inspect.isfunction(func)): + if (not inspect.ismethod(func) and not inspect.isfunction(func)): if ignore_lst: warnings.warn('Cannot inspect object %s, ignore list will ' 'not work.' % func, stacklevel=2) @@ -165,7 +165,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): # First argument is 'self', it has been removed by Python # we need to add it back: args = [func.im_self, ] + args - # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such + # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such # as on ndarrays. _, name = get_func_name(func, resolv_alias=False) @@ -186,15 +186,15 @@ def filter_args(func, ignore_lst, *args, **kwargs): # Missing argument raise ValueError('Wrong number of arguments for %s%s:\n' ' %s(%s, %s) was called.' - % (name, + % (name, inspect.formatargspec(*inspect.getargspec(func)), name, repr(args)[1:-1], - ', '.join('%s=%s' % (k, v) + ', '.join('%s=%s' % (k, v) for k, v in kwargs.iteritems()) ) ) - + varkwargs = dict() @@ -219,7 +219,7 @@ def filter_args(func, ignore_lst, *args, **kwargs): arg_dict.pop(item) else: raise ValueError("Ignore list: argument '%s' is not defined for " - "function %s%s" % + "function %s%s" % (item, name, inspect.formatargspec(arg_names, arg_varargs, @@ -227,5 +227,5 @@ def filter_args(func, ignore_lst, *args, **kwargs): arg_defaults, ))) # XXX: Return a sorted list of pairs? - return arg_dict + return arg_dict diff --git a/scikits/learn/externals/joblib/hashing.py b/scikits/learn/externals/joblib/hashing.py index e00a707681..8183382ce2 100644 --- a/scikits/learn/externals/joblib/hashing.py +++ b/scikits/learn/externals/joblib/hashing.py @@ -1,9 +1,9 @@ """ -Fast cryptographic hash of Python objects, with a special case for fast +Fast cryptographic hash of Python objects, with a special case for fast hashing of numpy arrays. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. 
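Between these hashing.py hunks, a quick sketch of the hash() helper they touch. This is illustrative only: the import path assumes the vendored copy, '/tmp' is just scratch space, and the memmap behaviour mirrors test_hash_memmap further down in the patch:

import numpy as np
from scikits.learn.externals.joblib.hashing import hash

a = np.arange(10)
print(hash(a))                    # md5 hexdigest identifying the array
print(hash(a, hash_name='sha1'))  # sha1 is supposedly safer, md5 faster

# With coerce_mmap=True a np.memmap hashes like the equal in-memory array,
# exactly the property test_hash_memmap checks below.
m = np.memmap('/tmp/joblib_hash_demo', dtype=a.dtype, shape=a.shape,
              mode='w+')
m[:] = a
print(hash(a, coerce_mmap=True) == hash(m, coerce_mmap=True))  # True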
@@ -76,7 +76,7 @@ class NumpyHasher(Hasher): # XXX: There might be a more efficient way of doing this self._hash.update(self.np.getbuffer(obj.flatten())) - # We store the class, to be able to distinguish between + # We store the class, to be able to distinguish between # Objects with the same binary content, but different # classes. if self.coerce_mmap and isinstance(obj, self.np.memmap): @@ -86,7 +86,7 @@ class NumpyHasher(Hasher): klass = self.np.ndarray else: klass = obj.__class__ - # We also return the dtype and the shape, to distinguish + # We also return the dtype and the shape, to distinguish # different views on the same data with different dtypes. # The object will be pickled by the pickler hashed at the end. @@ -95,14 +95,14 @@ class NumpyHasher(Hasher): def hash(obj, hash_name='md5', coerce_mmap=False): - """ Quick calculation of a hash to identify uniquely Python objects + """ Quick calculation of a hash to identify uniquely Python objects containing numpy arrays. - + Parameters ----------- hash_name: 'md5' or 'sha1' - Hashing algorithm used. sha1 is supposedly safer, but md5 is + Hashing algorithm used. sha1 is supposedly safer, but md5 is faster. coerce_mmap: boolean Make no difference between np.memmap and np.ndarray diff --git a/scikits/learn/externals/joblib/logger.py b/scikits/learn/externals/joblib/logger.py index 4cf90ce4f5..4416c5f133 100644 --- a/scikits/learn/externals/joblib/logger.py +++ b/scikits/learn/externals/joblib/logger.py @@ -4,7 +4,7 @@ Helpers for logging. This module needs much love to become useful. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2008 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -31,7 +31,7 @@ def short_format_time(t): class Logger(object): """ Base class for logging messages. """ - + def __init__(self, depth=3): """ Parameters @@ -87,7 +87,7 @@ class PrintTime(object): for i in range(1, 9): if os.path.exists(logfile+'.%i' % i): try: - shutil.move(logfile+'.%i' % i, + shutil.move(logfile+'.%i' % i, logfile+'.%i' % (i+1)) except: "No reason failing here" diff --git a/scikits/learn/externals/joblib/memory.py b/scikits/learn/externals/joblib/memory.py index b043fca3f2..901c4b0027 100644 --- a/scikits/learn/externals/joblib/memory.py +++ b/scikits/learn/externals/joblib/memory.py @@ -4,7 +4,7 @@ is called with the same input arguments. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -45,10 +45,10 @@ FIRST_LINE_TEXT = "# first line:" # object, and the interface to persist and query should be separated in # the data store. # -# This would enable creating 'Memory' objects with a different logic for +# This would enable creating 'Memory' objects with a different logic for # pickling that would simply span a MemorizedFunc with the same # store (or do we want to copy it to avoid cross-talks?), for instance to -# implement HDF5 pickling. +# implement HDF5 pickling. # TODO: Same remark for the logger, and probably use the Python logging # mechanism. 
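Before the MemorizedFunc hunks, the canonical Memory usage from the package docstring patched earlier, restated as a plain script; the cachedir '/tmp/joblib' is the docstring's own example location, and the import assumes the vendored package re-exports Memory as that docstring does:

import numpy as np
from scikits.learn.externals.joblib import Memory

mem = Memory(cachedir='/tmp/joblib', verbose=0)

# mem.cache wraps np.square in a MemorizedFunc: the first call runs the
# function and persists the result, the second call with the same argument
# replays it from the on-disk cache.
square = mem.cache(np.square)
a = np.vander(np.arange(3))
b = square(a)   # computed and cached
c = square(a)   # loaded back; no recomputation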
@@ -79,9 +79,9 @@ class JobLibCollisionWarning(UserWarning): # class `Memory` ################################################################################ class MemorizedFunc(Logger): - """ Callable object decorating a function for caching its return value + """ Callable object decorating a function for caching its return value each time it is called. - + All values are cached on the filesystem, in a deep directory structure. Methods are provided to inspect the cache or clean it. @@ -100,14 +100,14 @@ class MemorizedFunc(Logger): arguments. Only used if save_npy was true when the cache was created. verbose: int, optional - The verbosity flag, controls messages that are issued as + The verbosity flag, controls messages that are issued as the function is revaluated. """ #------------------------------------------------------------------------- # Public interface #------------------------------------------------------------------------- - - def __init__(self, func, cachedir, ignore=None, save_npy=True, + + def __init__(self, func, cachedir, ignore=None, save_npy=True, mmap_mode=None, verbose=1): """ Parameters @@ -127,7 +127,7 @@ class MemorizedFunc(Logger): arguments. Only used if save_npy was true when the cache was created. verbose: int, optional - Verbosity flag, controls the debug messages that are issued + Verbosity flag, controls the debug messages that are issued as functions are revaluated. """ Logger.__init__(self) @@ -159,7 +159,7 @@ class MemorizedFunc(Logger): # function code has changed output_dir, _ = self.get_output_dir(*args, **kwargs) # FIXME: The statements below should be try/excepted - if not (self._check_previous_func_code(stacklevel=3) and + if not (self._check_previous_func_code(stacklevel=3) and os.path.exists(output_dir)): return self.call(*args, **kwargs) else: @@ -179,7 +179,7 @@ class MemorizedFunc(Logger): #------------------------------------------------------------------------- # Private interface #------------------------------------------------------------------------- - + def _get_func_dir(self, mkdir=True): """ Get the directory corresponding to the cache for the function. @@ -191,7 +191,7 @@ class MemorizedFunc(Logger): try: os.makedirs(func_dir) except OSError: - """ Dir exists: we have a race condition here, when using + """ Dir exists: we have a race condition here, when using multiprocessing. """ # XXX: Ugly @@ -206,12 +206,12 @@ class MemorizedFunc(Logger): """ coerce_mmap = (self.mmap_mode is not None) argument_hash = hash(filter_args(self.func, self.ignore, - *args, **kwargs), + *args, **kwargs), coerce_mmap=coerce_mmap) output_dir = os.path.join(self._get_func_dir(self.func), argument_hash) return output_dir, argument_hash - + def _write_func_code(self, filename, func_code, first_line): """ Write the function code and the filename to a file. @@ -221,7 +221,7 @@ class MemorizedFunc(Logger): def _check_previous_func_code(self, stacklevel=2): - """ + """ stacklevel is the depth a which this function is called, to issue useful warnings to the user. 
""" @@ -232,7 +232,7 @@ class MemorizedFunc(Logger): func_dir = self._get_func_dir() func_code_file = os.path.join(func_dir, 'func_code.py') - if not os.path.exists(func_code_file): + if not os.path.exists(func_code_file): self._write_func_code(func_code_file, func_code, first_line) return False old_func_code, old_first_line = \ @@ -241,14 +241,14 @@ class MemorizedFunc(Logger): return True # We have differing code, is this because we are refering to - # differing functions, or because the function we are refering as + # differing functions, or because the function we are refering as # changed? if old_first_line == first_line == -1: _, func_name = get_func_name(self.func, resolv_alias=False, win_characters=False) if not first_line == -1: - func_description = '%s (%s:%i)' % (func_name, + func_description = '%s (%s:%i)' % (func_name, source_file, first_line) else: func_description = func_name @@ -260,7 +260,7 @@ class MemorizedFunc(Logger): # same than the code store, we have a collision: the code in the # file has not changed, but the name we have is pointing to a new # code block. - if (not old_first_line == first_line + if (not old_first_line == first_line and source_file is not None and os.path.exists(source_file)): _, func_name = get_func_name(self.func, resolv_alias=False) @@ -272,7 +272,7 @@ class MemorizedFunc(Logger): warnings.warn(JobLibCollisionWarning( 'Possible name collisions between functions ' "'%s' (%s:%i) and '%s' (%s:%i)" % - (func_name, source_file, old_first_line, + (func_name, source_file, old_first_line, func_name, source_file, first_line)), stacklevel=stacklevel) @@ -283,7 +283,7 @@ class MemorizedFunc(Logger): def clear(self, warn=True): - """ Empty the function's cache. + """ Empty the function's cache. """ func_dir = self._get_func_dir(mkdir=False) if self._verbose and warn: @@ -297,7 +297,7 @@ class MemorizedFunc(Logger): def call(self, *args, **kwargs): - """ Force the execution of the function with the given arguments and + """ Force the execution of the function with the given arguments and persist the output values. """ start_time = time.time() @@ -316,7 +316,7 @@ class MemorizedFunc(Logger): def format_call(self, *args, **kwds): - """ Returns a nicely formatted statement displaying the function + """ Returns a nicely formatted statement displaying the function call with the given arguments. """ path, signature = self.format_signature(self.func, *args, @@ -363,7 +363,7 @@ class MemorizedFunc(Logger): filename = os.path.join(dir, 'output.pkl') if 'numpy' in sys.modules and self.save_npy: - numpy_pickle.dump(output, filename) + numpy_pickle.dump(output, filename) else: output_file = file(filename, 'w') pickle.dump(output, output_file, protocol=2) @@ -391,7 +391,7 @@ class MemorizedFunc(Logger): """ filename = os.path.join(output_dir, 'output.pkl') if self.save_npy: - return numpy_pickle.load(filename, + return numpy_pickle.load(filename, mmap_mode=self.mmap_mode) else: output_file = file(filename, 'r') @@ -402,7 +402,7 @@ class MemorizedFunc(Logger): #------------------------------------------------------------------------- # Private `object` interface #------------------------------------------------------------------------- - + def __repr__(self): return '%s(func=%s, cachedir=%s)' % ( self.__class__.__name__, @@ -418,7 +418,7 @@ class MemorizedFunc(Logger): class Memory(Logger): """ A context object for caching a function's return value each time it is called with the same input arguments. 
- + All values are cached on the filesystem, in a deep directory structure. @@ -427,7 +427,7 @@ class Memory(Logger): #------------------------------------------------------------------------- # Public interface #------------------------------------------------------------------------- - + def __init__(self, cachedir, save_npy=True, mmap_mode=None, verbose=1): """ @@ -446,7 +446,7 @@ class Memory(Logger): arguments. Only used if save_npy was true when the cache was created. verbose: int, optional - Verbosity flag, controls the debug messages that are issued + Verbosity flag, controls the debug messages that are issued as functions are revaluated. """ # XXX: Bad explaination of the None value of cachedir @@ -469,13 +469,13 @@ class Memory(Logger): Returns ------- decorated_func: MemorizedFunc object - The returned object is a MemorizedFunc object, that is + The returned object is a MemorizedFunc object, that is callable (behaves like a function), but offers extra methods for cache lookup and management. See the documentation for :class:`joblib.memory.MemorizedFunc`. """ if func is None: - # Partial application, to be able to specify extra keyword + # Partial application, to be able to specify extra keyword # arguments in decorators return functools.partial(self.cache, ignore=ignore) if self.cachedir is None: @@ -511,7 +511,7 @@ class Memory(Logger): #------------------------------------------------------------------------- # Private `object` interface #------------------------------------------------------------------------- - + def __repr__(self): return '%s(cachedir=%s)' % ( self.__class__.__name__, diff --git a/scikits/learn/externals/joblib/my_exceptions.py b/scikits/learn/externals/joblib/my_exceptions.py index 5eee46b232..f8f3ffc91b 100644 --- a/scikits/learn/externals/joblib/my_exceptions.py +++ b/scikits/learn/externals/joblib/my_exceptions.py @@ -40,7 +40,7 @@ def _mk_exception(exception, name=None): # Avoid creating twice the same exception this_exception = _exception_mapping[this_name] else: - this_exception = type(this_name, (exception, JoblibException), + this_exception = type(this_name, (exception, JoblibException), dict(__repr__=JoblibException.__repr__, __str__=JoblibException.__str__), ) @@ -64,7 +64,7 @@ def _mk_common_exceptions(): return namespace -# Updating module locals so that the exceptions pickle right. AFAIK this +# Updating module locals so that the exceptions pickle right. AFAIK this # works only at module-creation time locals().update(_mk_common_exceptions()) diff --git a/scikits/learn/externals/joblib/numpy_pickle.py b/scikits/learn/externals/joblib/numpy_pickle.py index bdef0cde0b..1a641ac8e6 100644 --- a/scikits/learn/externals/joblib/numpy_pickle.py +++ b/scikits/learn/externals/joblib/numpy_pickle.py @@ -2,7 +2,7 @@ A pickler to save numpy arrays in separate .npy files. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -45,7 +45,7 @@ class NumpyPickler(pickle.Pickler): def save(self, obj): """ Subclass the save method, to save ndarray subclasses in npy - files, rather than pickling them. Off course, this is a + files, rather than pickling them. Off course, this is a total abuse of the Pickler class. """ if isinstance(obj, self.np.ndarray): @@ -83,8 +83,8 @@ class NumpyUnpickler(pickle.Unpickler): def load_build(self): """ This method is called to set the state of a knewly created - object. 
- + object. + We capture it to replace our place-holder objects, NDArrayWrapper, by the array we are interested in. We replace directly in the stack of pickler. @@ -111,7 +111,7 @@ class NumpyUnpickler(pickle.Unpickler): # Utility functions def dump(value, filename): - """ Persist an arbitrary Python object into a filename, with numpy arrays + """ Persist an arbitrary Python object into a filename, with numpy arrays saved as separate .npy files. See Also @@ -129,7 +129,7 @@ def dump(value, filename): def load(filename, mmap_mode=None): - """ Reconstruct a Python object and the numpy arrays it contains from + """ Reconstruct a Python object and the numpy arrays it contains from a persisted file. This function loads the numpy array files saved separately. If diff --git a/scikits/learn/externals/joblib/parallel.py b/scikits/learn/externals/joblib/parallel.py index 0e3088cb5a..fdaae35371 100644 --- a/scikits/learn/externals/joblib/parallel.py +++ b/scikits/learn/externals/joblib/parallel.py @@ -27,7 +27,7 @@ from .my_exceptions import JoblibException, _mk_exception class SafeFunction(object): """ Wraps a function to make it exception with full traceback in their representation. - Useful for parallel computing with multiprocessing, for which + Useful for parallel computing with multiprocessing, for which exceptions cannot be captured. """ @@ -57,8 +57,8 @@ def print_progress(msg, index, total, start_time, n_jobs=1): (total - index - 1.)) sys.stderr.write('[%s]: Done %3i out of %3i |elapsed: %s remaining: %s\n' % (msg, - index+1, - total, + index+1, + total, short_format_time(elapsed_time), short_format_time(remaining_time), )) @@ -103,16 +103,16 @@ class Parallel(Logger): verbose: int, optional The verbosity level. If 1 is given, the elapsed time as well as the estimated remaining time are displayed. - + Notes ----- This object uses the multiprocessing module to compute in parallel the application of a function to many different - arguments. The main functionnality it brings in addition to + arguments. The main functionnality it brings in addition to using the raw multiprocessing API are (see examples for details): - * More readable code, in particular since it avoids + * More readable code, in particular since it avoids constructing list of arguments. * Easier debuging: @@ -136,7 +136,7 @@ class Parallel(Logger): Reshaping the output when the function has several return values: - + >>> from math import modf >>> from joblib import Parallel, delayed >>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10)) @@ -145,7 +145,7 @@ class Parallel(Logger): (0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5) >>> i (0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0) - + The progress meter:: >>> from time import sleep @@ -157,9 +157,9 @@ class Parallel(Logger): [Parallel(n_jobs=2)]: Done 7 out of 10 |elapsed: 0.4s remaining: 0.2s [Parallel(n_jobs=2)]: Done 9 out of 10 |elapsed: 0.5s remaining: 0.1s - Traceback example, note how the ligne of the error is indicated + Traceback example, note how the ligne of the error is indicated as well as the values of the parameter passed to the function that - triggered the exception, eventhough the traceback happens in the + triggered the exception, eventhough the traceback happens in the child process:: >>> from string import atoi @@ -167,7 +167,7 @@ class Parallel(Logger): >>> Parallel(n_jobs=2)(delayed(atoi)(n) for n in ('1', '300', 30)) #doctest: +SKIP #... 
--------------------------------------------------------------------------- - Sub-process traceback: + Sub-process traceback: --------------------------------------------------------------------------- TypeError Fri Jul 2 20:32:05 2010 PID: 4151 Python 2.6.5: /usr/bin/python @@ -176,14 +176,14 @@ class Parallel(Logger): 398 is chosen from the leading characters of s, 0 for octal, 0x or 399 0X for hexadecimal. If base is 16, a preceding 0x or 0X is 400 accepted. - 401 + 401 402 """ --> 403 return _int(s, base) - 404 - 405 + 404 + 405 406 # Convert string to long integer 407 def atol(s, base=10): - + TypeError: int() can't convert non-string with explicit base ___________________________________________________________________________ @@ -206,7 +206,7 @@ class Parallel(Logger): if n_jobs is None or multiprocessing is None or n_jobs == 1: n_jobs = 1 - apply = LazyApply + apply = LazyApply else: pool = multiprocessing.Pool(n_jobs) apply = pool.apply_async @@ -219,7 +219,7 @@ class Parallel(Logger): output.append(apply(function, args, kwargs)) if self.verbose and n_jobs == 1: print '[%s]: Done job %3i | elapsed: %s' % ( - self, index, + self, index, short_format_time(time.time() - start_time) ) @@ -233,7 +233,7 @@ class Parallel(Logger): print_progress(self, index, len(jobs), start_time, n_jobs=n_jobs) except JoblibException, exception: - # Capture exception to add information on + # Capture exception to add information on # the local stack in addition to the distant # stack this_report = format_outer_frames( @@ -243,7 +243,7 @@ class Parallel(Logger): report = """Multiprocessing exception: %s --------------------------------------------------------------------------- -Sub-process traceback: +Sub-process traceback: --------------------------------------------------------------------------- %s""" % ( this_report, diff --git a/scikits/learn/externals/joblib/test/common.py b/scikits/learn/externals/joblib/test/common.py index de6050d2d1..53894e3177 100644 --- a/scikits/learn/externals/joblib/test/common.py +++ b/scikits/learn/externals/joblib/test/common.py @@ -11,7 +11,7 @@ try: """ return func -except ImportError: +except ImportError: def with_numpy(func): """ A decorator to skip tests requiring numpy. """ diff --git a/scikits/learn/externals/joblib/test/test_format_stack.py b/scikits/learn/externals/joblib/test/test_format_stack.py index 453bd431ec..db4102348b 100644 --- a/scikits/learn/externals/joblib/test/test_format_stack.py +++ b/scikits/learn/externals/joblib/test/test_format_stack.py @@ -2,7 +2,7 @@ Test the format_stack module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2010 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -18,7 +18,7 @@ class Nasty(object): __str__ = __repr__ ################################################################################ -# Test safe_repr +# Test safe_repr def test_safe_repr(): """ Smoke test safe_repr on a nasty class. diff --git a/scikits/learn/externals/joblib/test/test_func_inspect.py b/scikits/learn/externals/joblib/test/test_func_inspect.py index 820b0ebaaf..738035f644 100644 --- a/scikits/learn/externals/joblib/test/test_func_inspect.py +++ b/scikits/learn/externals/joblib/test/test_func_inspect.py @@ -2,7 +2,7 @@ Test the func_inspect module. 
""" -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -13,7 +13,7 @@ from ..func_inspect import filter_args, get_func_name from ..memory import Memory ################################################################################ -# Module-level functions, for tests +# Module-level functions, for tests def f(x, y=0): pass @@ -60,7 +60,7 @@ def test_filter_args(): def test_filter_args_method(): obj = Klass() - nose.tools.assert_equal(filter_args(obj.f, [], 1), + nose.tools.assert_equal(filter_args(obj.f, [], 1), {'x': 1, 'self': obj}) @@ -75,7 +75,7 @@ def test_filter_varargs(): {'x': 1, 'y': 2, '**':{'ee':2}} def test_tmp(): - nose.tools.assert_equal(filter_args(j, [], 1, 2, ee=2), + nose.tools.assert_equal(filter_args(j, [], 1, 2, ee=2), {'x': 1, 'y': 2, '**':{'ee':2}}) def test_func_name(): @@ -90,7 +90,7 @@ def test_bound_methods(): """ a = Klass() b = Klass() - nose.tools.assert_not_equal(filter_args(a.f, [], 1), + nose.tools.assert_not_equal(filter_args(a.f, [], 1), filter_args(b.f, [], 1)) diff --git a/scikits/learn/externals/joblib/test/test_hashing.py b/scikits/learn/externals/joblib/test/test_hashing.py index eaa2172baf..ab93c69e36 100644 --- a/scikits/learn/externals/joblib/test/test_hashing.py +++ b/scikits/learn/externals/joblib/test/test_hashing.py @@ -2,7 +2,7 @@ Test the hashing module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -17,7 +17,7 @@ from ..hashing import hash from .common import np, with_numpy ################################################################################ -# Helper functions for the tests +# Helper functions for the tests def time_func(func, *args): """ Time function func on *args. """ @@ -47,7 +47,7 @@ def test_trival_hash(): """ Smoke test hash on various types. """ obj_list = [1, 1., 1+1j, - 'a', + 'a', (1, ), [1, ], {1:1}, None, ] @@ -93,14 +93,14 @@ def test_hash_memmap(): a = np.asarray(m) for coerce_mmap in (False, True): yield (nose.tools.assert_equal, - hash(a, coerce_mmap=coerce_mmap) + hash(a, coerce_mmap=coerce_mmap) == hash(m, coerce_mmap=coerce_mmap), coerce_mmap) finally: if 'm' in locals(): del m # Force a garbage-collection cycle, to be certain that the - # object is delete, and we don't run in a problem under + # object is delete, and we don't run in a problem under # Windows with a file handle still open. gc.collect() try: diff --git a/scikits/learn/externals/joblib/test/test_logger.py b/scikits/learn/externals/joblib/test/test_logger.py index 33a44b138d..0db7148bc7 100644 --- a/scikits/learn/externals/joblib/test/test_logger.py +++ b/scikits/learn/externals/joblib/test/test_logger.py @@ -2,7 +2,7 @@ Test the logger module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -27,7 +27,7 @@ def setup(): if os.path.exists(cachedir): shutil.rmtree(cachedir) env['dir'] = cachedir - + def teardown(): """ Test teardown. @@ -49,7 +49,7 @@ def test_print_time(): # Create a second time, to smoke test log rotation. 
print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log')) print_time('Foo') - # And a third time + # And a third time print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log')) print_time('Foo') # nose.tools.assert_equal(sys.stderr.getvalue(), diff --git a/scikits/learn/externals/joblib/test/test_memory.py b/scikits/learn/externals/joblib/test/test_memory.py index 335ab45341..14fc3b1b5b 100644 --- a/scikits/learn/externals/joblib/test/test_memory.py +++ b/scikits/learn/externals/joblib/test/test_memory.py @@ -2,7 +2,7 @@ Test the memory module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. @@ -44,7 +44,7 @@ def setup_module(): print 80*'_' print 'test_memory setup' print 80*'_' - + def _rmtree_onerror(func, path, excinfo): print '!'*79 print 'os function failed:', repr(func) @@ -169,7 +169,7 @@ def test_memory_name_collision(): @memory.cache def name_collision(x): - """ A first function called name_collision + """ A first function called name_collision """ return x @@ -177,7 +177,7 @@ def test_memory_name_collision(): @memory.cache def name_collision(x): - """ A second function called name_collision + """ A second function called name_collision """ return x @@ -221,7 +221,7 @@ def test_memory_warning_lambda_collisions(): def test_memory_warning_collision_detection(): - """ Check that collisions impossible to detect will raise appropriate + """ Check that collisions impossible to detect will raise appropriate warnings. """ memory = Memory(cachedir=env['dir'], verbose=0) @@ -272,7 +272,7 @@ def test_memory_eval(): def count_and_append(x=[]): - """ A function with a side effect in its arguments. + """ A function with a side effect in its arguments. Return the lenght of its argument and append one element. """ @@ -316,7 +316,7 @@ def test_memory_numpy(): def test_memory_exception(): - """ Smoketest the exception handling of Memory. + """ Smoketest the exception handling of Memory. """ memory = Memory(cachedir=env['dir'], verbose=0) class MyException(Exception): diff --git a/scikits/learn/externals/joblib/test/test_numpy_pickle.py b/scikits/learn/externals/joblib/test/test_numpy_pickle.py index 6182c89adc..0d8175c510 100644 --- a/scikits/learn/externals/joblib/test/test_numpy_pickle.py +++ b/scikits/learn/externals/joblib/test/test_numpy_pickle.py @@ -113,7 +113,7 @@ def test_numpy_persistence(): for item in obj_: yield nose.tools.assert_true, isinstance(item, np.ndarray) # And finally, check that all the values are equal. - yield nose.tools.assert_true, np.all(np.array(obj) == + yield nose.tools.assert_true, np.all(np.array(obj) == np.array(obj_)) diff --git a/scikits/learn/externals/joblib/test/test_parallel.py b/scikits/learn/externals/joblib/test/test_parallel.py index 808750eaeb..a28f4d1835 100644 --- a/scikits/learn/externals/joblib/test/test_parallel.py +++ b/scikits/learn/externals/joblib/test/test_parallel.py @@ -2,7 +2,7 @@ Test the parallel module. """ -# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> +# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> # Copyright (c) 2010 Gael Varoquaux # License: BSD Style, 3 clauses. 
@@ -46,12 +46,12 @@ def test_parallel_kwargs(): """ lst = range(10) for n_jobs in (1, 4): - yield (nose.tools.assert_equal, - [f(x, y=1) for x in lst], + yield (nose.tools.assert_equal, + [f(x, y=1) for x in lst], Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst) ) - + def test_parallel_pickling(): """ Check that pmap captures the errors when it is passed an object that cannot be pickled. @@ -59,7 +59,7 @@ def test_parallel_pickling(): def g(x): return x**2 nose.tools.assert_raises(PickleError, - Parallel(), + Parallel(), (delayed(g)(x) for x in range(10)) ) diff --git a/scikits/learn/feature_extraction/image.py b/scikits/learn/feature_extraction/image.py index ebada423dc..4303781b6b 100644 --- a/scikits/learn/feature_extraction/image.py +++ b/scikits/learn/feature_extraction/image.py @@ -15,7 +15,7 @@ from ..utils.fixes import in1d def _make_edges_3d(n_x, n_y, n_z=1): """ Returns a list of edges for a 3D image. - + Parameters =========== n_x: integer @@ -48,7 +48,7 @@ def _compute_gradient_3d(edges, img): # XXX: Why mask the image after computing the weights? def _mask_edges_weights(mask, edges, weights): - """ Given a image mask and the + """ Given a image mask and the """ inds = np.arange(mask.size) inds = inds[mask.ravel()] @@ -71,7 +71,7 @@ def img_to_graph(img, mask=None, img: ndarray, 2D or 3D 2D or 3D image mask : ndarray of booleans, optional - An optional mask of the image, to consider only part of the + An optional mask of the image, to consider only part of the pixels. return_as: np.ndarray or a sparse matrix class, optional The class to use to build the returned adjacency matrix. diff --git a/scikits/learn/feature_extraction/tests/test_image.py b/scikits/learn/feature_extraction/tests/test_image.py index 40a3644b7e..5209e6e356 100644 --- a/scikits/learn/feature_extraction/tests/test_image.py +++ b/scikits/learn/feature_extraction/tests/test_image.py @@ -19,7 +19,7 @@ def test_img_to_graph(): # Negative elements are the diagonal: the elements of the original # image. 
Positive elements are the values of the gradient, they # shoudl all be equal on grad_x and grad_y - np.testing.assert_array_equal(grad_x.data[grad_x.data > 0], + np.testing.assert_array_equal(grad_x.data[grad_x.data > 0], grad_y.data[grad_y.data > 0]) diff --git a/scikits/learn/feature_selection/tests/test_feature_select.py b/scikits/learn/feature_selection/tests/test_feature_select.py index 656e674a1a..002f15cf7c 100644 --- a/scikits/learn/feature_selection/tests/test_feature_select.py +++ b/scikits/learn/feature_selection/tests/test_feature_select.py @@ -2,7 +2,7 @@ Todo: cross-check the F-value with stats model """ -from ..univariate_selection import (f_classif, f_regression, +from ..univariate_selection import (f_classif, f_regression, SelectPercentile, SelectKBest, SelectFpr, SelectFdr, SelectFwe, GenericUnivariateSelect) @@ -72,7 +72,7 @@ def test_select_percentile_classif(): seed=seed) univariate_filter = SelectPercentile(f_classif, percentile=25) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_classif, mode='percentile', + X_r2 = GenericUnivariateSelect(f_classif, mode='percentile', param=25).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -94,7 +94,7 @@ def test_select_kbest_classif(): seed=seed) univariate_filter = SelectKBest(f_classif, k=5) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_classif, mode='k_best', + X_r2 = GenericUnivariateSelect(f_classif, mode='k_best', param=5).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -113,7 +113,7 @@ def test_select_fpr_classif(): seed=seed) univariate_filter = SelectFpr(f_classif, alpha=0.0001) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_classif, mode='fpr', + X_r2 = GenericUnivariateSelect(f_classif, mode='fpr', param=0.0001).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -132,7 +132,7 @@ def test_select_fdr_classif(): seed=3) univariate_filter = SelectFdr(f_classif, alpha=0.01) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_classif, mode='fdr', + X_r2 = GenericUnivariateSelect(f_classif, mode='fdr', param=0.01).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -151,7 +151,7 @@ def test_select_fwe_classif(): seed=seed) univariate_filter = SelectFwe(f_classif, alpha=0.01) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_classif, mode='fwe', + X_r2 = GenericUnivariateSelect(f_classif, mode='fwe', param=0.01).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -173,7 +173,7 @@ def test_select_percentile_regression(): seed=seed) univariate_filter = SelectPercentile(f_regression, percentile=25) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_regression, mode='percentile', + X_r2 = GenericUnivariateSelect(f_regression, mode='percentile', param=25).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -191,7 +191,7 @@ def test_select_percentile_regression_full(): seed=seed) univariate_filter = SelectPercentile(f_regression, percentile=100) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_regression, mode='percentile', + X_r2 = GenericUnivariateSelect(f_regression, mode='percentile', param=100).fit(X, Y).transform(X) 
assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -209,7 +209,7 @@ def test_select_kbest_regression(): seed=seed) univariate_filter = SelectKBest(f_regression, k=5) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_regression, mode='k_best', + X_r2 = GenericUnivariateSelect(f_regression, mode='k_best', param=5).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -228,7 +228,7 @@ def test_select_fpr_regression(): seed=seed) univariate_filter = SelectFpr(f_regression, alpha=0.01) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_regression, mode='fpr', + X_r2 = GenericUnivariateSelect(f_regression, mode='fpr', param=0.01).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -248,7 +248,7 @@ def test_select_fdr_regression(): seed=2) univariate_filter = SelectFdr(f_regression, alpha=0.01) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_regression, mode='fdr', + X_r2 = GenericUnivariateSelect(f_regression, mode='fdr', param=0.01).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() @@ -267,7 +267,7 @@ def test_select_fwe_regression(): seed=seed) univariate_filter = SelectFwe(f_regression, alpha=0.01) X_r = univariate_filter.fit(X, Y).transform(X) - X_r2 = GenericUnivariateSelect(f_regression, mode='fwe', + X_r2 = GenericUnivariateSelect(f_regression, mode='fwe', param=0.01).fit(X, Y).transform(X) assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() diff --git a/scikits/learn/glm/bayes.py b/scikits/learn/glm/bayes.py index 17dde8731a..afd017483f 100644 --- a/scikits/learn/glm/bayes.py +++ b/scikits/learn/glm/bayes.py @@ -116,7 +116,7 @@ class BayesianRidge(LinearModel): def fit(self, X, y, **params): """Fit the model - + Parameters ---------- X : numpy array of shape [n_samples,n_features] diff --git a/scikits/learn/glm/benchmarks/bench_bayes.py b/scikits/learn/glm/benchmarks/bench_bayes.py index 8a7bd40424..665f91aaa6 100644 --- a/scikits/learn/glm/benchmarks/bench_bayes.py +++ b/scikits/learn/glm/benchmarks/bench_bayes.py @@ -13,7 +13,7 @@ from scikits.learn.utils.bench import total_seconds if __name__ == '__main__': import pylab as pl - + n_iter = 20 time_ridge = np.empty(n_iter) @@ -24,7 +24,7 @@ if __name__ == '__main__': n_samples, n_features = 100, 100 - X = np.random.randn(n_samples, n_features) + X = np.random.randn(n_samples, n_features) y = np.random.randn(n_samples) start = datetime.now() diff --git a/scikits/learn/glm/benchmarks/bench_glm.py b/scikits/learn/glm/benchmarks/bench_glm.py index 97d3d30733..d249bd9582 100644 --- a/scikits/learn/glm/benchmarks/bench_glm.py +++ b/scikits/learn/glm/benchmarks/bench_glm.py @@ -28,7 +28,7 @@ if __name__ == '__main__': n, m = 10*i + 3, 10*i + 3 - X = np.random.randn (n, m) + X = np.random.randn (n, m) Y = np.random.randn (n) start = datetime.now() @@ -47,7 +47,7 @@ if __name__ == '__main__': lasso.fit (X, Y) time_lasso[i] = total_seconds(datetime.now() - start) - + pl.xlabel ('Dimesions') pl.ylabel ('Time (in seconds)') diff --git a/scikits/learn/glm/tests/test_ridge.py b/scikits/learn/glm/tests/test_ridge.py index 99e08efa38..b27bd0fabf 100644 --- a/scikits/learn/glm/tests/test_ridge.py +++ b/scikits/learn/glm/tests/test_ridge.py @@ -12,17 +12,17 @@ def test_ridge(): of np.random. 
""" alpha = 1.0 - + # With more samples than features n_samples, n_features = 6, 5 np.random.seed(0) y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) - + ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert ridge.score (X, y) > 0.5 - + # With more features than samples n_samples, n_features = 5, 10 np.random.seed(0) @@ -59,7 +59,7 @@ def test_ridge_vs_lstsq(): ridge = Ridge(alpha=0.) ols = LinearRegression() - + ridge.fit(X, y) ols.fit (X, y) assert np.linalg.norm (ridge.coef_ - ols.coef_) < 1e-10 diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py index f760e2c192..b851762384 100644 --- a/scikits/learn/hmm.py +++ b/scikits/learn/hmm.py @@ -808,7 +808,7 @@ class MultinomialHMM(_BaseHMM): [ 0.5, 0.5]]), startprob_prior=1.0, n_states=2, startprob=array([ 0.5, 0.5]), transmat_prior=1.0) - + See Also -------- GaussianHMM : HMM with Gaussian emissions @@ -923,9 +923,9 @@ class GMMHMM(_BaseHMM): ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE GMMHMM(n_mix=10, cvtype='diag', n_states=2, startprob_prior=1.0, startprob=array([ 0.5, 0.5]), - transmat=array([[ 0.5, 0.5], - [ 0.5, 0.5]]), - transmat_prior=1.0, + transmat=array([[ 0.5, 0.5], + [ 0.5, 0.5]]), + transmat_prior=1.0, gmms=[GMM(cvtype='diag', n_states=10), GMM(cvtype='diag', n_states=10)]) See Also diff --git a/scikits/learn/lda.py b/scikits/learn/lda.py index 192f5a1013..3493641695 100644 --- a/scikits/learn/lda.py +++ b/scikits/learn/lda.py @@ -125,7 +125,7 @@ class LDA(BaseEstimator, ClassifierMixin): if store_covariance: cov /= (n_samples - n_classes) self.covariance_ = cov - + means = np.asarray(means) Xc = np.concatenate(Xc, 0) diff --git a/scikits/learn/neighbors.py b/scikits/learn/neighbors.py index 06385b50ff..31230b5d28 100644 --- a/scikits/learn/neighbors.py +++ b/scikits/learn/neighbors.py @@ -96,7 +96,7 @@ class Neighbors(BaseEstimator, ClassifierMixin): (array([ 0.5 , 1.11803399]), array([1, 2])) """ - if k is None: + if k is None: k=self.k return self.ball_tree.query(data, k=k) @@ -132,7 +132,7 @@ class Neighbors(BaseEstimator, ClassifierMixin): [0 1] """ T = np.asanyarray(T) - if k is None: + if k is None: k=self.k return _predict_from_BallTree(self.ball_tree, self.Y, T, k=k) diff --git a/scikits/learn/pca.py b/scikits/learn/pca.py index f972213806..c1455f4b3e 100644 --- a/scikits/learn/pca.py +++ b/scikits/learn/pca.py @@ -16,16 +16,16 @@ def _assess_dimension_(spect, rk, n_samples, dim): ---------- spect: array of shape (n) data spectrum - rk: int, + rk: int, tested rank value - n_samples: int, + n_samples: int, number of samples - dim: int, + dim: int, embedding/emprical dimension Returns ------- - ll: float, + ll: float, The log-likelihood Notes @@ -174,7 +174,7 @@ class ProbabilisticPCA(PCA): Parameters ---------- - X: array of shape(n_samples, n_dim) + X: array of shape(n_samples, n_dim) The data to fit homoscedastic: bool, optional, If True, average variance across remaining dimensions diff --git a/scikits/learn/sgd/base.py b/scikits/learn/sgd/base.py index 5f3051a800..d06ff1f8c6 100644 --- a/scikits/learn/sgd/base.py +++ b/scikits/learn/sgd/base.py @@ -16,7 +16,7 @@ class LinearModel(BaseEstimator): Parameters ---------- loss : str, ('hinge'|'log'|'modifiedhuber') - The loss function to be used. Defaults to 'hinge'. + The loss function to be used. Defaults to 'hinge'. penalty : str, ('l2'|'l1'|'elasticnet') The penalty (aka regularization term) to be used. Defaults to 'l2'. alpha : float @@ -33,7 +33,7 @@ class LinearModel(BaseEstimator): data is assumed to be already centered. 
Defaults to True. n_iter: int The number of passes over the training data (aka epochs). - Defaults to 5. + Defaults to 5. shuffle: bool Whether or not the training data should be shuffled after each epoch. Defaults to False. diff --git a/scikits/learn/sgd/setup.py b/scikits/learn/sgd/setup.py index 7f26520788..5b05589e17 100644 --- a/scikits/learn/sgd/setup.py +++ b/scikits/learn/sgd/setup.py @@ -7,7 +7,7 @@ def configuration(parent_package='', top_path=None): site_cfg = ConfigParser() site_cfg.read(get_standard_file('site.cfg')) - + # add other directories config.add_subpackage('tests') config.add_subpackage('sparse') diff --git a/scikits/learn/sgd/sparse/sgd.py b/scikits/learn/sgd/sparse/sgd.py index b4a3862c28..af1ab39219 100644 --- a/scikits/learn/sgd/sparse/sgd.py +++ b/scikits/learn/sgd/sparse/sgd.py @@ -21,7 +21,7 @@ class SGD(LinearModel, ClassifierMixin): Parameters ---------- loss : str, ('hinge'|'log'|'modifiedhuber') - The loss function to be used. Defaults to 'hinge'. + The loss function to be used. Defaults to 'hinge'. penalty : str, ('l2'|'l1'|'elasticnet') The penalty (aka regularization term) to be used. Defaults to 'l2'. alpha : float @@ -38,7 +38,7 @@ class SGD(LinearModel, ClassifierMixin): data is assumed to be already centered. Defaults to True. n_iter: int The number of passes over the training data (aka epochs). - Defaults to 5. + Defaults to 5. shuffle: bool Whether or not the training data should be shuffled after each epoch. Defaults to False. @@ -169,7 +169,7 @@ class SGD(LinearModel, ClassifierMixin): if self.coef_.shape != (n_classes, n_features): raise ValueError("Provided coef_ does not match dataset. ") coef_ = self.coef_ - + if self.intercept_ is None \ or isinstance(self.intercept_, float): intercept_ = np.zeros(n_classes, dtype=np.float64, @@ -187,13 +187,13 @@ class SGD(LinearModel, ClassifierMixin): res = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(_train_ova_classifier)(i, c, X_data, X_indices, - X_indptr, Y, self) + X_indptr, Y, self) for i, c in enumerate(self.classes)) for i, coef, intercept in res: coef_[i] = coef intercept_[i] = intercept - + self._set_coef(coef_) self.intercept_ = intercept_ @@ -260,7 +260,7 @@ class SGD(LinearModel, ClassifierMixin): This is important for joblib because otherwise it will crash trying to pickle - the external loss function object. + the external loss function object. """ return SGD,(self.loss, self.penalty, self.alpha, self.rho, self.coef_, self.intercept_, self.fit_intercept, self.n_iter, diff --git a/scikits/learn/sgd/tests/test_sparse.py b/scikits/learn/sgd/tests/test_sparse.py index c5811a3834..d7f560baff 100644 --- a/scikits/learn/sgd/tests/test_sparse.py +++ b/scikits/learn/sgd/tests/test_sparse.py @@ -115,7 +115,7 @@ def test_set_coef(): pass else: assert False - + clf = sgd.sparse.SGD() clf._set_coef(None) assert clf.sparse_coef_ == None @@ -154,7 +154,7 @@ def test_sgd_multiclass_njobs(): def test_set_coef_multiclass(): """Checks coef_ and intercept_ shape for - the warm starts for multi-class problems. + the warm starts for multi-class problems. """ # Provided coef_ does not match dataset. 
try: diff --git a/scikits/learn/svm/sparse/libsvm.py b/scikits/learn/svm/sparse/libsvm.py index 34be970222..cb1dc9047c 100644 --- a/scikits/learn/svm/sparse/libsvm.py +++ b/scikits/learn/svm/sparse/libsvm.py @@ -89,12 +89,12 @@ class OneClassSVM (SparseBaseLibSVM): """ def __init__(self, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, - cache_size=100.0, eps=1e-3, C=1.0, + cache_size=100.0, eps=1e-3, C=1.0, nu=0.5, p=0.1, shrinking=True, probability=False): SparseBaseLibSVM.__init__(self, 'one_class', kernel, degree, gamma, coef0, cache_size, eps, C, nu, p, shrinking, probability) - + def fit(self, X): super(OneClassSVM, self).fit(X, []) diff --git a/scikits/learn/tests/test_base.py b/scikits/learn/tests/test_base.py index de7808bd5c..074c70bb2c 100644 --- a/scikits/learn/tests/test_base.py +++ b/scikits/learn/tests/test_base.py @@ -41,7 +41,7 @@ def test_clone(): new_selector = clone(selector) assert_true(selector is not new_selector) assert_equal(selector._get_params(), new_selector._get_params()) - + def test_clone_2(): """Tests that clone doesn't copy everything. @@ -49,7 +49,7 @@ def test_clone_2(): We first create an estimator, give it an own attribute, and make a copy of its original state. Then we check that the copy doesn't have the specific attribute we manually added to the initial estimator. - + """ from scikits.learn.feature_selection import SelectFpr, f_classif @@ -57,21 +57,21 @@ def test_clone_2(): selector.own_attribute = "test" new_selector = clone(selector) assert_false(hasattr(new_selector, "own_attribute")) - + def test_repr(): - """ Smoke test the repr of the + """ Smoke test the repr of the """ my_estimator = MyEstimator() repr(my_estimator) test = T(K(), K()) - assert_equal(repr(test), + assert_equal(repr(test), "T(a=K(c=None, d=None), b=K(c=None, d=None))" ) def test_str(): - """ Smoke test the str of the + """ Smoke test the str of the """ my_estimator = MyEstimator() str(my_estimator) @@ -96,6 +96,6 @@ def test_is_classifier(): assert_true(is_classifier(svc)) assert_true(is_classifier(GridSearchCV(svc, {'C': [0.1, 1]}))) assert_true(is_classifier(Pipeline([('svc', svc)]))) - assert_true(is_classifier(Pipeline([('svc_cv', + assert_true(is_classifier(Pipeline([('svc_cv', GridSearchCV(svc, {'C': [0.1, 1]}))]))) diff --git a/scikits/learn/tests/test_cross_val.py b/scikits/learn/tests/test_cross_val.py index 667de59b8d..871fc76d36 100644 --- a/scikits/learn/tests/test_cross_val.py +++ b/scikits/learn/tests/test_cross_val.py @@ -10,7 +10,7 @@ from .. import cross_val class MockClassifier(BaseEstimator): """Dummy classifier to test the cross-validation - + """ def __init__(self, a=0): self.a = a diff --git a/scikits/learn/tests/test_fastica.py b/scikits/learn/tests/test_fastica.py index 2d93702337..a3fad0acdb 100644 --- a/scikits/learn/tests/test_fastica.py +++ b/scikits/learn/tests/test_fastica.py @@ -13,7 +13,7 @@ def center_and_norm(x, axis=-1): Parameters ----------- x: ndarray - Array with an axis of observations (statistical units) measured on + Array with an axis of observations (statistical units) measured on random variables. axis: int, optionnal Axis along which the mean and variance are calculated. 
@@ -53,13 +53,13 @@ def test_fastica(add_noise=False): # Mixing angle phi = 0.6 - mixing = np.array([[np.cos(phi), np.sin(phi)], + mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]]) m = np.dot(mixing, s) if add_noise: - m += 0.1*np.random.randn(2, 1000) - + m += 0.1*np.random.randn(2, 1000) + center_and_norm(m) algorithm = ['parallel', 'deflation'] @@ -118,14 +118,14 @@ def test_non_square_fastica(add_noise=False): if add_noise: m += 0.1*np.random.randn(6, n_samples) - + center_and_norm(m) k_, mixing_, s_ = fastica.fastica(m, n_comp=2) # Check that the mixing model described in the docstring holds: np.testing.assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m)) - + center_and_norm(s_) s1_, s2_ = s_ # Check to see if the sources have been estimated diff --git a/scikits/learn/tests/test_gmm.py b/scikits/learn/tests/test_gmm.py index 46d514e10d..215944efe5 100644 --- a/scikits/learn/tests/test_gmm.py +++ b/scikits/learn/tests/test_gmm.py @@ -54,7 +54,7 @@ def test_sample_gaussian(): Test sample generation from gmm.sample_gaussian where covariance is diagonal, spherical and full """ - + n_dim, n_samples = 2, 300 axis = 1 mu = np.random.randint(10) * np.random.rand(n_dim) diff --git a/scikits/learn/tests/test_hmm.py b/scikits/learn/tests/test_hmm.py index b19a5d44f4..69c9bdf74e 100644 --- a/scikits/learn/tests/test_hmm.py +++ b/scikits/learn/tests/test_hmm.py @@ -204,7 +204,7 @@ class TestBaseHMM(SeedRandomNumberGeneratorTestCase): np.zeros((n_states - 2, n_states))) -def train_hmm_and_keep_track_of_log_likelihood(hmm, obs, n_iter=1, **kwargs): +def train_hmm_and_keep_track_of_log_likelihood(hmm, obs, n_iter=1, **kwargs): hmm.fit(obs, n_iter=1, **kwargs) loglikelihoods = [] for n in xrange(n_iter): diff --git a/scikits/learn/tests/test_neighbors.py b/scikits/learn/tests/test_neighbors.py index 05bd276b86..a7dbc990fc 100644 --- a/scikits/learn/tests/test_neighbors.py +++ b/scikits/learn/tests/test_neighbors.py @@ -48,4 +48,4 @@ def test_neighbors_2D(): prediction = knn.predict([[0, .1], [0, -.1], [.1, 0], [-.1, 0]]) assert_array_equal(prediction, [0, 1, 0, 1]) - + diff --git a/scikits/learn/tests/test_pipeline.py b/scikits/learn/tests/test_pipeline.py index 9e8bdbdbdf..edec37420c 100644 --- a/scikits/learn/tests/test_pipeline.py +++ b/scikits/learn/tests/test_pipeline.py @@ -26,12 +26,12 @@ def test_pipeline_init(): assert_raises(TypeError, Pipeline) # Check that we can't instantiate pipelines with objects without fit # method - pipe = assert_raises(AssertionError, Pipeline, + pipe = assert_raises(AssertionError, Pipeline, [('svc', IncorrectT)]) # Smoke test with only an estimator clf = T() pipe = Pipeline([('svc', clf)]) - assert_equal(pipe._get_params(deep=True), + assert_equal(pipe._get_params(deep=True), dict(svc__a=None, svc__b=None, svc=clf)) # Check that params are set @@ -39,7 +39,7 @@ def test_pipeline_init(): assert_equal(clf.a, 0.1) # Smoke test the repr: repr(pipe) - + # Test with two objects clf = SVC() filter1 = SelectKBest(f_classif) diff --git a/scikits/learn/utils/_csgraph.py b/scikits/learn/utils/_csgraph.py index 535ac4cd47..e395c6b1d8 100644 --- a/scikits/learn/utils/_csgraph.py +++ b/scikits/learn/utils/_csgraph.py @@ -54,7 +54,7 @@ def cs_graph_components(x): >>> D[0,1] = D[1,0] = 1 >>> cs_graph_components(D) (3, array([0, 0, 1, 2])) - >>> from scipy.sparse import dok_matrix + >>> from scipy.sparse import dok_matrix >>> cs_graph_components(dok_matrix(D)) (3, array([0, 0, 1, 2])) @@ -63,7 +63,7 @@ def cs_graph_components(x): shape = x.shape 
except AttributeError: raise ValueError(_msg0) - + if not ((len(x.shape) == 2) and (x.shape[0] == x.shape[1])): raise ValueError(_msg1 % x.shape) @@ -71,7 +71,7 @@ def cs_graph_components(x): x = x.tocsr() else: x = csr_matrix(x) - + label = np.empty((shape[0],), dtype=x.indptr.dtype) n_comp = _cs_graph_components(shape[0], x.indptr, x.indices, label) diff --git a/scikits/learn/utils/bench.py b/scikits/learn/utils/bench.py index f1984019ef..40f1f2a9ec 100644 --- a/scikits/learn/utils/bench.py +++ b/scikits/learn/utils/bench.py @@ -14,4 +14,4 @@ def total_seconds(delta): return delta.seconds + delta.microseconds * mu_sec - + diff --git a/scikits/learn/utils/fixes.py b/scikits/learn/utils/fixes.py index 9bb5a872d8..52557f62fe 100644 --- a/scikits/learn/utils/fixes.py +++ b/scikits/learn/utils/fixes.py @@ -18,15 +18,15 @@ def _unique(ar, return_index=False, return_inverse=False): return np.asarray(items) else: ar = np.asanyarray(ar).flatten() - + if ar.size == 0: if return_inverse and return_index: return ar, np.empty(0, np.bool), np.empty(0, np.bool) elif return_inverse or return_index: return ar, np.empty(0, np.bool) - else: + else: return ar - + if return_inverse or return_index: perm = ar.argsort() aux = ar[perm] diff --git a/scikits/learn/utils/graph.py b/scikits/learn/utils/graph.py index f3b94957ed..00269ed249 100644 --- a/scikits/learn/utils/graph.py +++ b/scikits/learn/utils/graph.py @@ -1,11 +1,11 @@ """ Graph utilities and algorithms -Graphs are represented with their adjacency matrices, preferably using +Graphs are represented with their adjacency matrices, preferably using sparse matrices. """ -# Authors: Aric Hagberg <hagberg@lanl.gov> +# Authors: Aric Hagberg <hagberg@lanl.gov> # Gael Varoquaux <gael.varoquaux@normalesup.org> # License: BSD @@ -55,12 +55,12 @@ def single_source_shortest_path_length(graph, source, cutoff=None): this_level = next_level # advance to next level next_level = set() # and start a new list (fringe) for v in this_level: - if v not in seen: + if v not in seen: seen[v] = level # set the level of vertex v neighbors = np.array(graph.rows[v]) # Restrict to the upper triangle neighbors = neighbors[neighbors > v] - next_level.update(neighbors) + next_level.update(neighbors) if cutoff is not None and cutoff <= level: break level += 1 @@ -131,7 +131,7 @@ def _graph_laplacian_dense(graph, normed=False, return_diag=False): if return_diag: return lap, w return lap - + def graph_laplacian(graph, normed=False, return_diag=False): """ Return the Laplacian of the given graph. 
@@ -146,4 +146,4 @@ def graph_laplacian(graph, normed=False, return_diag=False): # We have a numpy array return _graph_laplacian_dense(graph, normed=normed, return_diag=return_diag) - + diff --git a/scikits/learn/utils/sparsetools/setup.py b/scikits/learn/utils/sparsetools/setup.py index 57ca85201e..d9cbe508c8 100644 --- a/scikits/learn/utils/sparsetools/setup.py +++ b/scikits/learn/utils/sparsetools/setup.py @@ -8,7 +8,7 @@ def configuration(parent_package='',top_path=None): sources = [ fmt + '_wrap.cxx' ] depends = [ fmt + '.h' ] config.add_extension('_' + fmt, sources=sources, - define_macros=[('__STDC_FORMAT_MACROS', 1)], + define_macros=[('__STDC_FORMAT_MACROS', 1)], depends=depends) return config diff --git a/setup.py b/setup.py index 408c2211f5..daa16909d1 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ descr = """A set of python modules for machine learning and data mining""" import os -DISTNAME = 'scikits.learn' +DISTNAME = 'scikits.learn' DESCRIPTION = 'A set of python modules for machine learning and data mining' LONG_DESCRIPTION = descr MAINTAINER = 'Fabian Pedregosa' @@ -46,7 +46,7 @@ if __name__ == "__main__": download_url = DOWNLOAD_URL, long_description = LONG_DESCRIPTION, zip_safe=False, # the package can run out of an .egg file - classifiers = + classifiers = ['Intended Audience :: Science/Research', 'Intended Audience :: Developers', 'License :: OSI Approved', -- GitLab