From 5d2a7fd193ce0606ca8b6e5358864eda42359ac3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 6 Jul 2010 02:18:45 +0200
Subject: [PATCH] small fixes

---
 examples/mlcomp_document_classification.py | 4 ++--
 scikits/learn/datasets/mlcomp.py           | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/mlcomp_document_classification.py b/examples/mlcomp_document_classification.py
index 2782a393b8..2c126f488b 100644
--- a/examples/mlcomp_document_classification.py
+++ b/examples/mlcomp_document_classification.py
@@ -58,9 +58,9 @@ print "done in %fs" % (time() - t0)
 # The documents have been hashed into TF-IDF (Term Frequencies times Inverse
 # Document Frequencies) vectors of a fixed dimension.
 # Currently most scikits.learn wrappers or algorithm implementations are unable
-# to leverage efficiently a sparse datastracture; hence we use of a this dense
+# to leverage efficiently a sparse datastracture; hence we use a dense
 # representation of a text dataset. Efficient handling of sparse data
-# structures should be expected for in an upcoming version of scikits.learn
+# structures should be expected in an upcoming version of scikits.learn
 print "n_samples: %d, n_features: %d" % news_train.data.shape
 
 print "Training a linear classification model with L1 penalty... "
diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py
index f6678dbe32..d47ed5be45 100644
--- a/scikits/learn/datasets/mlcomp.py
+++ b/scikits/learn/datasets/mlcomp.py
@@ -8,7 +8,7 @@ from scikits.learn.datasets.base import Bunch
 from scikits.learn.features.text import HashingVectorizer
 
 
-def load_document_classification(dataset_path, metadata, set_, **kw):
+def _load_document_classification(dataset_path, metadata, set_, **kw):
     """Loader implementation for the DocumentClassification format"""
     target = []
     target_names = {}
@@ -39,7 +39,7 @@ def load_document_classification(dataset_path, metadata, set_, **kw):
 
 
 LOADERS = {
-    'DocumentClassification': load_document_classification,
+    'DocumentClassification': _load_document_classification,
     # TODO: implement the remaining domain formats
 }
 
@@ -127,5 +127,3 @@ def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs):
     return loader(dataset_path, metadata, set_=set_, **kwargs)
 
 
-if __name__ == "__main__":
-    twentynews = load_mlcomp('20news-18828')
-- 
GitLab
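
Usage sketch: this change removes the inline `if __name__ == "__main__"` demo and makes the per-format helper private (`_load_document_classification`), so callers keep going through the public `load_mlcomp` entry point. The snippet below is a minimal sketch of that call path, modelled on the removed demo and on examples/mlcomp_document_classification.py; the `set_='train'` value and the assumption that the '20news-18828' MLComp dataset is available locally (via the `mlcomp_root` argument or the library's default lookup) are illustrative, not verified against this revision.

    from time import time

    from scikits.learn.datasets.mlcomp import load_mlcomp

    t0 = time()
    # Assumption: the MLComp '20news-18828' dataset has already been
    # downloaded and is reachable through mlcomp_root (or the library's
    # default location).
    news_train = load_mlcomp('20news-18828', set_='train')
    print "done in %fs" % (time() - t0)

    # load_mlcomp returns a Bunch; for the DocumentClassification format the
    # data attribute holds the dense hashed TF-IDF matrix and target the
    # integer class labels.
    print "n_samples: %d, n_features: %d" % news_train.data.shape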