Commit 5d2a7fd1 authored by Olivier Grisel

small fixes

parent a45a01ed
@@ -58,9 +58,9 @@ print "done in %fs" % (time() - t0)
 # The documents have been hashed into TF-IDF (Term Frequencies times Inverse
 # Document Frequencies) vectors of a fixed dimension.
 # Currently most scikits.learn wrappers or algorithm implementations are unable
-# to leverage efficiently a sparse datastracture; hence we use of a this dense
+# to leverage efficiently a sparse datastracture; hence we use a dense
 # representation of a text dataset. Efficient handling of sparse data
-# structures should be expected for in an upcoming version of scikits.learn
+# structures should be expected in an upcoming version of scikits.learn
 print "n_samples: %d, n_features: %d" % news_train.data.shape
 print "Training a linear classification model with L1 penalty... "
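The comment touched by this hunk describes hashing the documents into fixed-dimension TF-IDF vectors and then working on a dense array. Below is a minimal sketch of that hashing idea in plain NumPy, not the scikits.learn HashingVectorizer API; the dimension, tokenizer, and function name are illustrative choices, and IDF weighting is omitted:

    import numpy as np

    def hash_vectorize(documents, dim=2 ** 16):
        # Map each document to a dense term-frequency vector of size `dim`
        # by hashing tokens to column indices.
        vectors = np.zeros((len(documents), dim))
        for i, doc in enumerate(documents):
            for token in doc.lower().split():
                vectors[i, hash(token) % dim] += 1.0
        return vectors  # dense ndarray, as the comment above describes

    X = hash_vectorize(["the quick brown fox", "the lazy dog jumps"])
    print "n_samples: %d, n_features: %d" % X.shape

Every document lands in the same fixed number of columns regardless of vocabulary size, which is what lets the example build a dense matrix up front.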
@@ -8,7 +8,7 @@ from scikits.learn.datasets.base import Bunch
 from scikits.learn.features.text import HashingVectorizer

-def load_document_classification(dataset_path, metadata, set_, **kw):
+def _load_document_classification(dataset_path, metadata, set_, **kw):
     """Loader implementation for the DocumentClassification format"""
     target = []
     target_names = {}
@@ -39,7 +39,7 @@ def load_document_classification(dataset_path, metadata, set_, **kw):
 LOADERS = {
-    'DocumentClassification': load_document_classification,
+    'DocumentClassification': _load_document_classification,
     # TODO: implement the remaining domain formats
 }
@@ -127,5 +127,3 @@ def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs):
     return loader(dataset_path, metadata, set_=set_, **kwargs)
-if __name__ == "__main__":
-    twentynews = load_mlcomp('20news-18828')
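These hunks rename the loader to a module-private name while keeping it registered in LOADERS, so callers reach it only through load_mlcomp's dispatch on the dataset format. A self-contained sketch of that registry pattern follows; the format key, toy loader body, and public function name are illustrative, not the scikits.learn implementation:

    def _load_document_classification(dataset_path, metadata, set_, **kw):
        # Toy private loader: pretend to read the requested split.
        return {'path': dataset_path, 'set': set_, 'format': metadata['format']}

    LOADERS = {
        'DocumentClassification': _load_document_classification,
        # further domain formats would register their own private loaders here
    }

    def load_toy_dataset(dataset_path, metadata, set_="raw", **kwargs):
        # Public entry point: dispatch on the declared format.
        loader = LOADERS[metadata['format']]
        return loader(dataset_path, metadata, set_=set_, **kwargs)

    print load_toy_dataset('/tmp/20news-18828',
                           {'format': 'DocumentClassification'}, set_='train')

Keeping the concrete loaders private makes the registry the single extension point: new domain formats register a private function rather than adding new public entry points.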