From 5d2a7fd193ce0606ca8b6e5358864eda42359ac3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 6 Jul 2010 02:18:45 +0200
Subject: [PATCH] small fixes

---
 examples/mlcomp_document_classification.py | 4 ++--
 scikits/learn/datasets/mlcomp.py           | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/mlcomp_document_classification.py b/examples/mlcomp_document_classification.py
index 2782a393b8..2c126f488b 100644
--- a/examples/mlcomp_document_classification.py
+++ b/examples/mlcomp_document_classification.py
@@ -58,9 +58,9 @@ print "done in %fs" % (time() - t0)
 # The documents have been hashed into TF-IDF (Term Frequencies times Inverse
 # Document Frequencies) vectors of a fixed dimension.
 # Currently most scikits.learn wrappers or algorithm implementations are unable
-# to leverage efficiently a sparse datastracture; hence we use of a this dense
+# to leverage efficiently a sparse data structure; hence we use a dense
 # representation of a text dataset. Efficient handling of sparse data
-# structures should be expected for in an upcoming version of scikits.learn
+# structures should be expected in an upcoming version of scikits.learn
 print "n_samples: %d, n_features: %d" % news_train.data.shape
 
 print "Training a linear classification model with L1 penalty... "
diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py
index f6678dbe32..d47ed5be45 100644
--- a/scikits/learn/datasets/mlcomp.py
+++ b/scikits/learn/datasets/mlcomp.py
@@ -8,7 +8,7 @@ from scikits.learn.datasets.base import Bunch
 from scikits.learn.features.text import HashingVectorizer
 
 
-def load_document_classification(dataset_path, metadata, set_, **kw):
+def _load_document_classification(dataset_path, metadata, set_, **kw):
     """Loader implementation for the DocumentClassification format"""
     target = []
     target_names = {}
@@ -39,7 +39,7 @@ def load_document_classification(dataset_path, metadata, set_, **kw):
 
 
 LOADERS = {
-    'DocumentClassification': load_document_classification,
+    'DocumentClassification': _load_document_classification,
     # TODO: implement the remaining domain formats
 }
 
@@ -127,5 +127,3 @@ def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs):
     return loader(dataset_path, metadata, set_=set_, **kwargs)
 
 
-if __name__ == "__main__":
-    twentynews = load_mlcomp('20news-18828')
-- 
GitLab