Commit 5d2a7fd1 authored by Olivier Grisel

small fixes

parent a45a01ed
@@ -58,9 +58,9 @@ print "done in %fs" % (time() - t0)
 # The documents have been hashed into TF-IDF (Term Frequencies times Inverse
 # Document Frequencies) vectors of a fixed dimension.
 # Currently most scikits.learn wrappers or algorithm implementations are unable
-# to leverage efficiently a sparse datastracture; hence we use of a this dense
+# to leverage efficiently a sparse datastracture; hence we use a dense
 # representation of a text dataset. Efficient handling of sparse data
-# structures should be expected for in an upcoming version of scikits.learn
+# structures should be expected in an upcoming version of scikits.learn
 print "n_samples: %d, n_features: %d" % news_train.data.shape
 print "Training a linear classification model with L1 penalty... "
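The comment touched by this hunk describes hashing the documents into fixed-dimension TF-IDF vectors and then working on a dense array. Below is a minimal sketch of that hashing idea in plain NumPy, not the scikits.learn HashingVectorizer API; the dimension, tokenizer, and function name are illustrative choices, and IDF weighting is omitted:

    import numpy as np

    def hash_vectorize(documents, dim=2 ** 16):
        # Map each document to a dense term-frequency vector of size `dim`
        # by hashing tokens to column indices.
        vectors = np.zeros((len(documents), dim))
        for i, doc in enumerate(documents):
            for token in doc.lower().split():
                vectors[i, hash(token) % dim] += 1.0
        return vectors  # dense ndarray, as the comment above describes

    X = hash_vectorize(["the quick brown fox", "the lazy dog jumps"])
    print "n_samples: %d, n_features: %d" % X.shape

Every document lands in the same fixed number of columns regardless of vocabulary size, which is what lets the example build a dense matrix up front.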
@@ -8,7 +8,7 @@ from scikits.learn.datasets.base import Bunch
 from scikits.learn.features.text import HashingVectorizer

-def load_document_classification(dataset_path, metadata, set_, **kw):
+def _load_document_classification(dataset_path, metadata, set_, **kw):
     """Loader implementation for the DocumentClassification format"""
     target = []
     target_names = {}
@@ -39,7 +39,7 @@ def load_document_classification(dataset_path, metadata, set_, **kw):
 LOADERS = {
-    'DocumentClassification': load_document_classification,
+    'DocumentClassification': _load_document_classification,
     # TODO: implement the remaining domain formats
 }
@@ -127,5 +127,3 @@ def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs):
     return loader(dataset_path, metadata, set_=set_, **kwargs)
-if __name__ == "__main__":
-    twentynews = load_mlcomp('20news-18828')
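These hunks rename the loader to a module-private name while keeping it registered in LOADERS, so callers reach it only through load_mlcomp's dispatch on the dataset format. A self-contained sketch of that registry pattern follows; the format key, toy loader body, and public function name are illustrative, not the scikits.learn implementation:

    def _load_document_classification(dataset_path, metadata, set_, **kw):
        # Toy private loader: pretend to read the requested split.
        return {'path': dataset_path, 'set': set_, 'format': metadata['format']}

    LOADERS = {
        'DocumentClassification': _load_document_classification,
        # further domain formats would register their own private loaders here
    }

    def load_toy_dataset(dataset_path, metadata, set_="raw", **kwargs):
        # Public entry point: dispatch on the declared format.
        loader = LOADERS[metadata['format']]
        return loader(dataset_path, metadata, set_=set_, **kwargs)

    print load_toy_dataset('/tmp/20news-18828',
                           {'format': 'DocumentClassification'}, set_='train')

Keeping the concrete loaders private makes the registry the single extension point: new domain formats register a private function rather than adding new public entry points.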