From 7525e14b31dc93c926b9e2154b1c83daf460af68 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel <mathieu@mblondel.org> Date: Thu, 21 Oct 2010 12:44:45 +0900 Subject: [PATCH] Remove Sparse prefix from class names. --- scikits/learn/datasets/mlcomp.py | 3 ++- .../feature_extraction/sparse/__init__.py | 4 ++-- .../learn/feature_extraction/sparse/text.py | 14 +++++++------- .../feature_extraction/tests/test_text.py | 19 ++++++++++++------- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py index fa14da4015..6bdc20fa99 100644 --- a/scikits/learn/datasets/mlcomp.py +++ b/scikits/learn/datasets/mlcomp.py @@ -6,7 +6,8 @@ import os import numpy as np from scikits.learn.datasets.base import load_text_files from scikits.learn.feature_extraction.text import HashingVectorizer -from scikits.learn.feature_extraction.sparse.text import SparseHashingVectorizer +from scikits.learn.feature_extraction.sparse.text import HashingVectorizer as \ + SparseCountVectorizer def _load_document_classification(dataset_path, metadata, set_=None): diff --git a/scikits/learn/feature_extraction/sparse/__init__.py b/scikits/learn/feature_extraction/sparse/__init__.py index 890ce8f7f1..fa0f61b5bd 100644 --- a/scikits/learn/feature_extraction/sparse/__init__.py +++ b/scikits/learn/feature_extraction/sparse/__init__.py @@ -1,3 +1,3 @@ -from .text import SparseCountVectorizer, SparseTfidfTransformer, \ - SparseVectorizer, SparseHashingVectorizer +from .text import CountVectorizer, TfidfTransformer, Vectorizer, \ + HashingVectorizer diff --git a/scikits/learn/feature_extraction/sparse/text.py b/scikits/learn/feature_extraction/sparse/text.py index 1ba7dd7174..290f4d9f4b 100644 --- a/scikits/learn/feature_extraction/sparse/text.py +++ b/scikits/learn/feature_extraction/sparse/text.py @@ -11,12 +11,12 @@ import scipy.sparse as sp from ..text import BaseCountVectorizer, BaseTfidfTransformer, BaseVectorizer, \ DEFAULT_ANALYZER -class SparseCountVectorizer(BaseCountVectorizer): +class CountVectorizer(BaseCountVectorizer): def _init_matrix(self, shape): return sp.dok_matrix(shape, dtype=self.dtype) -class SparseTfidfTransformer(BaseTfidfTransformer): +class TfidfTransformer(BaseTfidfTransformer): def fit(self, X, y=None): """ @@ -73,11 +73,11 @@ class SparseTfidfTransformer(BaseTfidfTransformer): return X -class SparseVectorizer(BaseVectorizer): +class Vectorizer(BaseVectorizer): """ Convert a collection of raw documents to a sparse matrix. - Equivalent to SparseCountVectorizer followed by SparseTfidfTransformer. + Equivalent to CountVectorizer followed by TfidfTransformer. """ def __init__(self, @@ -85,10 +85,10 @@ class SparseVectorizer(BaseVectorizer): use_tf=True, use_idf=True, normalize=False): - self.tc = SparseCountVectorizer(analyzer, dtype=np.float64) - self.tfidf = SparseTfidfTransformer(use_tf, use_idf, normalize) + self.tc = CountVectorizer(analyzer, dtype=np.float64) + self.tfidf = TfidfTransformer(use_tf, use_idf, normalize) -class SparseHashingVectorizer(object): +class HashingVectorizer(object): """Compute term freq vectors using hashed term space in a sparse matrix The logic is the same as HashingVectorizer but it is possible to use much diff --git a/scikits/learn/feature_extraction/tests/test_text.py b/scikits/learn/feature_extraction/tests/test_text.py index f741ce6a48..2ce2e325b1 100644 --- a/scikits/learn/feature_extraction/tests/test_text.py +++ b/scikits/learn/feature_extraction/tests/test_text.py @@ -1,14 +1,19 @@ from scikits.learn.feature_extraction.text import CharNGramAnalyzer +from scikits.learn.feature_extraction.text import WordNGramAnalyzer +from scikits.learn.feature_extraction.text import strip_accents + from scikits.learn.feature_extraction.text import CountVectorizer -from scikits.learn.feature_extraction.text import HashingVectorizer from scikits.learn.feature_extraction.text import TfidfTransformer from scikits.learn.feature_extraction.text import Vectorizer -from scikits.learn.feature_extraction.sparse.text import SparseCountVectorizer -from scikits.learn.feature_extraction.sparse.text import SparseHashingVectorizer -from scikits.learn.feature_extraction.sparse.text import SparseTfidfTransformer -from scikits.learn.feature_extraction.sparse.text import SparseVectorizer -from scikits.learn.feature_extraction.text import WordNGramAnalyzer -from scikits.learn.feature_extraction.text import strip_accents +from scikits.learn.feature_extraction.text import HashingVectorizer + +import scikits.learn.feature_extraction.sparse.text as st + +SparseCountVectorizer = st.CountVectorizer +SparseTfidfTransformer = st.TfidfTransformer +SparseVectorizer = st.Vectorizer +SparseHashingVectorizer = st.HashingVectorizer + from scikits.learn.grid_search import GridSearchCV from scikits.learn.pipeline import Pipeline from scikits.learn.svm import LinearSVC as DenseLinearSVC -- GitLab