From 03880d75b751673e9a149975a7a2166c9a0100e5 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel <mathieu@mblondel.org> Date: Fri, 22 Oct 2010 15:30:44 +0900 Subject: [PATCH] Fix hierarchy inconsistency for sparse module. --- scikits/learn/datasets/mlcomp.py | 2 +- .../feature_extraction/sparse/__init__.py | 3 --- .../feature_extraction/tests/test_text.py | 2 +- .../learn/feature_extraction/text/__init__.py | 6 ++++++ .../{text.py => text/dense.py} | 2 +- .../{sparse/text.py => text/sparse.py} | 2 +- .../preprocessing/sparse/src/_preprocessing.c | 18 +++++++++--------- 7 files changed, 19 insertions(+), 16 deletions(-) delete mode 100644 scikits/learn/feature_extraction/sparse/__init__.py create mode 100644 scikits/learn/feature_extraction/text/__init__.py rename scikits/learn/feature_extraction/{text.py => text/dense.py} (99%) rename scikits/learn/feature_extraction/{sparse/text.py => text/sparse.py} (98%) diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py index 6bdc20fa99..4c08d371b1 100644 --- a/scikits/learn/datasets/mlcomp.py +++ b/scikits/learn/datasets/mlcomp.py @@ -6,7 +6,7 @@ import os import numpy as np from scikits.learn.datasets.base import load_text_files from scikits.learn.feature_extraction.text import HashingVectorizer -from scikits.learn.feature_extraction.sparse.text import HashingVectorizer as \ +from scikits.learn.feature_extraction.text.sparse import HashingVectorizer as \ SparseCountVectorizer diff --git a/scikits/learn/feature_extraction/sparse/__init__.py b/scikits/learn/feature_extraction/sparse/__init__.py deleted file mode 100644 index fa0f61b5bd..0000000000 --- a/scikits/learn/feature_extraction/sparse/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ - -from .text import CountVectorizer, TfidfTransformer, Vectorizer, \ - HashingVectorizer diff --git a/scikits/learn/feature_extraction/tests/test_text.py b/scikits/learn/feature_extraction/tests/test_text.py index d6124d833e..bf5ce70b1e 100644 --- a/scikits/learn/feature_extraction/tests/test_text.py +++ b/scikits/learn/feature_extraction/tests/test_text.py @@ -7,7 +7,7 @@ from scikits.learn.feature_extraction.text import TfidfTransformer from scikits.learn.feature_extraction.text import Vectorizer from scikits.learn.feature_extraction.text import HashingVectorizer -import scikits.learn.feature_extraction.sparse.text as st +import scikits.learn.feature_extraction.text.sparse as st SparseCountVectorizer = st.CountVectorizer SparseTfidfTransformer = st.TfidfTransformer diff --git a/scikits/learn/feature_extraction/text/__init__.py b/scikits/learn/feature_extraction/text/__init__.py new file mode 100644 index 0000000000..8919688b39 --- /dev/null +++ b/scikits/learn/feature_extraction/text/__init__.py @@ -0,0 +1,6 @@ + +from .dense import ENGLISH_STOP_WORDS, strip_accents, strip_tags, \ + DefaultPreprocessor, DEFAULT_PREPROCESSOR, \ + WordNGramAnalyzer, CharNGramAnalyzer, DEFAULT_ANALYZER, \ + CountVectorizer, TfidfTransformer, Vectorizer, \ + HashingVectorizer diff --git a/scikits/learn/feature_extraction/text.py b/scikits/learn/feature_extraction/text/dense.py similarity index 99% rename from scikits/learn/feature_extraction/text.py rename to scikits/learn/feature_extraction/text/dense.py index a618f7ef94..a923c496bf 100644 --- a/scikits/learn/feature_extraction/text.py +++ b/scikits/learn/feature_extraction/text/dense.py @@ -9,7 +9,7 @@ import re import unicodedata import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator +from ...base import BaseEstimator ENGLISH_STOP_WORDS = set([ "a", "about", "above", "across", "after", "afterwards", "again", "against", diff --git a/scikits/learn/feature_extraction/sparse/text.py b/scikits/learn/feature_extraction/text/sparse.py similarity index 98% rename from scikits/learn/feature_extraction/sparse/text.py rename to scikits/learn/feature_extraction/text/sparse.py index fe57e9fac7..c49214d106 100644 --- a/scikits/learn/feature_extraction/sparse/text.py +++ b/scikits/learn/feature_extraction/text/sparse.py @@ -8,7 +8,7 @@ from collections import defaultdict import numpy as np import scipy.sparse as sp -from ..text import BaseCountVectorizer, BaseTfidfTransformer, BaseVectorizer, \ +from .dense import BaseCountVectorizer, BaseTfidfTransformer, BaseVectorizer, \ DEFAULT_ANALYZER from ...preprocessing.sparse import Normalizer diff --git a/scikits/learn/preprocessing/sparse/src/_preprocessing.c b/scikits/learn/preprocessing/sparse/src/_preprocessing.c index d72abee66f..2d7f64d2b8 100644 --- a/scikits/learn/preprocessing/sparse/src/_preprocessing.c +++ b/scikits/learn/preprocessing/sparse/src/_preprocessing.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.11.2 on Thu Oct 21 17:42:23 2010 */ +/* Generated by Cython 0.11.2 on Thu Oct 21 19:51:38 2010 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -1166,7 +1166,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec * cdef np.ndarray[INTEGER, ndim=1] X_indices = X.indices * cdef np.ndarray[INTEGER, ndim=1] X_indptr = X.indptr # <<<<<<<<<<<<<< * - * # the column indices for row i are stored in indices[indptr[i]:indices[i+1]] + * cdef unsigned int i */ __pyx_t_1 = PyObject_GetAttr(__pyx_v_X, __pyx_kp_indptr); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -1185,7 +1185,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec __pyx_v_X_indptr = ((PyArrayObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":59 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":57 * cdef double sum_ * * for i in xrange(n_samples): # <<<<<<<<<<<<<< @@ -1195,7 +1195,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_v_n_samples; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":60 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":58 * * for i in xrange(n_samples): * sum_ = 0.0 # <<<<<<<<<<<<<< @@ -1204,7 +1204,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec */ __pyx_v_sum_ = 0.0; - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":62 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":60 * sum_ = 0.0 * * for j in xrange(X_indptr[i], X_indptr[i+1]): # <<<<<<<<<<<<<< @@ -1216,7 +1216,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec for (__pyx_t_8 = (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_6, __pyx_bstride_0_X_indptr)); __pyx_t_8 < (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_7, __pyx_bstride_0_X_indptr)); __pyx_t_8+=1) { __pyx_v_j = __pyx_t_8; - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":63 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":61 * * for j in xrange(X_indptr[i], X_indptr[i+1]): * sum_ += (X_data[j] * X_data[j]) # <<<<<<<<<<<<<< @@ -1228,7 +1228,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec __pyx_v_sum_ += ((*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_DOUBLE *, __pyx_bstruct_X_data.buf, __pyx_t_9, __pyx_bstride_0_X_data)) * (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_DOUBLE *, __pyx_bstruct_X_data.buf, __pyx_t_10, __pyx_bstride_0_X_data))); } - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":65 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":63 * sum_ += (X_data[j] * X_data[j]) * * sum_ = sqrt(sum_) # <<<<<<<<<<<<<< @@ -1237,7 +1237,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec */ __pyx_v_sum_ = sqrt(__pyx_v_sum_); - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":67 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":65 * sum_ = sqrt(sum_) * * for j in xrange(X_indptr[i], X_indptr[i+1]): # <<<<<<<<<<<<<< @@ -1249,7 +1249,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec for (__pyx_t_12 = (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_8, __pyx_bstride_0_X_indptr)); __pyx_t_12 < (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_11, __pyx_bstride_0_X_indptr)); __pyx_t_12+=1) { __pyx_v_j = __pyx_t_12; - /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":68 + /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":66 * * for j in xrange(X_indptr[i], X_indptr[i+1]): * X_data[j] /= sum_ # <<<<<<<<<<<<<< -- GitLab