From 03880d75b751673e9a149975a7a2166c9a0100e5 Mon Sep 17 00:00:00 2001
From: Mathieu Blondel <mathieu@mblondel.org>
Date: Fri, 22 Oct 2010 15:30:44 +0900
Subject: [PATCH] Fix hierarchy inconsistency for sparse module.

---
 scikits/learn/datasets/mlcomp.py               |  2 +-
 .../feature_extraction/sparse/__init__.py      |  3 ---
 .../feature_extraction/tests/test_text.py      |  2 +-
 .../learn/feature_extraction/text/__init__.py  |  6 ++++++
 .../{text.py => text/dense.py}                 |  2 +-
 .../{sparse/text.py => text/sparse.py}         |  2 +-
 .../preprocessing/sparse/src/_preprocessing.c  | 18 +++++++++---------
 7 files changed, 19 insertions(+), 16 deletions(-)
 delete mode 100644 scikits/learn/feature_extraction/sparse/__init__.py
 create mode 100644 scikits/learn/feature_extraction/text/__init__.py
 rename scikits/learn/feature_extraction/{text.py => text/dense.py} (99%)
 rename scikits/learn/feature_extraction/{sparse/text.py => text/sparse.py} (98%)

diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py
index 6bdc20fa99..4c08d371b1 100644
--- a/scikits/learn/datasets/mlcomp.py
+++ b/scikits/learn/datasets/mlcomp.py
@@ -6,7 +6,7 @@ import os
 import numpy as np
 from scikits.learn.datasets.base import load_text_files
 from scikits.learn.feature_extraction.text import HashingVectorizer
-from scikits.learn.feature_extraction.sparse.text import HashingVectorizer as \
+from scikits.learn.feature_extraction.text.sparse import HashingVectorizer as \
                                                          SparseCountVectorizer
 
 
diff --git a/scikits/learn/feature_extraction/sparse/__init__.py b/scikits/learn/feature_extraction/sparse/__init__.py
deleted file mode 100644
index fa0f61b5bd..0000000000
--- a/scikits/learn/feature_extraction/sparse/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-
-from .text import CountVectorizer, TfidfTransformer, Vectorizer, \
-                  HashingVectorizer
diff --git a/scikits/learn/feature_extraction/tests/test_text.py b/scikits/learn/feature_extraction/tests/test_text.py
index d6124d833e..bf5ce70b1e 100644
--- a/scikits/learn/feature_extraction/tests/test_text.py
+++ b/scikits/learn/feature_extraction/tests/test_text.py
@@ -7,7 +7,7 @@ from scikits.learn.feature_extraction.text import TfidfTransformer
 from scikits.learn.feature_extraction.text import Vectorizer
 from scikits.learn.feature_extraction.text import HashingVectorizer
 
-import scikits.learn.feature_extraction.sparse.text as st
+import scikits.learn.feature_extraction.text.sparse as st
 
 SparseCountVectorizer = st.CountVectorizer
 SparseTfidfTransformer = st.TfidfTransformer
diff --git a/scikits/learn/feature_extraction/text/__init__.py b/scikits/learn/feature_extraction/text/__init__.py
new file mode 100644
index 0000000000..8919688b39
--- /dev/null
+++ b/scikits/learn/feature_extraction/text/__init__.py
@@ -0,0 +1,6 @@
+
+from .dense import ENGLISH_STOP_WORDS, strip_accents, strip_tags, \
+                   DefaultPreprocessor, DEFAULT_PREPROCESSOR, \
+                   WordNGramAnalyzer, CharNGramAnalyzer, DEFAULT_ANALYZER, \
+                   CountVectorizer, TfidfTransformer, Vectorizer, \
+                   HashingVectorizer
diff --git a/scikits/learn/feature_extraction/text.py b/scikits/learn/feature_extraction/text/dense.py
similarity index 99%
rename from scikits/learn/feature_extraction/text.py
rename to scikits/learn/feature_extraction/text/dense.py
index a618f7ef94..a923c496bf 100644
--- a/scikits/learn/feature_extraction/text.py
+++ b/scikits/learn/feature_extraction/text/dense.py
@@ -9,7 +9,7 @@ import re
 import unicodedata
 import numpy as np
 import scipy.sparse as sp
-from ..base import BaseEstimator
+from ...base import BaseEstimator
 
 ENGLISH_STOP_WORDS = set([
     "a", "about", "above", "across", "after", "afterwards", "again", "against",
diff --git a/scikits/learn/feature_extraction/sparse/text.py b/scikits/learn/feature_extraction/text/sparse.py
similarity index 98%
rename from scikits/learn/feature_extraction/sparse/text.py
rename to scikits/learn/feature_extraction/text/sparse.py
index fe57e9fac7..c49214d106 100644
--- a/scikits/learn/feature_extraction/sparse/text.py
+++ b/scikits/learn/feature_extraction/text/sparse.py
@@ -8,7 +8,7 @@ from collections import defaultdict
 import numpy as np
 import scipy.sparse as sp
 
-from ..text import BaseCountVectorizer, BaseTfidfTransformer, BaseVectorizer, \
+from .dense import BaseCountVectorizer, BaseTfidfTransformer, BaseVectorizer, \
                    DEFAULT_ANALYZER
 
 from ...preprocessing.sparse import Normalizer
diff --git a/scikits/learn/preprocessing/sparse/src/_preprocessing.c b/scikits/learn/preprocessing/sparse/src/_preprocessing.c
index d72abee66f..2d7f64d2b8 100644
--- a/scikits/learn/preprocessing/sparse/src/_preprocessing.c
+++ b/scikits/learn/preprocessing/sparse/src/_preprocessing.c
@@ -1,4 +1,4 @@
-/* Generated by Cython 0.11.2 on Thu Oct 21 17:42:23 2010 */
+/* Generated by Cython 0.11.2 on Thu Oct 21 19:51:38 2010 */
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
@@ -1166,7 +1166,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
  *     cdef np.ndarray[INTEGER, ndim=1] X_indices = X.indices
  *     cdef np.ndarray[INTEGER, ndim=1] X_indptr = X.indptr             # <<<<<<<<<<<<<<
  * 
- *     # the column indices for row i are stored in indices[indptr[i]:indices[i+1]]
+ *     cdef unsigned int i
  */
   __pyx_t_1 = PyObject_GetAttr(__pyx_v_X, __pyx_kp_indptr); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
@@ -1185,7 +1185,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
   __pyx_v_X_indptr = ((PyArrayObject *)__pyx_t_1);
   __pyx_t_1 = 0;
 
-  /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":59
+  /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":57
  *     cdef double sum_
  * 
  *     for i in xrange(n_samples):             # <<<<<<<<<<<<<<
@@ -1195,7 +1195,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_v_n_samples; __pyx_t_2+=1) {
     __pyx_v_i = __pyx_t_2;
 
-    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":60
+    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":58
  * 
  *     for i in xrange(n_samples):
  *         sum_ = 0.0             # <<<<<<<<<<<<<<
@@ -1204,7 +1204,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
  */
     __pyx_v_sum_ = 0.0;
 
-    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":62
+    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":60
  *         sum_ = 0.0
  * 
  *         for j in xrange(X_indptr[i], X_indptr[i+1]):             # <<<<<<<<<<<<<<
@@ -1216,7 +1216,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
     for (__pyx_t_8 = (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_6, __pyx_bstride_0_X_indptr)); __pyx_t_8 < (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_7, __pyx_bstride_0_X_indptr)); __pyx_t_8+=1) {
       __pyx_v_j = __pyx_t_8;
 
-      /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":63
+      /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":61
  * 
  *         for j in xrange(X_indptr[i], X_indptr[i+1]):
  *             sum_ += (X_data[j] * X_data[j])             # <<<<<<<<<<<<<<
@@ -1228,7 +1228,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
       __pyx_v_sum_ += ((*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_DOUBLE *, __pyx_bstruct_X_data.buf, __pyx_t_9, __pyx_bstride_0_X_data)) * (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_DOUBLE *, __pyx_bstruct_X_data.buf, __pyx_t_10, __pyx_bstride_0_X_data)));
     }
 
-    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":65
+    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":63
  *             sum_ += (X_data[j] * X_data[j])
  * 
  *         sum_ = sqrt(sum_)             # <<<<<<<<<<<<<<
@@ -1237,7 +1237,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
  */
     __pyx_v_sum_ = sqrt(__pyx_v_sum_);
 
-    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":67
+    /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":65
  *         sum_ = sqrt(sum_)
  * 
  *         for j in xrange(X_indptr[i], X_indptr[i+1]):             # <<<<<<<<<<<<<<
@@ -1249,7 +1249,7 @@ static PyObject *__pyx_pf_14_preprocessing_normalize_length_axis1_sparse(PyObjec
     for (__pyx_t_12 = (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_8, __pyx_bstride_0_X_indptr)); __pyx_t_12 < (*__Pyx_BufPtrStrided1d(__pyx_t_14_preprocessing_INTEGER *, __pyx_bstruct_X_indptr.buf, __pyx_t_11, __pyx_bstride_0_X_indptr)); __pyx_t_12+=1) {
       __pyx_v_j = __pyx_t_12;
 
-      /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":68
+      /* "/home/mathieu/Desktop/projects/scikit-learn/scikits/learn/preprocessing/sparse/src/_preprocessing.pyx":66
  * 
  *         for j in xrange(X_indptr[i], X_indptr[i+1]):
  *             X_data[j] /= sum_             # <<<<<<<<<<<<<<
-- 
GitLab