diff --git a/scikits/learn/feature_extraction/sparse/text.py b/scikits/learn/feature_extraction/sparse/text.py
index 9f51097d411f2295550272725ef44c8c22190061..fe57e9fac76a370484059c7d6b5e080550a6321a 100644
--- a/scikits/learn/feature_extraction/sparse/text.py
+++ b/scikits/learn/feature_extraction/sparse/text.py
@@ -13,16 +13,34 @@ from ..text import BaseCountVectorizer, BaseTfidfTransformer, BaseVectorizer, \
 
 from ...preprocessing.sparse import Normalizer
 
+
 class CountVectorizer(BaseCountVectorizer):
+    """Convert a collection of raw documents to a matrix of token counts
+
+    This implementation produces a sparse representation of the counts using
+    a scipy.sparse.dok_matrix (dictionary of keys).
+
+    Parameters
+    ----------
+    analyzer: WordNGramAnalyzer or CharNGramAnalyzer, optional
+
+    vocabulary: dict, optional
+        A dictionary where keys are tokens and values are indices in the
+        matrix.
+        This is useful to fix the vocabulary in advance.
+
+    dtype: type, optional
+        Type of the matrix returned by fit_transform() or transform().
+    """
 
     def _init_matrix(self, shape):
         return sp.dok_matrix(shape, dtype=self.dtype)
 
+
 class TfidfTransformer(BaseTfidfTransformer):
 
     def fit(self, X, y=None):
-        """
-        Learn the IDF vector (global term weights).
+        """Learn the IDF vector (global term weights)
 
         Parameters
         ----------
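
For context, a minimal usage sketch of the sparse CountVectorizer documented in the hunk above. This is illustrative only: the two-document corpus and the fixed vocabulary are made up, and the import path assumes the scikits.learn 0.x layout used in this patch.

    # Count tokens into a scipy.sparse.dok_matrix, with the vocabulary fixed
    # in advance so column indices stay aligned across document sets.
    # Corpus and vocabulary below are illustrative, not from the patch.
    from scikits.learn.feature_extraction.sparse.text import CountVectorizer

    docs = ["the cat sat", "the cat sat on the mat"]

    vectorizer = CountVectorizer(vocabulary={"cat": 0, "mat": 1, "sat": 2})
    counts = vectorizer.fit_transform(docs)  # dok_matrix of shape (2, 3)

The dictionary-of-keys layout keeps incremental counting cheap; converting to CSR afterwards is the usual move before doing any linear algebra on the result.
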
@@ -41,8 +59,7 @@ class TfidfTransformer(BaseTfidfTransformer):
         return self
 
     def transform(self, X, copy=True):
-        """
-        Transform a count matrix to a TF or TF-IDF representation.
+        """Transform a count matrix to a TF or TF-IDF representation
 
         Parameters
         ----------
@@ -67,9 +84,9 @@ class TfidfTransformer(BaseTfidfTransformer):
 
         return X
 
+
 class Vectorizer(BaseVectorizer):
-    """
-    Convert a collection of raw documents to a sparse matrix.
+    """Convert a collection of raw documents to a sparse matrix
 
     Equivalent to CountVectorizer followed by TfidfTransformer.
     """
@@ -81,6 +98,10 @@ class Vectorizer(BaseVectorizer):
         self.tc = CountVectorizer(analyzer, dtype=np.float64)
         self.tfidf = TfidfTransformer(use_tf, use_idf)
 
+
+# TODO: refactor the HashingVectorizer implementation to reuse the
+# BaseVectorizer infrastructure as much as possible and align the API
+
 class HashingVectorizer(object):
     """Compute term freq vectors using hashed term space in a sparse matrix
 
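
A sketch of the equivalence stated in the Vectorizer docstring above, i.e. that the combined class matches running CountVectorizer and TfidfTransformer by hand. The three-document corpus is made up, and fit_transform on Vectorizer is assumed to be provided by BaseVectorizer.

    import numpy as np
    from scikits.learn.feature_extraction.sparse.text import (
        CountVectorizer, TfidfTransformer, Vectorizer)

    docs = ["the cat sat", "the dog sat", "the dog barked"]

    # Two-step pipeline: raw token counts, then TF/TF-IDF reweighting.
    # Vectorizer builds its CountVectorizer with float64 counts, hence the
    # explicit dtype here.
    counts = CountVectorizer(dtype=np.float64).fit_transform(docs)
    tfidf = TfidfTransformer().fit(counts).transform(counts)

    # One-step equivalent.
    tfidf2 = Vectorizer().fit_transform(docs)
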
diff --git a/scikits/learn/feature_extraction/text.py b/scikits/learn/feature_extraction/text.py
index d9a6b0e74fc0892263855ce87ef688cd235e4632..acd91178bee2e8e665b3fcae9c6ff8833fb89e81 100644
--- a/scikits/learn/feature_extraction/text.py
+++ b/scikits/learn/feature_extraction/text.py
@@ -167,7 +167,7 @@ class BaseCountVectorizer(BaseEstimator):
     """Convert a collection of raw documents to a matrix of token counts
 
     This class can't be used directly, use either CountVectorizer or
-    SparseCountVectorizer.
+    sparse.CountVectorizer.
 
     Parameters
     ----------
@@ -292,14 +292,14 @@ class CountVectorizer(BaseCountVectorizer):
     """Convert a collection of raw documents to a matrix of token counts
 
     This implementation produces a dense representation of the counts using
-    numpy array.
+    a numpy array.
 
     If you do not provide an a-priori dictionary and you do not use
     an analyzer that does some kind of feature selection then the number of
     features (the vocabulary size found by analysing the data) might be very
     large and the count vectors might not fit in memory.
 
-    For this case it is either recommended to use the SparseCountVectorizer
+    In this case it is recommended to use either the sparse.CountVectorizer
     variant of this class or a HashingVectorizer that will reduce the
     dimensionality to an arbitrary number by using random projection.
 
@@ -322,13 +322,15 @@ class CountVectorizer(BaseCountVectorizer):
 
 
 class BaseTfidfTransformer(BaseEstimator):
-    """
-    Transform a count matrix to a TF (term-frequency)
-    or TF-IDF (term-frequency inverse-document-frequency)
-    representation.
+    """Transform a count matrix to a TF or TF-IDF representation
+
+    TF means term-frequency while TF-IDF means term-frequency times inverse
+    document-frequency:
+
+      http://en.wikipedia.org/wiki/TF-IDF
 
     This class can't be used directly, use either TfidfTransformer or
-    SparseTfidfTransformer.
+    sparse.TfidfTransformer.
 
     Parameters
     ----------
@@ -347,7 +349,30 @@ class BaseTfidfTransformer(BaseEstimator):
 
 
 class TfidfTransformer(BaseTfidfTransformer):
-    # TODO: write docstring!
+    """Transform a count matrix to a TF or TF-IDF representation
+
+    TF means term-frequency while TF-IDF means term-frequency times inverse
+    document-frequency:
+
+      http://en.wikipedia.org/wiki/TF-IDF
+
+    The goal of using TF-IDF instead of the raw frequencies of occurrence of a
+    token in a given document is to scale down the impact of tokens that occur
+    very frequently in a given corpus and that are hence empirically less
+    informative than feature that occur in a small fraction of the training
+    corpus.
+
+    TF-IDF can be seen as a smooth alternative to stop words filtering.
+
+    Parameters
+    ----------
+
+    use_tf: boolean
+        enable term-frequency normalization
+
+    use_idf: boolean
+        enable inverse-document-frequency reweighting
+    """
 
     def fit(self, X, y=None):
         """Learn the IDF vector (global term weights)
@@ -394,7 +419,7 @@ class BaseVectorizer(BaseEstimator):
     """Convert a collection of raw documents to a matrix
 
     This class can't be used directly, use either Vectorizer or
-    SparseVectorizer.
+    sparse.Vectorizer.
     """
 
     def fit(self, raw_documents):
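
To ground the TF-IDF description added to the TfidfTransformer docstring, a hand-rolled computation on a small dense count matrix. The unsmoothed textbook formula idf(t) = log(n_docs / df(t)) is an assumption here; the library's exact variant is not shown in this patch.

    import numpy as np

    # Rows are documents, columns are terms; values are raw token counts.
    counts = np.array([[3, 0, 1],
                       [2, 0, 0],
                       [3, 0, 2]], dtype=np.float64)

    n_docs = counts.shape[0]
    df = (counts > 0).sum(axis=0)                # document frequency per term
    idf = np.log(n_docs / np.maximum(df, 1.0))   # guard against df == 0

    tf = counts / counts.sum(axis=1)[:, np.newaxis]  # per-document term frequency
    tfidf = tf * idf

A term present in every document (the first column) gets idf = 0 and is effectively suppressed, which is the "smooth alternative to stop words filtering" mentioned in the docstring.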