diff --git a/examples/mlcomp_document_classification.py b/examples/mlcomp_document_classification.py
deleted file mode 100644
index e1936abd3533f3e45d1c6327ae5c3cacea026473..0000000000000000000000000000000000000000
--- a/examples/mlcomp_document_classification.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-================================
-Classification of text documents
-================================
-
-This is an example showing how the scikit-learn can be used to classify
-documents by topics using a bag-of-words approach.
-
-The dataset used in this example is the 20 newsgroups dataset and should be
-downloaded from the http://mlcomp.org (free registration required):
-
-  http://mlcomp.org/datasets/379
-
-Once downloaded unzip the arhive somewhere on your filesystem. For instance in::
-
-  % mkdir -p ~/data/mlcomp
-  % cd ~/data/mlcomp
-  % unzip /path/to/dataset-379-20news-18828_XXXXX.zip
-
-You should get a folder ``~/data/mlcomp/379`` with a file named ``metadata`` and
-subfolders ``raw``, ``train`` and ``test`` holding the text documents organized by
-newsgroups.
-
-Then set the ``MLCOMP_DATASETS_HOME`` environment variable pointing to
-the root folder holding the uncompressed archive::
-
-  % export MLCOMP_DATASETS_HOME="~/data/mlcomp"
-
-Then you are ready to run this example using your favorite python shell::
-
-  % ipython examples/mlcomp_document_classification.py
-
-"""
-# Author: Olivier Grisel <olivier.grisel@ensta.org>
-# License: Simplified BSD
-
-from time import time
-import sys
-import os
-import numpy as np
-import pylab as pl
-
-from scikits.learn.datasets import load_mlcomp
-from scikits.learn.svm import LinearSVC
-from scikits.learn.metrics import confusion_matrix
-from scikits.learn.metrics import classification_report
-
-if 'MLCOMP_DATASETS_HOME' not in os.environ:
-    print "Please follow those instructions to get started:"
-    print __doc__
-    sys.exit(0)
-
-# Load the training set
-print "Loading 20 newsgroups training set... "
-t0 = time()
-news_train = load_mlcomp('20news-18828', 'train')
-print "done in %fs" % (time() - t0)
-
-# The documents have been hashed into TF-IDF (Term Frequencies times Inverse
-# Document Frequencies) vectors of a fixed dimension.
-# Currently most scikits.learn wrappers or algorithm implementations are unable
-# to leverage efficiently a sparse datastracture; hence we use a dense
-# representation of a text dataset. Efficient handling of sparse data
-# structures should be expected in an upcoming version of scikits.learn
-print "n_samples: %d, n_features: %d" % news_train.data.shape
-
-print "Training a linear classification model with L1 penalty... "
-parameters = {
-    'loss': 'l1',
-    'penalty': 'l2',
-    'C': 10,
-    'dual': True,
-    'eps': 1e-4,
-}
-print "parameters:", parameters
-t0 = time()
-clf = LinearSVC(**parameters).fit(news_train.data, news_train.target)
-print "done in %fs" % (time() - t0)
-print "Percentage of non zeros coef: %f" % (np.mean(clf.coef_ != 0) * 100)
-
-print "Loading 20 newsgroups test set... "
-t0 = time()
-news_test = load_mlcomp('20news-18828', 'test')
-print "done in %fs" % (time() - t0)
-
-print "Predicting the labels of the test set..."
-t0 = time()
-pred = clf.predict(news_test.data)
-print "done in %fs" % (time() - t0)
-
-print "Classification report on test set:"
-print classification_report(news_test.target, pred,
-                            class_names=news_test.target_names)
-
-
-cm = confusion_matrix(news_test.target, pred)
-print "Confusion matrix:"
-print cm
-
-# Show confusion matrix
-pl.matshow(cm)
-pl.title('Confusion matrix')
-pl.colorbar()
-pl.show()
diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py
index 4ea38494df254332ced437ce55b04758f0b4f1f9..0096990ff9c5fbb5c71a743a07850db40c27dca5 100644
--- a/scikits/learn/datasets/mlcomp.py
+++ b/scikits/learn/datasets/mlcomp.py
@@ -5,9 +5,6 @@ import os
 import numpy as np
 
 from scikits.learn.datasets.base import load_files
-from scikits.learn.feature_extraction.text import HashingVectorizer
-from scikits.learn.feature_extraction.text.sparse import HashingVectorizer as \
-    SparseCountVectorizer
 
 
 def _load_document_classification(dataset_path, metadata, set_=None):
diff --git a/scikits/learn/feature_extraction/tests/test_text.py b/scikits/learn/feature_extraction/tests/test_text.py
index bf5ce70b1e81f9b4310b44ae25944304cbb226d7..f477694d3fb6d5f9e09535348605d6bea9ca955f 100644
--- a/scikits/learn/feature_extraction/tests/test_text.py
+++ b/scikits/learn/feature_extraction/tests/test_text.py
@@ -5,14 +5,12 @@ from scikits.learn.feature_extraction.text import strip_accents
 from scikits.learn.feature_extraction.text import CountVectorizer
 from scikits.learn.feature_extraction.text import TfidfTransformer
 from scikits.learn.feature_extraction.text import Vectorizer
-from scikits.learn.feature_extraction.text import HashingVectorizer
 
 import scikits.learn.feature_extraction.text.sparse as st
 SparseCountVectorizer = st.CountVectorizer
 SparseTfidfTransformer = st.TfidfTransformer
 SparseVectorizer = st.Vectorizer
-SparseHashingVectorizer = st.HashingVectorizer
 
 from scikits.learn.grid_search import GridSearchCV
 from scikits.learn.pipeline import Pipeline
 
@@ -108,72 +106,12 @@ def test_char_ngram_analyzer():
     assert_equal(cnga.analyze(text)[-5:], expected)
 
 
-def test_dense_hashed_tf_idf():
-    hv = HashingVectorizer(dim=1000, probes=3)
-    hv.vectorize(JUNK_FOOD_DOCS)
-    hv.vectorize(NOTJUNK_FOOD_DOCS)
-
-    # extract the TF-IDF data
-    X = hv.get_tfidf()
-    assert_equal(X.shape, (11, 1000))
-
-    # label junk food as -1, the others as +1
-    y = np.ones(X.shape[0])
-    y[:6] = -1
-
-    # train and test a classifier
-    clf = DenseLinearSVC(C=10).fit(X[1:-1], y[1:-1])
-    assert_equal(clf.predict([X[0]]), [-1])
-    assert_equal(clf.predict([X[-1]]), [1])
-
-
-def test_sparse_hashed_tf_idf():
-    hv = SparseHashingVectorizer(dim=1000000, probes=3)
-    hv.vectorize(JUNK_FOOD_DOCS)
-    hv.vectorize(NOTJUNK_FOOD_DOCS)
-
-    # extract the TF-IDF data
-    X = hv.get_tfidf()
-    assert_equal(X.shape, (11, 1000000))
-
-    # label junk food as -1, the others as +1
-    y = np.ones(X.shape[0])
-    y[:6] = -1
-
-    # train and test a classifier
-    clf = SparseLinearSVC(C=10).fit(X[1:-1], y[1:-1])
-    assert_equal(clf.predict(X[0, :]), [-1])
-    assert_equal(clf.predict(X[-1, :]), [1])
-
-
-def test_dense_sparse_hashed_tf_idf_sanity():
-
-    hv = HashingVectorizer(dim=100, probes=3)
-    shv = SparseHashingVectorizer(dim=100, probes=3)
-
-    hv.vectorize(JUNK_FOOD_DOCS)
-    shv.vectorize(JUNK_FOOD_DOCS)
-
-    # check that running TF IDF estimates are the same
-    dense_tf_idf = hv.get_tfidf()
-    sparse_tfidf = shv.get_tfidf().todense()
-
-    assert_array_almost_equal(dense_tf_idf, sparse_tfidf)
-
-    # check that incremental behaviour stays the same
-    hv.vectorize(NOTJUNK_FOOD_DOCS)
-    shv.vectorize(NOTJUNK_FOOD_DOCS)
-
-    dense_tf_idf = hv.get_tfidf()
-    sparse_tfidf = shv.get_tfidf().todense()
-
-    assert_array_almost_equal(dense_tf_idf, sparse_tfidf)
-
 def toarray(a):
     if hasattr(a, "toarray"):
         a = a.toarray()
     return a
 
+
 def _test_vectorizer(cv_class, tf_class, v_class):
     # results to be compared
     res = []
diff --git a/scikits/learn/feature_extraction/text/__init__.py b/scikits/learn/feature_extraction/text/__init__.py
index 8919688b392842b3af97ead4517f838b12233e63..585ea1c806a29ce5c6c81a0e8c8cd4d943a9e0ba 100644
--- a/scikits/learn/feature_extraction/text/__init__.py
+++ b/scikits/learn/feature_extraction/text/__init__.py
@@ -2,5 +2,4 @@
 from .dense import ENGLISH_STOP_WORDS, strip_accents, strip_tags, \
                    DefaultPreprocessor, DEFAULT_PREPROCESSOR, \
                    WordNGramAnalyzer, CharNGramAnalyzer, DEFAULT_ANALYZER, \
-                   CountVectorizer, TfidfTransformer, Vectorizer, \
-                   HashingVectorizer
+                   CountVectorizer, TfidfTransformer, Vectorizer
diff --git a/scikits/learn/feature_extraction/text/dense.py b/scikits/learn/feature_extraction/text/dense.py
index a923c496bf8505272f38f29a7fd00b4b572a68f4..777191633654966fa2b45f0530eaaf09d3c0d87e 100644
--- a/scikits/learn/feature_extraction/text/dense.py
+++ b/scikits/learn/feature_extraction/text/dense.py
@@ -481,100 +481,3 @@ class Vectorizer(BaseVectorizer):
         self.tfidf = TfidfTransformer(use_tf, use_idf)
 
 
-# TODO: refactor the HashingVectorizer implementation to reuse the
-# BaseVectorizer infrastructure as mush as possible and align the API
-
-class HashingVectorizer(object):
-    """Compute term frequencies vectors using hashed term space
-
-    See the Hashing-trick related papers referenced by John Langford on this
-    page to get a grasp on the usefulness of this representation:
-
-      http://hunch.net/~jl/projects/hash_reps/index.html
-
-    dim is the number of buckets, higher dim means lower collision rate but
-    also higher memory requirements and higher processing times on the
-    resulting tfidf vectors.
-
-    Documents is a sequence of lists of tokens to initialize the DF estimates.
-
-    TODO handle bigrams in a smart way such as demonstrated here:
-
-      http://streamhacker.com/2010/05/24/text-classification-sentiment-analysis-stopwords-collocations/
-
-    """
-    # TODO: implement me using the murmurhash that might be faster: but profile
-    # me first :)
-
-    def __init__(self, dim=5000, probes=1, use_idf=True,
-                 analyzer=DEFAULT_ANALYZER):
-        self.dim = dim
-        self.probes = probes
-        self.analyzer = analyzer
-        self.use_idf = use_idf
-
-        # start counts at one to avoid zero division while
-        # computing IDF
-        self.df_counts = np.ones(dim, dtype=long)
-        self.tf_vectors = None
-
-    def hash_sign(self, token, probe=0):
-        """Compute the hash of token with number proble and hashed sign"""
-        h = hash(token + (probe * u"#"))
-        return abs(h) % self.dim, 1.0 if h % 2 == 0 else -1.0
-
-    def _sample_document(self, text, tf_vector, update_estimates=True):
-        """Extract features from text and update running freq estimates"""
-        tokens = self.analyzer.analyze(text)
-        for token in tokens:
-            # TODO add support for cooccurence tokens in a sentence
-            # window
-            for probe in xrange(self.probes):
-                i, incr = self.hash_sign(token, probe)
-                tf_vector[i] += incr
-        tf_vector /= len(tokens) * self.probes
-
-        if update_estimates and self.use_idf:
-            # update the running DF estimate
-            self.df_counts += tf_vector != 0.0
-        return tf_vector
-
-    def get_idf(self):
-        n_samples = float(len(self.tf_vectors))
-        return np.log(n_samples / self.df_counts)
-
-    def get_tfidf(self):
-        """Compute the TF-log(IDF) vectors of the sampled documents"""
-        if self.tf_vectors is None:
-            return None
-        return self.tf_vectors * self.get_idf()
-
-    def vectorize(self, text_documents):
-        """Vectorize a batch of documents in python utf-8 strings or unicode"""
-        tf_vectors = np.zeros((len(text_documents), self.dim))
-        for i, text in enumerate(text_documents):
-            self._sample_document(text, tf_vectors[i])
-
-        if self.tf_vectors is None:
-            self.tf_vectors = tf_vectors
-        else:
-            self.tf_vectors = np.vstack((self.tf_vectors, tf_vectors))
-
-    def vectorize_files(self, document_filepaths):
-        """Vectorize a batch of documents stored in utf-8 text files"""
-        tf_vectors = np.zeros((len(document_filepaths), self.dim))
-        for i, filepath in enumerate(document_filepaths):
-            self._sample_document(file(filepath).read(), tf_vectors[i])
-
-        if self.tf_vectors is None:
-            self.tf_vectors = tf_vectors
-        else:
-            self.tf_vectors = np.vstack((self.tf_vectors, tf_vectors))
-
-    def get_vectors(self):
-        if self.use_idf:
-            return self.get_tfidf()
-        else:
-            return self.tf_vectors
-
-
diff --git a/scikits/learn/feature_extraction/text/sparse.py b/scikits/learn/feature_extraction/text/sparse.py
index c49214d106cf2100212974604d332d390499a664..55bc123cc1621c9047acb3da5412ef97e4c0a075 100644
--- a/scikits/learn/feature_extraction/text/sparse.py
+++ b/scikits/learn/feature_extraction/text/sparse.py
@@ -99,95 +99,3 @@ class Vectorizer(BaseVectorizer):
         self.tfidf = TfidfTransformer(use_tf, use_idf)
 
 
-# TODO: refactor the HashingVectorizer implementation to reuse the
-# BaseVectorizer infrastructure as mush as possible and align the API
-
-class HashingVectorizer(object):
-    """Compute term freq vectors using hashed term space in a sparse matrix
-
-    The logic is the same as HashingVectorizer but it is possible to use much
-    larger dimension vectors without memory issues thanks to the usage of
-    scipy.sparse datastructure to store the tf vectors.
-
-    This function requires scipy 0.7 or higher.
-    """
-
-    def __init__(self, dim=100000, probes=1, use_idf=True,
-                 analyzer=DEFAULT_ANALYZER):
-        self.dim = dim
-        self.probes = probes
-        self.analyzer = analyzer
-        self.use_idf = use_idf
-
-        # start counts at one to avoid zero division while
-        # computing IDF
-        self.df_counts = np.ones(dim, dtype=long)
-        self.tf_vectors = None
-
-    def hash_sign(self, token, probe=0):
-        h = hash(token + (probe * u"#"))
-        return abs(h) % self.dim, 1.0 if h % 2 == 0 else -1.0
-
-    def _sample_document(self, text, tf_vectors, idx=0, update_estimates=True):
-        """Extract features from text and update running freq estimates"""
-
-        tokens = self.analyzer.analyze(text)
-        counts = defaultdict(lambda: 0.0)
-        for token in tokens:
-            # TODO add support for cooccurence tokens in a sentence
-            # window
-            for probe in xrange(self.probes):
-                i, incr = self.hash_sign(token, probe)
-                counts[i] += incr
-        for k, v in counts.iteritems():
-            if v == 0.0:
-                # can happen if equally frequent conflicting features
-                continue
-            tf_vectors[idx, k] = v / (len(tokens) * self.probes)
-
-            if update_estimates and self.use_idf:
-                # update the running DF estimate
-                self.df_counts[k] += 1
-
-    def get_idf(self):
-        n_samples = float(self.tf_vectors.shape[0])
-        return np.log(n_samples / self.df_counts)
-
-    def get_tfidf(self):
-        """Compute the TF-log(IDF) vectors of the sampled documents"""
-        coo = self.tf_vectors.tocoo()
-        tf_idf = sp.lil_matrix(coo.shape)
-        idf = self.get_idf()
-        data, row, col = coo.data, coo.row, coo.col
-        for i in xrange(len(data)):
-            tf_idf[row[i], col[i]] = data[i] * idf[col[i]]
-        return tf_idf.tocsr()
-
-    def vectorize(self, text_documents):
-        """Vectorize a batch of documents in python utf-8 strings or unicode"""
-        tf_vectors = sp.dok_matrix((len(text_documents), self.dim))
-        for i, text in enumerate(text_documents):
-            self._sample_document(text, tf_vectors, i)
-
-        if self.tf_vectors is None:
-            self.tf_vectors = tf_vectors
-        else:
-            self.tf_vectors = sp.vstack((self.tf_vectors, tf_vectors))
-
-    def vectorize_files(self, document_filepaths):
-        """Vectorize a batch of utf-8 text files"""
-        tf_vectors = sp.dok_matrix((len(document_filepaths), self.dim))
-        for i, filepath in enumerate(document_filepaths):
-            self._sample_document(file(filepath).read(), tf_vectors, i)
-
-        if self.tf_vectors is None:
-            self.tf_vectors = tf_vectors
-        else:
-            self.tf_vectors = sp.vstack((self.tf_vectors, tf_vectors))
-
-    def get_vectors(self):
-        if self.use_idf:
-            return self.get_tfidf()
-        else:
-            return self.tf_vectors
-