diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index c4fd379e2111ac07ae2cba8011bc9b9206f99134..f60f405b94a443483fc9925df2ae9302dc152236 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -99,6 +99,9 @@ zero features)::
   >>> vectors.nnz / vectors.shape[0]
   118
 
+``sklearn.datasets.fetch_20newsgroups_tfidf`` is a function which returns
+ready-to-use TF-IDF features instead of file names.
+
 .. _`20 newsgroups website`: http://people.csail.mit.edu/jrennie/20Newsgroups/
 .. _`TF-IDF`: http://en.wikipedia.org/wiki/Tf-idf
 
diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py
index 5ce9cdb4add899e44ab0260da4d08cc9fe2803ce..e8ebaf8f8dc711552adf9c244c85e5873dc8247d 100644
--- a/sklearn/datasets/twenty_newsgroups.py
+++ b/sklearn/datasets/twenty_newsgroups.py
@@ -97,7 +97,7 @@ def download_20newsgroups(target_dir, cache_path):
 
 def fetch_20newsgroups(data_home=None, subset='train', categories=None,
                       shuffle=True, random_state=42, download_if_missing=True):
-    """Load the filenames of the 20 newsgroups dataset
+    """Load the filenames of the 20 newsgroups dataset.
 
     Parameters
     ----------
@@ -225,6 +225,7 @@ def fetch_20newsgroups_tfidf(subset="train", data_home=None):
     data_home = get_data_home(data_home=data_home)
     mem = Memory(cachedir=data_home, verbose=False)
 
+    # We shuffle, but with a fixed seed, so that the memoization stays valid.
     data_train = fetch_20newsgroups(data_home=data_home,
                                     subset='train',
                                     categories=None,