diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py
index bde4a7f1ffb10cec865e268e68723f7f1f1ebe4f..f6a79eb8f94a1a7a690bf457e94efe0d5b89b655 100644
--- a/scikits/learn/datasets/mlcomp.py
+++ b/scikits/learn/datasets/mlcomp.py
@@ -11,6 +11,7 @@ def load_document_classification(dataset_path, metadata, set_, **kw):
     """Loader implementation for the DocumentClassification format"""
     target = []
     target_names = {}
+    filenames = []
     vectorizer = kw.get('vectorizer', HashingVectorizer())
 
     dataset_path = os.path.join(dataset_path, set_)
@@ -23,9 +24,11 @@ def load_document_classification(dataset_path, metadata, set_, **kw):
                      for d in sorted(os.listdir(folder_path))]
         vectorizer.vectorize(documents)
         target.extend(len(documents) * [label])
+        filenames.extend(documents)
 
     return Bunch(data=vectorizer.get_vectors(), target=target,
-                 target_names=target_names, DESCR=metadata.get('description'))
+                 target_names=target_names, filenames=filenames,
+                 DESCR=metadata.get('description'))
 
 
 LOADERS = {