From fa6fafcfdbd3cea4583fd63f45f7b80e76de74e7 Mon Sep 17 00:00:00 2001
From: Nelson Liu <nelson.liu.2009@gmail.com>
Date: Wed, 5 Oct 2016 10:59:26 -0700
Subject: [PATCH] [MRG + 1] fix test_20news_vectorized (#7431)

Run the test when the 20 newsgroups dataset has already been downloaded,
rather than always skipping it. The test is now skipped only when the
dataset is not available locally.
---
 sklearn/datasets/tests/test_20news.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py
index b13dd215f9..948d16f57a 100644
--- a/sklearn/datasets/tests/test_20news.py
+++ b/sklearn/datasets/tests/test_20news.py
@@ -57,23 +57,29 @@ def test_20news_length_consistency():
 
 
 def test_20news_vectorized():
-    # This test is slow.
-    raise SkipTest("Test too slow.")
+    try:
+        datasets.fetch_20newsgroups(subset='all',
+                                    download_if_missing=False)
+    except IOError:
+        raise SkipTest("Download 20 newsgroups to run this test")
 
+    # test subset = train
     bunch = datasets.fetch_20newsgroups_vectorized(subset="train")
     assert_true(sp.isspmatrix_csr(bunch.data))
-    assert_equal(bunch.data.shape, (11314, 107428))
+    assert_equal(bunch.data.shape, (11314, 130107))
     assert_equal(bunch.target.shape[0], 11314)
     assert_equal(bunch.data.dtype, np.float64)
 
+    # test subset = test
     bunch = datasets.fetch_20newsgroups_vectorized(subset="test")
     assert_true(sp.isspmatrix_csr(bunch.data))
-    assert_equal(bunch.data.shape, (7532, 107428))
+    assert_equal(bunch.data.shape, (7532, 130107))
     assert_equal(bunch.target.shape[0], 7532)
     assert_equal(bunch.data.dtype, np.float64)
 
-    bunch = datasets.fetch_20newsgroups_vectorized(subset="all")
+    # test subset = all
+    bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
     assert_true(sp.isspmatrix_csr(bunch.data))
-    assert_equal(bunch.data.shape, (11314 + 7532, 107428))
+    assert_equal(bunch.data.shape, (11314 + 7532, 130107))
     assert_equal(bunch.target.shape[0], 11314 + 7532)
     assert_equal(bunch.data.dtype, np.float64)
-- 
GitLab