From fa6fafcfdbd3cea4583fd63f45f7b80e76de74e7 Mon Sep 17 00:00:00 2001 From: Nelson Liu <nelson.liu.2009@gmail.com> Date: Wed, 5 Oct 2016 10:59:26 -0700 Subject: [PATCH] [MRG + 1] fix test_20news_vectorized (#7431) Run test if dataset has already been downloaded rather than always skipping it --- sklearn/datasets/tests/test_20news.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index b13dd215f9..948d16f57a 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -57,23 +57,29 @@ def test_20news_length_consistency(): def test_20news_vectorized(): - # This test is slow. - raise SkipTest("Test too slow.") + try: + datasets.fetch_20newsgroups(subset='all', + download_if_missing=False) + except IOError: + raise SkipTest("Download 20 newsgroups to run this test") + # test subset = train bunch = datasets.fetch_20newsgroups_vectorized(subset="train") assert_true(sp.isspmatrix_csr(bunch.data)) - assert_equal(bunch.data.shape, (11314, 107428)) + assert_equal(bunch.data.shape, (11314, 130107)) assert_equal(bunch.target.shape[0], 11314) assert_equal(bunch.data.dtype, np.float64) + # test subset = test bunch = datasets.fetch_20newsgroups_vectorized(subset="test") assert_true(sp.isspmatrix_csr(bunch.data)) - assert_equal(bunch.data.shape, (7532, 107428)) + assert_equal(bunch.data.shape, (7532, 130107)) assert_equal(bunch.target.shape[0], 7532) assert_equal(bunch.data.dtype, np.float64) - bunch = datasets.fetch_20newsgroups_vectorized(subset="all") + # test subset = all + bunch = datasets.fetch_20newsgroups_vectorized(subset='all') assert_true(sp.isspmatrix_csr(bunch.data)) - assert_equal(bunch.data.shape, (11314 + 7532, 107428)) + assert_equal(bunch.data.shape, (11314 + 7532, 130107)) assert_equal(bunch.target.shape[0], 11314 + 7532) assert_equal(bunch.data.dtype, np.float64) -- GitLab