diff --git a/sklearn/datasets/california_housing.py b/sklearn/datasets/california_housing.py
index c109fee6185d8bff5b918712ea46a4f2b5050eb8..8a74ad9e60e35275ee28e01dd9a02c98754cffb7 100644
--- a/sklearn/datasets/california_housing.py
+++ b/sklearn/datasets/california_housing.py
@@ -87,8 +87,12 @@ def fetch_california_housing(data_home=None, download_if_missing=True):
     data_home = get_data_home(data_home=data_home)
     if not exists(data_home):
         makedirs(data_home)
+
     filepath = _pkl_filepath(data_home, TARGET_FILENAME)
     if not exists(filepath):
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
+
         print('downloading Cal. housing from %s to %s' % (DATA_URL, data_home))
         archive_fileobj = BytesIO(urlopen(DATA_URL).read())
         fileobj = tarfile.open(
diff --git a/sklearn/datasets/covtype.py b/sklearn/datasets/covtype.py
index f7cb1ed03f36b1a1fb0500d6064f03f7692c2b14..6e0b4d2d0d21c739093d6debb36f008c559cddfd 100644
--- a/sklearn/datasets/covtype.py
+++ b/sklearn/datasets/covtype.py
@@ -99,6 +99,9 @@ def fetch_covtype(data_home=None, download_if_missing=True,
 
         joblib.dump(X, samples_path, compress=9)
         joblib.dump(y, targets_path, compress=9)
+    elif not available:
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
 
     try:
         X, y
diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py
index 824809a80edd67dd874f064fefa27a22e0349925..03bf3f8d8fdef9177886ac3336c0de30c1e8caed 100644
--- a/sklearn/datasets/kddcup99.py
+++ b/sklearn/datasets/kddcup99.py
@@ -345,6 +345,9 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
 
         joblib.dump(X, samples_path, compress=0)
         joblib.dump(y, targets_path, compress=0)
+    elif not available:
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
 
     try:
         X, y
diff --git a/sklearn/datasets/olivetti_faces.py b/sklearn/datasets/olivetti_faces.py
index e74d65d60e18de6e971a50fa36823b8565434c4a..5f3af040dc1a41ab0e1a0442b66b1dff7720129c 100644
--- a/sklearn/datasets/olivetti_faces.py
+++ b/sklearn/datasets/olivetti_faces.py
@@ -111,6 +111,9 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         makedirs(data_home)
     filepath = _pkl_filepath(data_home, TARGET_FILENAME)
     if not exists(filepath):
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
+
         print('downloading Olivetti faces from %s to %s'
               % (DATA_URL, data_home))
         fhandle = urlopen(DATA_URL)
@@ -121,6 +124,7 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         del mfile
     else:
         faces = joblib.load(filepath)
+
     # We want floating point data, but float32 is enough (there is only
     # one byte of precision in the original uint8s anyway)
     faces = np.float32(faces)
diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py
index 6af36e6745d33ea6dc4a287df40bbfe69f7807cb..330c535620b7d4e84df26ffe293865beb0b46036 100644
--- a/sklearn/datasets/species_distributions.py
+++ b/sklearn/datasets/species_distributions.py
@@ -222,6 +222,9 @@ def fetch_species_distributions(data_home=None,
 
     archive_path = _pkl_filepath(data_home, DATA_ARCHIVE_NAME)
     if not exists(archive_path):
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
+
         print('Downloading species data from %s to %s' % (SAMPLES_URL,
                                                           data_home))
         X = np.load(BytesIO(urlopen(SAMPLES_URL).read()))
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index f32511d7c9aa85fd623213bd9c3553b1ed7f9d4d..c980bb86fc8700db7914160f496edfa1cba4bee0 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -3,7 +3,6 @@
 Skipped if covtype is not already downloaded to data_home.
 """
 
-import errno
 
 from sklearn.datasets import fetch_covtype
 from sklearn.utils.testing import assert_equal, SkipTest
@@ -15,9 +14,8 @@ def fetch(*args, **kwargs):
 
 def test_fetch():
     try:
         data1 = fetch(shuffle=True, random_state=42)
-    except IOError as e:
-        if e.errno == errno.ENOENT:
-            raise SkipTest("Covertype dataset can not be loaded.")
+    except IOError:
+        raise SkipTest("Covertype dataset can not be loaded.")
 
     data2 = fetch(shuffle=True, random_state=37)
diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py
index 414c89763c1e867b5c221314201c20d41ebeee26..498b98f4e67ed95e7b81d2d80d1578817e76628c 100644
--- a/sklearn/datasets/tests/test_kddcup99.py
+++ b/sklearn/datasets/tests/test_kddcup99.py
@@ -5,7 +5,6 @@
 The test is skipped if the data wasn't previously fetched and saved to
 scikit-learn data folder.
 """
-import errno
 
 from sklearn.datasets import fetch_kddcup99
 from sklearn.utils.testing import assert_equal, SkipTest
@@ -13,9 +12,8 @@ from sklearn.utils.testing import assert_equal, SkipTest
 
 def test_percent10():
     try:
         data = fetch_kddcup99(download_if_missing=False)
-    except IOError as e:
-        if e.errno == errno.ENOENT:
-            raise SkipTest("kddcup99 dataset can not be loaded.")
+    except IOError:
+        raise SkipTest("kddcup99 dataset can not be loaded.")
     assert_equal(data.data.shape, (494021, 41))
     assert_equal(data.target.shape, (494021,))