From 63583fe658886cc5eb48a2ba9a541d5f6be7194b Mon Sep 17 00:00:00 2001
From: Ralf Gommers <ralf.gommers@gmail.com>
Date: Wed, 30 Nov 2016 06:43:32 +1300
Subject: [PATCH] BUG: for several datasets, ``download_if_missing`` keyword
 was ignored. (#7944)

---
 sklearn/datasets/california_housing.py    | 4 ++++
 sklearn/datasets/covtype.py               | 3 +++
 sklearn/datasets/kddcup99.py              | 3 +++
 sklearn/datasets/olivetti_faces.py        | 4 ++++
 sklearn/datasets/species_distributions.py | 3 +++
 sklearn/datasets/tests/test_covtype.py    | 6 ++----
 sklearn/datasets/tests/test_kddcup99.py   | 6 ++----
 7 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/sklearn/datasets/california_housing.py b/sklearn/datasets/california_housing.py
index c109fee618..8a74ad9e60 100644
--- a/sklearn/datasets/california_housing.py
+++ b/sklearn/datasets/california_housing.py
@@ -87,8 +87,12 @@ def fetch_california_housing(data_home=None, download_if_missing=True):
     data_home = get_data_home(data_home=data_home)
     if not exists(data_home):
         makedirs(data_home)
+
     filepath = _pkl_filepath(data_home, TARGET_FILENAME)
     if not exists(filepath):
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
+
         print('downloading Cal. housing from %s to %s' % (DATA_URL, data_home))
         archive_fileobj = BytesIO(urlopen(DATA_URL).read())
         fileobj = tarfile.open(
diff --git a/sklearn/datasets/covtype.py b/sklearn/datasets/covtype.py
index f7cb1ed03f..6e0b4d2d0d 100644
--- a/sklearn/datasets/covtype.py
+++ b/sklearn/datasets/covtype.py
@@ -99,6 +99,9 @@ def fetch_covtype(data_home=None, download_if_missing=True,
 
         joblib.dump(X, samples_path, compress=9)
         joblib.dump(y, targets_path, compress=9)
+    elif not available:
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
 
     try:
         X, y
diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py
index 824809a80e..03bf3f8d8f 100644
--- a/sklearn/datasets/kddcup99.py
+++ b/sklearn/datasets/kddcup99.py
@@ -345,6 +345,9 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
 
         joblib.dump(X, samples_path, compress=0)
         joblib.dump(y, targets_path, compress=0)
+    elif not available:
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
 
     try:
         X, y
diff --git a/sklearn/datasets/olivetti_faces.py b/sklearn/datasets/olivetti_faces.py
index e74d65d60e..5f3af040dc 100644
--- a/sklearn/datasets/olivetti_faces.py
+++ b/sklearn/datasets/olivetti_faces.py
@@ -111,6 +111,9 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         makedirs(data_home)
     filepath = _pkl_filepath(data_home, TARGET_FILENAME)
     if not exists(filepath):
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
+
         print('downloading Olivetti faces from %s to %s'
               % (DATA_URL, data_home))
         fhandle = urlopen(DATA_URL)
@@ -121,6 +124,7 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         del mfile
     else:
         faces = joblib.load(filepath)
+
     # We want floating point data, but float32 is enough (there is only
     # one byte of precision in the original uint8s anyway)
     faces = np.float32(faces)
diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py
index 6af36e6745..330c535620 100644
--- a/sklearn/datasets/species_distributions.py
+++ b/sklearn/datasets/species_distributions.py
@@ -222,6 +222,9 @@ def fetch_species_distributions(data_home=None,
     archive_path = _pkl_filepath(data_home, DATA_ARCHIVE_NAME)
 
     if not exists(archive_path):
+        if not download_if_missing:
+            raise IOError("Data not found and `download_if_missing` is False")
+
         print('Downloading species data from %s to %s' % (SAMPLES_URL,
                                                           data_home))
         X = np.load(BytesIO(urlopen(SAMPLES_URL).read()))
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index f32511d7c9..c980bb86fc 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -3,7 +3,6 @@
 Skipped if covtype is not already downloaded to data_home.
 """
 
-import errno
 from sklearn.datasets import fetch_covtype
 from sklearn.utils.testing import assert_equal, SkipTest
 
@@ -15,9 +14,8 @@ def fetch(*args, **kwargs):
 def test_fetch():
     try:
         data1 = fetch(shuffle=True, random_state=42)
-    except IOError as e:
-        if e.errno == errno.ENOENT:
-            raise SkipTest("Covertype dataset can not be loaded.")
+    except IOError:
+        raise SkipTest("Covertype dataset can not be loaded.")
 
     data2 = fetch(shuffle=True, random_state=37)
 
diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py
index 414c89763c..498b98f4e6 100644
--- a/sklearn/datasets/tests/test_kddcup99.py
+++ b/sklearn/datasets/tests/test_kddcup99.py
@@ -5,7 +5,6 @@ The test is skipped if the data wasn't previously fetched and saved to
 scikit-learn data folder.
 """
 
-import errno
 from sklearn.datasets import fetch_kddcup99
 from sklearn.utils.testing import assert_equal, SkipTest
 
@@ -13,9 +12,8 @@ from sklearn.utils.testing import assert_equal, SkipTest
 def test_percent10():
     try:
         data = fetch_kddcup99(download_if_missing=False)
-    except IOError as e:
-        if e.errno == errno.ENOENT:
-            raise SkipTest("kddcup99 dataset can not be loaded.")
+    except IOError:
+        raise SkipTest("kddcup99 dataset can not be loaded.")
 
     assert_equal(data.data.shape, (494021, 41))
     assert_equal(data.target.shape, (494021,))
-- 
GitLab