diff --git a/doc/whats_new.rst b/doc/whats_new.rst index a4154de9bd27ff2cddd22253291babb9b3ce7712..eef97a178e7bb7d5ee407297019742b3462c5d57 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -230,11 +230,16 @@ Enhancements (`#6846 <https://github.com/scikit-learn/scikit-learn/pull/6846>`_) By `Sebastian Säger`_ and `YenChen Lin`_. - - Added new return type ``(data, target)`` : tuple option to - :func:`load_iris` dataset, - (`#7049 <https://github.com/scikit-learn/scikit-learn/pull/7049>`_) + - Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to + :func:`load_iris` dataset + `#7049 <https://github.com/scikit-learn/scikit-learn/pull/7049>`_, :func:`load_breast_cancer` dataset - (`#7152 <https://github.com/scikit-learn/scikit-learn/pull/7152>`_) by + `#7152 <https://github.com/scikit-learn/scikit-learn/pull/7152>`_, + :func:`load_digits` dataset, + :func:`load_diabetes` dataset, + :func:`load_linnerud` dataset, + :func:`load_boston` dataset + `#7154 <https://github.com/scikit-learn/scikit-learn/pull/7154>`_ by `Manvendra Singh`_. Bug fixes diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index 3d6107268b1321e67c3000a37b5d63cb65de9afb..b5c7b4cd4fe7e96cc132ef33b36ba5257950b803 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -264,7 +264,7 @@ def load_iris(return_X_y=False): If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. - .. versionadded:: 0.18 + .. versionadded:: 0.18 Returns ------- @@ -277,7 +277,7 @@ def load_iris(return_X_y=False): (data, target) : tuple if ``return_X_y`` is True - .. versionadded:: 0.18 + .. versionadded:: 0.18 Examples -------- @@ -338,7 +338,7 @@ def load_breast_cancer(return_X_y=False): If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. - .. versionadded:: 0.18 + .. versionadded:: 0.18 Returns ------- @@ -351,7 +351,7 @@ def load_breast_cancer(return_X_y=False): (data, target) : tuple if ``return_X_y`` is True - .. versionadded:: 0.18 + .. versionadded:: 0.18 The copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is downloaded from: @@ -411,7 +411,7 @@ def load_breast_cancer(return_X_y=False): feature_names=feature_names) -def load_digits(n_class=10): +def load_digits(n_class=10, return_X_y=False): """Load and return the digits dataset (classification). Each datapoint is a 8x8 image of a digit. @@ -431,6 +431,12 @@ def load_digits(n_class=10): n_class : integer, between 0 and 10, optional (default=10) The number of classes to return. + return_X_y : boolean, default=False. + If True, returns ``(data, target)`` instead of a Bunch object. + See below for more information about the `data` and `target` object. + + .. versionadded:: 0.18 + Returns ------- data : Bunch @@ -440,6 +446,10 @@ def load_digits(n_class=10): sample, 'target_names', the meaning of the labels, and 'DESCR', the full description of the dataset. + (data, target) : tuple if ``return_X_y`` is True + + .. versionadded:: 0.18 + Examples -------- To load the data and visualize the images:: @@ -458,7 +468,7 @@ def load_digits(n_class=10): delimiter=',') with open(join(module_path, 'descr', 'digits.rst')) as f: descr = f.read() - target = data[:, -1] + target = data[:, -1].astype(np.int) flat_data = data[:, :-1] images = flat_data.view() images.shape = (-1, 8, 8) @@ -468,14 +478,17 @@ def load_digits(n_class=10): flat_data, target = flat_data[idx], target[idx] images = images[idx] + if return_X_y: + return flat_data, target + return Bunch(data=flat_data, - target=target.astype(np.int), + target=target, target_names=np.arange(10), images=images, DESCR=descr) -def load_diabetes(): +def load_diabetes(return_X_y=False): """Load and return the diabetes dataset (regression). ============== ================== @@ -487,20 +500,36 @@ def load_diabetes(): Read more in the :ref:`User Guide <datasets>`. + Parameters + ---------- + return_X_y : boolean, default=False. + If True, returns ``(data, target)`` instead of a Bunch object. + See below for more information about the `data` and `target` object. + + .. versionadded:: 0.18 + Returns ------- data : Bunch Dictionary-like object, the interesting attributes are: 'data', the data to learn and 'target', the regression target for each sample. + + (data, target) : tuple if ``return_X_y`` is True + + .. versionadded:: 0.18 """ base_dir = join(dirname(__file__), 'data') data = np.loadtxt(join(base_dir, 'diabetes_data.csv.gz')) target = np.loadtxt(join(base_dir, 'diabetes_target.csv.gz')) + + if return_X_y: + return data, target + return Bunch(data=data, target=target) -def load_linnerud(): +def load_linnerud(return_X_y=False): """Load and return the linnerud dataset (multivariate regression). Samples total: 20 @@ -508,6 +537,14 @@ def load_linnerud(): Features: integer Targets: integer + Parameters + ---------- + return_X_y : boolean, default=False. + If True, returns ``(data, target)`` instead of a Bunch object. + See below for more information about the `data` and `target` object. + + .. versionadded:: 0.18 + Returns ------- data : Bunch @@ -515,6 +552,10 @@ def load_linnerud(): 'targets', the two multivariate datasets, with 'data' corresponding to the exercise and 'targets' corresponding to the physiological measurements, as well as 'feature_names' and 'target_names'. + + (data, target) : tuple if ``return_X_y`` is True + + .. versionadded:: 0.18 """ base_dir = join(dirname(__file__), 'data/') # Read data @@ -529,13 +570,16 @@ def load_linnerud(): with open(dirname(__file__) + '/descr/linnerud.rst') as f: descr = f.read() + if return_X_y: + return data_exercise, data_physiological + return Bunch(data=data_exercise, feature_names=header_exercise, target=data_physiological, target_names=header_physiological, DESCR=descr) -def load_boston(): +def load_boston(return_X_y=False): """Load and return the boston house-prices dataset (regression). ============== ============== @@ -545,6 +589,14 @@ def load_boston(): Targets real 5. - 50. ============== ============== + Parameters + ---------- + return_X_y : boolean, default=False. + If True, returns ``(data, target)`` instead of a Bunch object. + See below for more information about the `data` and `target` object. + + .. versionadded:: 0.18 + Returns ------- data : Bunch @@ -552,6 +604,10 @@ def load_boston(): 'data', the data to learn, 'target', the regression targets, and 'DESCR', the full description of the dataset. + (data, target) : tuple if ``return_X_y`` is True + + .. versionadded:: 0.18 + Examples -------- >>> from sklearn.datasets import load_boston @@ -580,6 +636,9 @@ def load_boston(): data[i] = np.asarray(d[:-1], dtype=np.float64) target[i] = np.asarray(d[-1], dtype=np.float64) + if return_X_y: + return data, target + return Bunch(data=data, target=target, # last column is target value diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 6be9b399e122eb3c99219f9a47cf89e8ce14c8f9..523d5fcd8c46bed077a9ac6e87afe4bd7dd7db16 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -128,6 +128,13 @@ def test_load_digits(): assert_equal(digits.data.shape, (1797, 64)) assert_equal(numpy.unique(digits.target).size, 10) + # test return_X_y option + X_y_tuple = load_digits(return_X_y=True) + bunch = load_digits() + assert_true(isinstance(X_y_tuple, tuple)) + assert_array_equal(X_y_tuple[0], bunch.data) + assert_array_equal(X_y_tuple[1], bunch.target) + def test_load_digits_n_class_lt_10(): digits = load_digits(9) @@ -165,6 +172,13 @@ def test_load_diabetes(): assert_equal(res.data.shape, (442, 10)) assert_true(res.target.size, 442) + # test return_X_y option + X_y_tuple = load_diabetes(return_X_y=True) + bunch = load_diabetes() + assert_true(isinstance(X_y_tuple, tuple)) + assert_array_equal(X_y_tuple[0], bunch.data) + assert_array_equal(X_y_tuple[1], bunch.target) + def test_load_linnerud(): res = load_linnerud() @@ -173,6 +187,12 @@ def test_load_linnerud(): assert_equal(len(res.target_names), 3) assert_true(res.DESCR) + # test return_X_y option + X_y_tuple = load_linnerud(return_X_y=True) + bunch = load_linnerud() + assert_true(isinstance(X_y_tuple, tuple)) + assert_array_equal(X_y_tuple[0], bunch.data) + assert_array_equal(X_y_tuple[1], bunch.target) def test_load_iris(): res = load_iris() @@ -211,6 +231,12 @@ def test_load_boston(): assert_equal(res.feature_names.size, 13) assert_true(res.DESCR) + # test return_X_y option + X_y_tuple = load_boston(return_X_y=True) + bunch = load_boston() + assert_true(isinstance(X_y_tuple, tuple)) + assert_array_equal(X_y_tuple[0], bunch.data) + assert_array_equal(X_y_tuple[1], bunch.target) def test_loads_dumps_bunch(): bunch = Bunch(x="x")