diff --git a/scikits/learn/datasets/faithful/__init__.py b/scikits/learn/datasets/faithful/__init__.py index 429a66aac7307dec9e1f926dd89476ec98e4a8a9..58177e96c0218c012b0a792406300fe1dcbc7a1a 100644 --- a/scikits/learn/datasets/faithful/__init__.py +++ b/scikits/learn/datasets/faithful/__init__.py @@ -6,4 +6,3 @@ copyright = _faith.COPYRIGHT source = _faith.SOURCE load = _faith.load -load_data = _faith.load_data diff --git a/scikits/learn/datasets/faithful/data.py b/scikits/learn/datasets/faithful/data.py index 77aed611eb3b8f8ac6120083d7f747e2acba595f..4da277cf23362eb9c4947630e6b77f1aff96d8cf 100644 --- a/scikits/learn/datasets/faithful/data.py +++ b/scikits/learn/datasets/faithful/data.py @@ -27,24 +27,27 @@ DESCRLONG = """According to Azzalini and Bowman's article, those data were recorded continuously from 1th August to 15th August 1985. Some of the durations times are labelled as L, M or S (Large, Small, Medium). -According to Azzalini and Bowman's paper: "because the unbroken sequence +According to Azzalini and Bowman's paper: 'because the unbroken sequence required measurements to be taken at night, some duration times are recorded as L (long), S (short) and M (medium). Other data sets do not contain a con- -tinuous stream of data, making it difficult to deal with time series features." +tinuous stream of data, making it difficult to deal with time series features' """ -NOTE = """Eruptions time in minutes, waiting time to next eruption in +NOTE = """Eruptions time in minutes, waiting time to next eruption in minutes""" import numpy as np - def load(): """load the actual data and returns them. :returns: - data: recordarray - a record array of the data. + waiting: array + waiting time until next eruption, in minutes + duration: array + duration of eruption, in minutes """ from faithful import waiting, duration - return {'data': np.array(zip(waiting, duration))} + waiting = np.array(waiting, np.float) + duration = np.array(duration, np.float) + return waiting, duration diff --git a/scikits/learn/datasets/iris/data.py b/scikits/learn/datasets/iris/data.py index ce470280edd25192eaa771ee912af7ba2434895c..444a9ecbc7b7c6a434bf961c6362b0875049b382 100644 --- a/scikits/learn/datasets/iris/data.py +++ b/scikits/learn/datasets/iris/data.py @@ -94,6 +94,9 @@ Missing Attribute Values: None Class Distribution: 33.3% for each of 3 classes. """ +import numpy as np + + def load(): """load the iris data and returns them. @@ -111,32 +114,15 @@ def load(): know their class name. >>>> d = load() - >>>> ind = [10, 25, 50] - >>>> lind = d['label'][ind] # returns the label index of each sample - >>>> d['class'][lind] # returns the class name of each sample + # >>>> ind = [10, 25, 50] + # >>>> lind = d['label'][ind] # returns the label index of each sample + # >>>> d['class'][lind] # returns the class name of each sample """ - import numpy - from iris import SL, SW, PL, PW, LABELS, LI2LN - PW = numpy.array(PW).astype(numpy.float) - PL = numpy.array(PL).astype(numpy.float) - SW = numpy.array(SW).astype(numpy.float) - SL = numpy.array(SL).astype(numpy.float) - data = {} - data['data'] = numpy.empty(len(PW), - [('petal width', numpy.int), - ('petal length', numpy.int), - ('sepal width', numpy.int), - ('sepal length', numpy.int)]) - - data['data']['petal width'] = numpy.round(PW * 10) - data['data']['petal length'] = numpy.round(PL * 10) - data['data']['sepal width'] = numpy.round(SW * 10) - data['data']['sepal length'] = numpy.round(SL * 10) - data['label'] = numpy.array(LABELS).astype(numpy.int) - data['class'] = numpy.empty(len(LI2LN), - 'S%d' % numpy.max([len(i) for i in LI2LN.values()])) - for i,c in LI2LN.items(): - data['class'][i] = c - - return data + from iris import SL, SW, PL, PW, labels, LI2LN + PW = np.array(PW, dtype=np.float) + PL = np.array(PL, dtype=np.float) + SW = np.array(SW, dtype=np.float) + SL = np.array(SL, dtype=np.float) + labels = np.array(labels, dtype=np.int) + return PW, PL, SW, SL, labels diff --git a/scikits/learn/datasets/iris/iris.py b/scikits/learn/datasets/iris/iris.py index 94da7409f794167b518d88c8269b1feb4f764689..13ff411eac62740b94583fdc3c0ec517295ec6e3 100644 --- a/scikits/learn/datasets/iris/iris.py +++ b/scikits/learn/datasets/iris/iris.py @@ -60,7 +60,7 @@ PW = ['0.2', '0.2', '0.2', '0.2', '0.2', '0.4', '0.3', '0.2', '0.2', '0.1', '2.0', '2.2', '1.5', '1.4', '2.3', '2.4', '1.8', '1.8', '2.1', '2.4', '2.3', '1.9', '2.3', '2.5', '2.3', '1.9', '2.0', '2.3', '1.8'] -LABELS = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,