diff --git a/scikits/learn/datasets/faithful/COPYING b/scikits/learn/datasets/faithful/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..465e793f163956fa6218b782b8197e06677cdcdd --- /dev/null +++ b/scikits/learn/datasets/faithful/COPYING @@ -0,0 +1,34 @@ +# The code and descriptive text is copyrighted and offered under the terms of +# the BSD License from the authors; see below. However, the actual dataset may +# have a different origin and intellectual property status. See the SOURCE and +# COPYRIGHT variables for this information. + +# Copyright (c) 2007 David Cournapeau <cournape@gmail.com> +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the author nor the names of any contributors may be used +# to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/scikits/learn/datasets/faithful/README b/scikits/learn/datasets/faithful/README new file mode 100644 index 0000000000000000000000000000000000000000..de148e6d6beb179a1409047ae3c00a44cf49859f --- /dev/null +++ b/scikits/learn/datasets/faithful/README @@ -0,0 +1,6 @@ +Each OldfaithfulC*.txt is one column of the datasets as presented in Azzalini +and Bowman. The Oldfaithful.txt is simply a cat of all those files: this is just +to make checking easier. The data in the txt are *exactly* the same as the +ones in Azzalini and Bowman: again, post processing them in python is easy +(converting the time to seconds, etc...), and having exactly the data of the +reference makes it easier to check. diff --git a/scikits/learn/datasets/faithful/__init__.py b/scikits/learn/datasets/faithful/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..429a66aac7307dec9e1f926dd89476ec98e4a8a9 --- /dev/null +++ b/scikits/learn/datasets/faithful/__init__.py @@ -0,0 +1,9 @@ +#!
/usr/bin/env python
+# Last Change: Fri Jun 08 12:00 PM 2007 J
+import data as _faith
+__doc__ = _faith.DESCRSHORT
+copyright = _faith.COPYRIGHT
+source = _faith.SOURCE
+
+load = _faith.load
+load_data = _faith.load  # alias: data.py defines load() only, no load_data
diff --git a/scikits/learn/datasets/faithful/data.py b/scikits/learn/datasets/faithful/data.py
new file mode 100644
index 0000000000000000000000000000000000000000..77aed611eb3b8f8ac6120083d7f747e2acba595f
--- /dev/null
+++ b/scikits/learn/datasets/faithful/data.py
@@ -0,0 +1,50 @@
+# encoding: utf-8
+"""Old faithful dataset."""
+
+__docformat__ = 'restructuredtext'
+
+COPYRIGHT = """See SOURCE. Pr Azzalini has explicitely given his consent for
+the use of those data in scipy."""
+TITLE = "Old Faithful Geyser Data"
+SOURCE = """AZZALINI A., BOWMAN A. W. (1990). A look at some data on the
+Old Faithful Geyser. Applied Statistics (Journal of the Royal Statistical
+Society series C), vol. 39, pp. 357-365. Data collected by the Yellowstone Park
+geologist, R. A. Hutchinson.
+
+References:
+ - Härdle, W. (1991) Smoothing Techniques with Implementation in S. New
+ York: Springer.
+ - Azzalini, A. and Bowman, A. W. (1990). A look at some data on the Old
+Faithful geyser. Applied Statistics, 39, 357--365.
+
+Those data are exactly the ones from Azzalini and Bowman's article."""
+
+DESCRSHORT = """Waiting time between eruptions and the duration of the
+eruption for the Old Faithful geyser in Yellowstone National Park, Wyoming,
+USA. Waiting times and duration time are in minutes"""
+
+DESCRLONG = """According to Azzalini and Bowman's article, those data
+were recorded continuously from 1th August to 15th August 1985.
+
+Some of the durations times are labelled as L, M or S (Large, Small, Medium).
+According to Azzalini and Bowman's paper: "because the unbroken sequence
+required measurements to be taken at night, some duration times are recorded as
+L (long), S (short) and M (medium).
Other data sets do not contain a con-
+tinuous stream of data, making it difficult to deal with time series features."
+"""
+
+NOTE = """Eruptions time in minutes, waiting time to next eruption in
+minutes"""
+
+import numpy as np
+
+
+def load():
+    """Load the Old Faithful observations and return them.
+
+    :returns:
+        data: dict
+            ``data['data']`` is an (n, 2) array of (waiting, duration) rows.
+    """
+    from faithful import waiting, duration
+    return {'data': np.array(list(zip(waiting, duration)))}
diff --git a/scikits/learn/datasets/faithful/faithful.py b/scikits/learn/datasets/faithful/faithful.py
new file mode 100644
index 0000000000000000000000000000000000000000..317cb8cd288107e0d30abc50ff4b57131581946e
--- /dev/null
+++ b/scikits/learn/datasets/faithful/faithful.py
@@ -0,0 +1,42 @@
+duration = [3.6, 1.8, 3.333, 2.283, 4.533, 2.883, 4.7,
+3.6, 1.95, 4.35, 1.833, 3.917, 4.2, 1.75, 4.7, 2.167, 1.75, 4.8, 1.6, 4.25,
+1.8, 1.75, 3.45, 3.067, 4.533, 3.6, 1.967, 4.083, 3.85, 4.433, 4.3, 4.467,
+3.367, 4.033, 3.833, 2.017, 1.867, 4.833, 1.833, 4.783, 4.35, 1.883, 4.567,
+1.75, 4.533, 3.317, 3.833, 2.1, 4.633, 2, 4.8, 4.716, 1.833, 4.833, 1.733,
+4.883, 3.717, 1.667, 4.567, 4.317, 2.233, 4.5, 1.75, 4.8, 1.817, 4.4, 4.167,
+4.7, 2.067, 4.7, 4.033, 1.967, 4.5, 4, 1.983, 5.067, 2.017, 4.567, 3.883,
+3.6, 4.133, 4.333, 4.1, 2.633, 4.067, 4.933, 3.95, 4.517, 2.167, 4, 2.2,
+4.333, 1.867, 4.817, 1.833, 4.3, 4.667, 3.75, 1.867, 4.9, 2.483, 4.367,
+2.1, 4.5, 4.05, 1.867, 4.7, 1.783, 4.85, 3.683, 4.733, 2.3, 4.9, 4.417,
+1.7, 4.633, 2.317, 4.6, 1.817, 4.417, 2.617, 4.067, 4.25, 1.967, 4.6, 3.767,
+1.917, 4.5, 2.267, 4.65, 1.867, 4.167, 2.8, 4.333, 1.833, 4.383, 1.883,
+4.933, 2.033, 3.733, 4.233, 2.233, 4.533, 4.817, 4.333, 1.983, 4.633, 2.017,
+5.1, 1.8, 5.033, 4, 2.4, 4.6, 3.567, 4, 4.5, 4.083, 1.8, 3.967, 2.2, 4.15,
+2, 3.833, 3.5, 4.583, 2.367, 5, 1.933, 4.617, 1.917, 2.083, 4.583, 3.333,
+4.167, 4.333, 4.5, 2.417, 4, 4.167, 1.883, 4.583, 4.25, 3.767, 2.033, 4.433,
+4.083, 1.833, 4.417, 2.183, 4.8, 1.833, 4.8, 4.1, 3.966,
4.233, 3.5, 4.366, +2.25, 4.667, 2.1, 4.35, 4.133, 1.867, 4.6, 1.783, 4.367, 3.85, 1.933, 4.5, +2.383, 4.7, 1.867, 3.833, 3.417, 4.233, 2.4, 4.8, 2, 4.15, 1.867, 4.267, +1.75, 4.483, 4, 4.117, 4.083, 4.267, 3.917, 4.55, 4.083, 2.417, 4.183, +2.217, 4.45, 1.883, 1.85, 4.283, 3.95, 2.333, 4.15, 2.35, 4.933, 2.9, 4.583, +3.833, 2.083, 4.367, 2.133, 4.35, 2.2, 4.45, 3.567, 4.5, 4.15, 3.817, 3.917, +4.45, 2, 4.283, 4.767, 4.533, 1.85, 4.25, 1.983, 2.25, 4.75, 4.117, 2.15, +4.417, 1.817, 4.467] + +waiting = [79, 54, 74, 62, 85, 55, 88, 85, 51, 85, +54, 84, 78, 47, 83, 52, 62, 84, 52, 79, 51, 47, 78, 69, 74, 83, 55, 76, +78, 79, 73, 77, 66, 80, 74, 52, 48, 80, 59, 90, 80, 58, 84, 58, 73, 83, +64, 53, 82, 59, 75, 90, 54, 80, 54, 83, 71, 64, 77, 81, 59, 84, 48, 82, +60, 92, 78, 78, 65, 73, 82, 56, 79, 71, 62, 76, 60, 78, 76, 83, 75, 82, +70, 65, 73, 88, 76, 80, 48, 86, 60, 90, 50, 78, 63, 72, 84, 75, 51, 82, +62, 88, 49, 83, 81, 47, 84, 52, 86, 81, 75, 59, 89, 79, 59, 81, 50, 85, +59, 87, 53, 69, 77, 56, 88, 81, 45, 82, 55, 90, 45, 83, 56, 89, 46, 82, +51, 86, 53, 79, 81, 60, 82, 77, 76, 59, 80, 49, 96, 53, 77, 77, 65, 81, +71, 70, 81, 93, 53, 89, 45, 86, 58, 78, 66, 76, 63, 88, 52, 93, 49, 57, +77, 68, 81, 81, 73, 50, 85, 74, 55, 77, 83, 83, 51, 78, 84, 46, 83, 55, +81, 57, 76, 84, 77, 81, 87, 77, 51, 78, 60, 82, 91, 53, 78, 46, 77, 84, +49, 83, 71, 80, 49, 75, 64, 76, 53, 94, 55, 76, 50, 82, 54, 75, 78, 79, +78, 78, 70, 79, 70, 54, 86, 50, 90, 54, 54, 77, 79, 64, 75, 47, 86, 63, +85, 82, 57, 82, 67, 74, 54, 83, 73, 73, 88, 80, 71, 83, 56, 79, 78, 84, +58, 83, 43, 60, 75, 81, 46, 90, 46, 74] +