diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py index 13266b13f318ce4db3c60a76c8a292c9e9c84be0..9447a4581735e2629c7091a2e29ea5cc26d12146 100644 --- a/scikits/learn/datasets/base.py +++ b/scikits/learn/datasets/base.py @@ -23,7 +23,6 @@ import numpy as np from ..utils import check_random_state -############################################################################### class Bunch(dict): """ Container object for datasets: dictionnary-like object that @@ -114,7 +113,7 @@ def load_files(container_path, description=None, categories=None, load_content : boolean Whether to load or not the content of the different files. If - true a 'data' attribute containing the text information is present + true a 'data' attribute containing the text information is present in the data structure returned. If not, a filenames attribute gives the path to the files. @@ -185,7 +184,7 @@ def load_iris(): Returns ------- data : Bunch - Dictionnary-like object, the interesting attributes are: + Dictionary-like object, the interesting attributes are: 'data', the data to learn, 'target', the classification labels, 'target_names', the meaning of the labels, and 'DESCR', the full description of the dataset. @@ -233,7 +232,7 @@ def load_digits(n_class=10): Returns ------- data : Bunch - Dictionnary-like object, the interesting attributes are: + Dictionary-like object, the interesting attributes are: 'data', the data to learn, `images`, the images corresponding to each sample, 'target', the classification labels for each sample, 'target_names', the meaning of the labels, and 'DESCR', @@ -277,7 +276,7 @@ def load_diabetes(): Returns ------- data : Bunch - Dictionnary-like object, the interesting attributes are: + Dictionary-like object, the interesting attributes are: 'data', the data to learn and 'target', the labels for each sample. @@ -295,7 +294,7 @@ def load_linnerud(): Returns ------- data : Bunch - Dictionnary-like object, the interesting attributes are: + Dictionary-like object, the interesting attributes are: 'data_exercise' and 'data_physiological', the two multivariate datasets, as well as 'header_exercise' and 'header_physiological', the corresponding headers. @@ -319,13 +318,14 @@ def load_linnerud(): header_physiological=header_physiological, DESCR=fdescr.read()) + def load_boston(): - """load the boston house prices dataset and returns it. + """Load the Boston house prices dataset and return it. Returns ------- data : Bunch - Dictionnary-like object, the interesting attributes are: + Dictionary-like object, the interesting attributes are: 'data', the data to learn, 'target', the classification labels, 'target_names', the meaning of the labels, and 'DESCR', the full description of the dataset. @@ -335,21 +335,23 @@ def load_boston(): >>> from scikits.learn.datasets import load_boston >>> data = load_boston() - """ + """ + module_path = dirname(__file__) - data_file = csv.reader(open(join(module_path, 'data', 'boston_house_prices.csv'))) + data_file = csv.reader(open(join(module_path, 'data', + 'boston_house_prices.csv'))) fdescr = open(join(module_path, 'descr', 'boston_house_prices.rst')) temp = data_file.next() n_samples = int(temp[0]) n_features = int(temp[1]) data = np.empty((n_samples, n_features)) - target = np.empty((n_samples,) ) - temp = data_file.next() # names of features - feature_names = np.array(temp) + target = np.empty((n_samples,)) + temp = data_file.next() # names of features + feature_names = np.array(temp) for i, d in enumerate(data_file): data[i] = np.asanyarray(d[:-1], dtype=np.float) - target[i] = np.asanyarray(d[-1], dtype=np.float) - + target[i] = np.asanyarray(d[-1], dtype=np.float) + return Bunch(data=data, target=target, feature_names=feature_names, DESCR=fdescr.read()) diff --git a/scikits/learn/datasets/descr/boston_house_prices.rst b/scikits/learn/datasets/descr/boston_house_prices.rst index b6bc8ede0e438aeaaf15629621bf58fed4e30dc8..c0c8b29c551980552f0e73d6caa831c3eab96866 100644 --- a/scikits/learn/datasets/descr/boston_house_prices.rst +++ b/scikits/learn/datasets/descr/boston_house_prices.rst @@ -36,4 +36,4 @@ Attribute Information: MEDV Median value of owner-occupied homes in $1000's Summary Statistics: - TODO \ No newline at end of file + TODO