From 5792b54bc6cd90b698d29116da7948db53adfe8d Mon Sep 17 00:00:00 2001 From: Fabian Pedregosa <fabian.pedregosa@inria.fr> Date: Wed, 6 Jan 2010 08:46:59 +0000 Subject: [PATCH] Set correct name for datasets packages (data->datasets) in the setup, and trivial changes in datasets info message From: cdavid <cdavid@cb17146a-f446-4be1-a4f7-bd7c5bb65646> git-svn-id: https://scikit-learn.svn.sourceforge.net/svnroot/scikit-learn/trunk@220 22fbfee3-77ab-4535-9bad-27d1bd3bc7d8 --- scikits/learn/datasets/setup.py | 2 +- scikits/learn/utils/attrselect.py | 30 +++++++++++++++++++++--------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/scikits/learn/datasets/setup.py b/scikits/learn/datasets/setup.py index b8e587a6fc..4d18e8e113 100644 --- a/scikits/learn/datasets/setup.py +++ b/scikits/learn/datasets/setup.py @@ -2,7 +2,7 @@ def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration - config = Configuration('data',parent_package,top_path) + config = Configuration('datasets',parent_package,top_path) config.add_subpackage('oldfaithful') config.add_subpackage('pendigits') config.add_subpackage('iris') diff --git a/scikits/learn/utils/attrselect.py b/scikits/learn/utils/attrselect.py index 39eb9b62f2..b5526ee6a1 100644 --- a/scikits/learn/utils/attrselect.py +++ b/scikits/learn/utils/attrselect.py @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Sat Jul 21 07:00 PM 2007 J +# Last Change: Sat Jul 21 09:00 PM 2007 J """This module implements function to extract attributes and/or classes from datasets.""" @@ -8,7 +8,7 @@ import numpy as N def print_dataset_info(data, label = None, cl = None): # Attributes info attr = get_attributes(data) - msg = "data Iris has: \n" + "\t%d attributes: " % len(attr) + msg = "data has: \n" + "\t%d attributes: " % len(attr) if len(attr) > 0: msg += ", ".join([i for i in attr[:-1]]) msg += " and " + attr[-1] @@ -20,18 +20,22 @@ def print_dataset_info(data, label = None, cl = None): if len(cl) > 0: msg += "\t%d classes: " % len(cl) msg += ", ".join([i for i in cl]) - else: - msg += "\tNo classes" + else: + msg += "\tNo classes" - msg += '\n' - # Number of samples - ns = len(data) - msg += "\t%d samples in the dataset:\n" % ns + msg += '\n' + + # Number of samples + ns = len(data) + if label is not None: if cl is not None and len(cl) > 0: + msg += "\t%d samples in the dataset:\n" % ns c2ind = get_c2ind(cl, label) msg += "".join(["\t\t%d samples for class %s\n" \ % (len(c2ind[cname]), cname) \ for cname in cl]) + else: + msg += "\t%d samples in the dataset\n" % ns print msg @@ -89,7 +93,7 @@ def get_c2ind(cl, label): return c2ind if __name__ == '__main__': - from scikits.learn.datasets import iris, german + from scikits.learn.datasets import iris, german, pendigits, oldfaithful d = iris.load() data, lab, cl = d['data'], d['label'], d['class'] print_dataset_info(data, lab, cl) @@ -97,3 +101,11 @@ if __name__ == '__main__': d = german.load() data, lab, cl = d['data'], d['label'], d['class'] print_dataset_info(data, lab, cl) + + d = oldfaithful.load() + data = d['data'] + print_dataset_info(data) + + d = pendigits.load() + data, lab, cl = d['data'], d['label'], d['class'] + print_dataset_info(data, lab, cl) -- GitLab