diff --git a/scikits/learn/datasets/setup.py b/scikits/learn/datasets/setup.py index b8e587a6fc50ece88b386dd0423dca11f14bf062..4d18e8e113b0cb431741e8dd756db1781e2663c5 100644 --- a/scikits/learn/datasets/setup.py +++ b/scikits/learn/datasets/setup.py @@ -2,7 +2,7 @@ def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration - config = Configuration('data',parent_package,top_path) + config = Configuration('datasets',parent_package,top_path) config.add_subpackage('oldfaithful') config.add_subpackage('pendigits') config.add_subpackage('iris') diff --git a/scikits/learn/utils/attrselect.py b/scikits/learn/utils/attrselect.py index 39eb9b62f2f8b208f2f7c744aeca9a5c113285fa..b5526ee6a1866f77609cb0ed46db5d16df32993c 100644 --- a/scikits/learn/utils/attrselect.py +++ b/scikits/learn/utils/attrselect.py @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Sat Jul 21 07:00 PM 2007 J +# Last Change: Sat Jul 21 09:00 PM 2007 J """This module implements function to extract attributes and/or classes from datasets.""" @@ -8,7 +8,7 @@ import numpy as N def print_dataset_info(data, label = None, cl = None): # Attributes info attr = get_attributes(data) - msg = "data Iris has: \n" + "\t%d attributes: " % len(attr) + msg = "data has: \n" + "\t%d attributes: " % len(attr) if len(attr) > 0: msg += ", ".join([i for i in attr[:-1]]) msg += " and " + attr[-1] @@ -20,18 +20,22 @@ def print_dataset_info(data, label = None, cl = None): if len(cl) > 0: msg += "\t%d classes: " % len(cl) msg += ", ".join([i for i in cl]) - else: - msg += "\tNo classes" + else: + msg += "\tNo classes" - msg += '\n' - # Number of samples - ns = len(data) - msg += "\t%d samples in the dataset:\n" % ns + msg += '\n' + + # Number of samples + ns = len(data) + if label is not None: if cl is not None and len(cl) > 0: + msg += "\t%d samples in the dataset:\n" % ns c2ind = get_c2ind(cl, label) msg += "".join(["\t\t%d samples for class %s\n" \ % (len(c2ind[cname]), cname) \ for cname in cl]) + else: + msg += "\t%d samples in the dataset\n" % ns print msg @@ -89,7 +93,7 @@ def get_c2ind(cl, label): return c2ind if __name__ == '__main__': - from scikits.learn.datasets import iris, german + from scikits.learn.datasets import iris, german, pendigits, oldfaithful d = iris.load() data, lab, cl = d['data'], d['label'], d['class'] print_dataset_info(data, lab, cl) @@ -97,3 +101,11 @@ if __name__ == '__main__': d = german.load() data, lab, cl = d['data'], d['label'], d['class'] print_dataset_info(data, lab, cl) + + d = oldfaithful.load() + data = d['data'] + print_dataset_info(data) + + d = pendigits.load() + data, lab, cl = d['data'], d['label'], d['class'] + print_dataset_info(data, lab, cl)