From d0828deba072cc9d343f24c91bdd23c75d27ceda Mon Sep 17 00:00:00 2001 From: Olivier Grisel <olivier.grisel@ensta.org> Date: Mon, 1 Nov 2010 12:37:06 +0100 Subject: [PATCH] showcase the new classification report in the examples --- examples/mlcomp_document_classification.py | 7 ++++++- examples/mlcomp_sparse_document_classification.py | 7 ++++++- examples/sgd/mlcomp_sparse_document_classification_sgd.py | 7 +++++-- scikits/learn/datasets/mlcomp.py | 4 ++-- scikits/learn/metrics.py | 1 + 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/examples/mlcomp_document_classification.py b/examples/mlcomp_document_classification.py index bdb6d87fb8..e1936abd35 100644 --- a/examples/mlcomp_document_classification.py +++ b/examples/mlcomp_document_classification.py @@ -43,6 +43,7 @@ import pylab as pl from scikits.learn.datasets import load_mlcomp from scikits.learn.svm import LinearSVC from scikits.learn.metrics import confusion_matrix +from scikits.learn.metrics import classification_report if 'MLCOMP_DATASETS_HOME' not in os.environ: print "Please follow those instructions to get started:" @@ -86,7 +87,11 @@ print "Predicting the labels of the test set..." t0 = time() pred = clf.predict(news_test.data) print "done in %fs" % (time() - t0) -print "Classification accuracy: %f" % (np.mean(pred == news_test.target) * 100) + +print "Classification report on test set:" +print classification_report(news_test.target, pred, + class_names=news_test.target_names) + cm = confusion_matrix(news_test.target, pred) print "Confusion matrix:" diff --git a/examples/mlcomp_sparse_document_classification.py b/examples/mlcomp_sparse_document_classification.py index 639678cf29..77093fd9ab 100644 --- a/examples/mlcomp_sparse_document_classification.py +++ b/examples/mlcomp_sparse_document_classification.py @@ -45,6 +45,7 @@ import pylab as pl from scikits.learn.datasets import load_mlcomp from scikits.learn.svm.sparse import LinearSVC from scikits.learn.metrics import confusion_matrix +from scikits.learn.metrics import classification_report if 'MLCOMP_DATASETS_HOME' not in os.environ: print "Please follow those instructions to get started:" @@ -87,7 +88,11 @@ print "Predicting the labels of the test set..." t0 = time() pred = clf.predict(news_test.data) print "done in %fs" % (time() - t0) -print "Classification accuracy: %f" % (np.mean(pred == news_test.target) * 100) + +print "Classification report on test set:" +print classification_report(news_test.target, pred, + class_names=news_test.target_names) + cm = confusion_matrix(news_test.target, pred) print "Confusion matrix:" diff --git a/examples/sgd/mlcomp_sparse_document_classification_sgd.py b/examples/sgd/mlcomp_sparse_document_classification_sgd.py index d56af4fdf3..d1cdb281da 100644 --- a/examples/sgd/mlcomp_sparse_document_classification_sgd.py +++ b/examples/sgd/mlcomp_sparse_document_classification_sgd.py @@ -44,6 +44,7 @@ import numpy as np from scikits.learn.datasets import load_mlcomp from scikits.learn.metrics import confusion_matrix +from scikits.learn.metrics import classification_report # from scikits.learn.svm.sparse import LinearSVC from scikits.learn.sgd.sparse import SGD @@ -79,7 +80,6 @@ print "Training a linear SVM (hinge loss and L2 regularizer) using SGD.\n"\ "SGD(n_iter=50, alpha=0.00001, fit_intercept=True)" t0 = time() clf = SGD(n_iter=50, alpha=0.00001, fit_intercept=True) -#clf = LinearSVC(**parameters) clf.fit(data, target) print "done in %fs" % (time() - t0) print "Percentage of non zeros coef: %f" % (np.mean(clf.coef_ != 0) * 100) @@ -95,12 +95,15 @@ neg_idx = np.where(target == neg)[0] idx = np.concatenate((pos_idx, neg_idx)) data = news_test.data[idx] target = news_test.target[idx] +target_names = news_test.target_names[:2] print "Predicting the labels of the test set..." t0 = time() pred = clf.predict(data) print "done in %fs" % (time() - t0) -print "Classification accuracy: %f" % (np.mean(pred == target) * 100) +print "Classification report on test set:" +print classification_report(target, pred, class_names=target_names) + cm = confusion_matrix(target, pred) print "Confusion matrix:" diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py index 232543b694..22b35dd224 100644 --- a/scikits/learn/datasets/mlcomp.py +++ b/scikits/learn/datasets/mlcomp.py @@ -12,7 +12,7 @@ from scikits.learn.feature_extraction.text import SparseHashingVectorizer def _load_document_classification(dataset_path, metadata, set_, sparse, **kw): """Loader implementation for the DocumentClassification format""" target = [] - target_names = {} + target_names = [] filenames = [] vectorizer = kw.get('vectorizer') if vectorizer is None: @@ -31,7 +31,7 @@ def _load_document_classification(dataset_path, metadata, set_, sparse, **kw): folders = [f for f in sorted(os.listdir(dataset_path)) if os.path.isdir(os.path.join(dataset_path, f))] for label, folder in enumerate(folders): - target_names[label] = folder + target_names.append(folder) folder_path = os.path.join(dataset_path, folder) documents = [os.path.join(folder_path, d) for d in sorted(os.listdir(folder_path))] diff --git a/scikits/learn/metrics.py b/scikits/learn/metrics.py index 8ef121d86b..c04582dac9 100644 --- a/scikits/learn/metrics.py +++ b/scikits/learn/metrics.py @@ -341,6 +341,7 @@ def classification_report(y_true, y_pred, labels=None, class_names=None): class_names = ['%d' % l for l in labels] else: width = max(len(cn) for cn in class_names) + width = max(width, len('mean')) headers = ["precision", "recall", "f1-score"] -- GitLab