From d0828deba072cc9d343f24c91bdd23c75d27ceda Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 1 Nov 2010 12:37:06 +0100
Subject: [PATCH] showcase the new classification report in the examples

---
 examples/mlcomp_document_classification.py                | 7 ++++++-
 examples/mlcomp_sparse_document_classification.py         | 7 ++++++-
 examples/sgd/mlcomp_sparse_document_classification_sgd.py | 7 +++++--
 scikits/learn/datasets/mlcomp.py                          | 4 ++--
 scikits/learn/metrics.py                                  | 1 +
 5 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/examples/mlcomp_document_classification.py b/examples/mlcomp_document_classification.py
index bdb6d87fb8..e1936abd35 100644
--- a/examples/mlcomp_document_classification.py
+++ b/examples/mlcomp_document_classification.py
@@ -43,6 +43,7 @@ import pylab as pl
 from scikits.learn.datasets import load_mlcomp
 from scikits.learn.svm import LinearSVC
 from scikits.learn.metrics import confusion_matrix
+from scikits.learn.metrics import classification_report
 
 if 'MLCOMP_DATASETS_HOME' not in os.environ:
     print "Please follow those instructions to get started:"
@@ -86,7 +87,11 @@ print "Predicting the labels of the test set..."
 t0 = time()
 pred = clf.predict(news_test.data)
 print "done in %fs" % (time() - t0)
-print "Classification accuracy: %f" % (np.mean(pred == news_test.target) * 100)
+
+print "Classification report on test set:"
+print classification_report(news_test.target, pred,
+                            class_names=news_test.target_names)
+
 
 cm = confusion_matrix(news_test.target, pred)
 print "Confusion matrix:"
diff --git a/examples/mlcomp_sparse_document_classification.py b/examples/mlcomp_sparse_document_classification.py
index 639678cf29..77093fd9ab 100644
--- a/examples/mlcomp_sparse_document_classification.py
+++ b/examples/mlcomp_sparse_document_classification.py
@@ -45,6 +45,7 @@ import pylab as pl
 from scikits.learn.datasets import load_mlcomp
 from scikits.learn.svm.sparse import LinearSVC
 from scikits.learn.metrics import confusion_matrix
+from scikits.learn.metrics import classification_report
 
 if 'MLCOMP_DATASETS_HOME' not in os.environ:
     print "Please follow those instructions to get started:"
@@ -87,7 +88,11 @@ print "Predicting the labels of the test set..."
 t0 = time()
 pred = clf.predict(news_test.data)
 print "done in %fs" % (time() - t0)
-print "Classification accuracy: %f" % (np.mean(pred == news_test.target) * 100)
+
+print "Classification report on test set:"
+print classification_report(news_test.target, pred,
+                            class_names=news_test.target_names)
+
 
 cm = confusion_matrix(news_test.target, pred)
 print "Confusion matrix:"
diff --git a/examples/sgd/mlcomp_sparse_document_classification_sgd.py b/examples/sgd/mlcomp_sparse_document_classification_sgd.py
index d56af4fdf3..d1cdb281da 100644
--- a/examples/sgd/mlcomp_sparse_document_classification_sgd.py
+++ b/examples/sgd/mlcomp_sparse_document_classification_sgd.py
@@ -44,6 +44,7 @@ import numpy as np
 
 from scikits.learn.datasets import load_mlcomp
 from scikits.learn.metrics import confusion_matrix
+from scikits.learn.metrics import classification_report
 
 # from scikits.learn.svm.sparse import LinearSVC
 from scikits.learn.sgd.sparse import SGD
@@ -79,7 +80,6 @@ print "Training a linear SVM (hinge loss and L2 regularizer) using SGD.\n"\
       "SGD(n_iter=50, alpha=0.00001, fit_intercept=True)"
 t0 = time()
 clf = SGD(n_iter=50, alpha=0.00001, fit_intercept=True)
-#clf = LinearSVC(**parameters)
 clf.fit(data, target)
 print "done in %fs" % (time() - t0)
 print "Percentage of non zeros coef: %f" % (np.mean(clf.coef_ != 0) * 100)
@@ -95,12 +95,15 @@ neg_idx = np.where(target == neg)[0]
 idx = np.concatenate((pos_idx, neg_idx))
 data = news_test.data[idx]
 target = news_test.target[idx]
+target_names = news_test.target_names[:2]
 
 print "Predicting the labels of the test set..."
 t0 = time()
 pred = clf.predict(data)
 print "done in %fs" % (time() - t0)
-print "Classification accuracy: %f" % (np.mean(pred == target) * 100)
+print "Classification report on test set:"
+print classification_report(target, pred, class_names=target_names)
+
 
 cm = confusion_matrix(target, pred)
 print "Confusion matrix:"
diff --git a/scikits/learn/datasets/mlcomp.py b/scikits/learn/datasets/mlcomp.py
index 232543b694..22b35dd224 100644
--- a/scikits/learn/datasets/mlcomp.py
+++ b/scikits/learn/datasets/mlcomp.py
@@ -12,7 +12,7 @@ from scikits.learn.feature_extraction.text import SparseHashingVectorizer
 def _load_document_classification(dataset_path, metadata, set_, sparse, **kw):
     """Loader implementation for the DocumentClassification format"""
     target = []
-    target_names = {}
+    target_names = []
     filenames = []
     vectorizer = kw.get('vectorizer')
     if vectorizer is None:
@@ -31,7 +31,7 @@ def _load_document_classification(dataset_path, metadata, set_, sparse, **kw):
     folders = [f for f in sorted(os.listdir(dataset_path))
                if os.path.isdir(os.path.join(dataset_path, f))]
     for label, folder in enumerate(folders):
-        target_names[label] = folder
+        target_names.append(folder)
         folder_path = os.path.join(dataset_path, folder)
         documents = [os.path.join(folder_path, d)
                      for d in sorted(os.listdir(folder_path))]
diff --git a/scikits/learn/metrics.py b/scikits/learn/metrics.py
index 8ef121d86b..c04582dac9 100644
--- a/scikits/learn/metrics.py
+++ b/scikits/learn/metrics.py
@@ -341,6 +341,7 @@ def classification_report(y_true, y_pred, labels=None, class_names=None):
         class_names = ['%d' % l for l in labels]
     else:
         width = max(len(cn) for cn in class_names)
+        width = max(width, len('mean'))
 
 
     headers = ["precision", "recall", "f1-score"]
-- 
GitLab