From 33a0f4e022e7bd333ece611f8e616f7bcea465ed Mon Sep 17 00:00:00 2001
From: Gael varoquaux <gael.varoquaux@normalesup.org>
Date: Sat, 27 Nov 2010 13:32:14 +0100
Subject: [PATCH] DOC: Better plotting in RFE example

---
 ...alidation.py => plot_rfe_with_cross_validation.py} | 11 ++++++++++-
 scikits/learn/feature_selection/rfe.py                |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)
 rename examples/{rfe_with_cross_validation.py => plot_rfe_with_cross_validation.py} (72%)

diff --git a/examples/rfe_with_cross_validation.py b/examples/plot_rfe_with_cross_validation.py
similarity index 72%
rename from examples/rfe_with_cross_validation.py
rename to examples/plot_rfe_with_cross_validation.py
index c566b89bd9..0ab408af3c 100644
--- a/examples/rfe_with_cross_validation.py
+++ b/examples/plot_rfe_with_cross_validation.py
@@ -7,6 +7,7 @@ Recursive feature elimination with automatic tuning of the
 number of features selected with cross-validation
 """
 print __doc__
+import numpy as np
 
 from scikits.learn.svm import SVC
 from scikits.learn.cross_val import StratifiedKFold
@@ -30,6 +31,14 @@ print 'Optimal number of features : %d' % rfecv.support_.sum()
 
 import pylab as pl
 pl.figure()
-pl.plot(rfecv.cv_scores_)
+pl.semilogx(rfecv.n_features_, rfecv.cv_scores_)
+pl.xlabel('Number of features selected')
+pl.ylabel('Cross validation score (nb of misclassifications)')
+# Up to 15 ticks, regularly spaced on a log scale (duplicates removed)
+x_ticks = np.unique(np.logspace(np.log10(2), 
+                                np.log10(rfecv.n_features_.max()),
+                                15,
+                    ).astype(np.int))
+pl.xticks(x_ticks, x_ticks)
 pl.show()
 
diff --git a/scikits/learn/feature_selection/rfe.py b/scikits/learn/feature_selection/rfe.py
index 98b8e6e973..730e8b7a3d 100644
--- a/scikits/learn/feature_selection/rfe.py
+++ b/scikits/learn/feature_selection/rfe.py
@@ -181,6 +181,7 @@ class RFECV(RFE):
         clf = self.estimator
         n_models = np.max(self.ranking_)
         self.cv_scores_ = np.zeros(n_models)
+        self.n_features_ = np.bincount(self.ranking_)[::-1].cumsum()[-2::-1] 
 
         for train, test in cv:
             ranking_ = rfe.fit(X[train], y[train]).ranking_
-- 
GitLab