DOC: faster and more meaningful example

b93105e8 · Gael Varoquaux · e6ecf577 · b93105e8
Commit b93105e8 authored 13 years ago by Gael Varoquaux
--- a/examples/plot_digits_pipe.py
+++ b/examples/plot_digits_pipe.py
@@ -3,10 +3,13 @@

 """
 =========================================================
-Pipelining
+Pipelining: chaining a PCA and a logistic regression
 =========================================================

-This plot is generated by pipelining a PCA and a logisitic regression.
+The PCA does an unsupervised dimensionality reduction, while the logistic
+regression does the prediction.
+
+We use a GridSearchCV to set the dimensionality of the PCA.

 """
 print __doc__
@@ -50,18 +53,17 @@ scores = cross_validation.cross_val_score(pipe, X_digits, y_digits, n_jobs=-1)

 from sklearn.grid_search import GridSearchCV

-n_components = [10, 15, 20, 30, 40, 50, 64]
-Cs = np.logspace(-4, 4, 16)
+n_components = [20, 40, 64]
+Cs = np.logspace(-4, 4, 3)

 #Parameters of pipelines can be set using ‘__’ separated parameter names:

 estimator = GridSearchCV(pipe,
                         dict(pca__n_components=n_components,
-                              logistic__C=Cs),
-                         n_jobs=-1)
+                              logistic__C=Cs))
 estimator.fit(X_digits, y_digits)

-# Plot the PCA spectrum
-pca.fit(X_digits)
-
+pl.axvline(estimator.best_estimator_.named_steps['pca'].n_components,
+           linestyle=':', label='n_components chosen')
+pl.legend(prop=dict(size=12))
 pl.show()