diff --git a/examples/plot_digits_pipe.py b/examples/plot_digits_pipe.py index 5fc612cc4bb4c4ba0c58407eafd8fa20003e6277..6ca85e22f9b842f798b01161c203a934314a2ecb 100644 --- a/examples/plot_digits_pipe.py +++ b/examples/plot_digits_pipe.py @@ -3,10 +3,13 @@ """ ========================================================= -Pipelining +Pipelining: chaining a PCA and a logistic regression ========================================================= -This plot is generated by pipelining a PCA and a logisitic regression. +The PCA does an unsupervised dimensionality reduction, while the logistic +regression does the prediction. + +We use a GridSearchCV to set the dimensionality of the PCA. """ print __doc__ @@ -50,18 +53,17 @@ scores = cross_validation.cross_val_score(pipe, X_digits, y_digits, n_jobs=-1) from sklearn.grid_search import GridSearchCV -n_components = [10, 15, 20, 30, 40, 50, 64] -Cs = np.logspace(-4, 4, 16) +n_components = [20, 40, 64] +Cs = np.logspace(-4, 4, 3) #Parameters of pipelines can be set using ‘__’ separated parameter names: estimator = GridSearchCV(pipe, dict(pca__n_components=n_components, - logistic__C=Cs), - n_jobs=-1) + logistic__C=Cs)) estimator.fit(X_digits, y_digits) -# Plot the PCA spectrum -pca.fit(X_digits) - +pl.axvline(estimator.best_estimator_.named_steps['pca'].n_components, linestyle=':', label='n_components chosen') +pl.legend(prop=dict(size=12)) pl.show()