From 96d6060cd7ac241575b11e072c49d705f3b6983d Mon Sep 17 00:00:00 2001
From: Jake Vanderplas <vanderplas@astro.washington.edu>
Date: Fri, 23 Dec 2011 10:12:26 -0800
Subject: [PATCH] pep8

---
 ...e_agglomeration_vs_univariate_selection.py | 16 +++----
 examples/cluster/plot_lena_segmentation.py    |  8 ++--
 .../cluster/plot_lena_ward_segmentation.py    |  4 +-
 examples/cluster/plot_segmentation_toy.py     | 22 ++++-----
 .../covariance/plot_covariance_estimation.py  |  2 +-
 examples/covariance/plot_outlier_detection.py |  2 +-
 .../gaussian_process/gp_diabetes_dataset.py   |  6 +--
 examples/linear_model/lasso_and_elasticnet.py | 24 +++++-----
 .../linear_model/logistic_l1_l2_sparsity.py   |  1 -
 .../plot_lasso_coordinate_descent_path.py     |  9 ++--
 examples/linear_model/plot_lasso_lars.py      |  1 -
 .../plot_lasso_model_selection.py             |  7 +--
 examples/linear_model/plot_logistic_path.py   |  3 +-
 examples/linear_model/plot_ols.py             |  1 -
 examples/linear_model/plot_ridge_path.py      |  7 ++-
 .../linear_model/plot_sgd_loss_functions.py   | 12 ++---
 examples/linear_model/plot_sgd_ols.py         |  1 -
 examples/linear_model/plot_sgd_penalties.py   | 45 +++++++++++--------
 .../plot_sgd_separating_hyperplane.py         | 17 ++++---
 .../linear_model/plot_sgd_weighted_classes.py |  9 ++--
 .../linear_model/plot_sgd_weighted_samples.py |  2 +-
 examples/manifold/plot_lle_digits.py          |  4 +-
 examples/manifold/plot_swissroll.py           |  2 +-
 examples/mixture/plot_gmm.py                  | 13 +++---
 examples/mixture/plot_gmm_classifier.py       | 15 ++++---
 examples/mixture/plot_gmm_pdf.py              |  3 +-
 examples/mixture/plot_gmm_sin.py              | 24 +++++-----
 examples/plot_multilabel.py                   |  4 +-
 28 files changed, 132 insertions(+), 132 deletions(-)

diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
index 9a5fe895d7..a5ac8ca32a 100644
--- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
+++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
@@ -34,7 +34,7 @@ from sklearn.cross_validation import KFold
 ###############################################################################
 # Generate data
 n_samples = 200
-size = 40 # image size
+size = 40  # image size
 roi_size = 15
 snr = 5.
 np.random.seed(0)
@@ -44,8 +44,8 @@ coef = np.zeros((size, size))
 coef[0:roi_size, 0:roi_size] = -1.
 coef[-roi_size:, -roi_size:] = 1.
 
-X = np.random.randn(n_samples, size**2)
-for x in X: # smooth data
+X = np.random.randn(n_samples, size ** 2)
+for x in X:  # smooth data
     x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()
 X -= X.mean(axis=0)
 X /= X.std(axis=0)
@@ -53,11 +53,11 @@ X /= X.std(axis=0)
 y = np.dot(X, coef.ravel())
 noise = np.random.randn(y.shape[0])
 noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
-y += noise_coef * noise # add noise
+y += noise_coef * noise  # add noise
 
 ###############################################################################
 # Compute the coefs of a Bayesian Ridge with GridSearch
-cv = KFold(len(y), 2) # cross-validation generator for model selection
+cv = KFold(len(y), 2)  # cross-validation generator for model selection
 ridge = BayesianRidge()
 mem = Memory(cachedir='.', verbose=1)
 
@@ -68,18 +68,18 @@ ward = WardAgglomeration(n_clusters=10, connectivity=A, memory=mem,
 clf = Pipeline([('ward', ward), ('ridge', ridge)])
 # Select the optimal number of parcels with grid search
 clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1, cv=cv)
-clf.fit(X, y) # set the best parameters
+clf.fit(X, y)  # set the best parameters
 coef_ = clf.best_estimator.steps[-1][1].coef_
 coef_ = clf.best_estimator.steps[0][1].inverse_transform(coef_)
 coef_agglomeration_ = coef_.reshape(size, size)
 
 # Anova univariate feature selection followed by BayesianRidge
-f_regression = mem.cache(feature_selection.f_regression) # caching function
+f_regression = mem.cache(feature_selection.f_regression)  # caching function
 anova = feature_selection.SelectPercentile(f_regression)
 clf = Pipeline([('anova', anova), ('ridge', ridge)])
 # Select the optimal percentage of features with grid search
 clf = GridSearchCV(clf, {'anova__percentile': [5, 10, 20]}, cv=cv)
-clf.fit(X, y) # set the best parameters
+clf.fit(X, y)  # set the best parameters
 coef_ = clf.best_estimator.steps[-1][1].coef_
 coef_ = clf.best_estimator.steps[0][1].inverse_transform(coef_)
 coef_selection_ = coef_.reshape(size, size)
diff --git a/examples/cluster/plot_lena_segmentation.py b/examples/cluster/plot_lena_segmentation.py
index 52822ac7ec..66f2649a30 100644
--- a/examples/cluster/plot_lena_segmentation.py
+++ b/examples/cluster/plot_lena_segmentation.py
@@ -35,8 +35,8 @@ graph = image.img_to_graph(lena)
 # The smaller beta is, the more independant the segmentation is of the
 # actual image. For beta=1, the segmentation is close to a voronoi
 beta = 5
-eps  = 1e-6
-graph.data = np.exp(-beta*graph.data/lena.std()) + eps
+eps = 1e-6
+graph.data = np.exp(-beta * graph.data / lena.std()) + eps
 
 # Apply spectral clustering (this step goes much faster if you have pyamg
 # installed)
@@ -44,13 +44,13 @@ N_REGIONS = 11
 labels = spectral_clustering(graph, k=N_REGIONS)
 labels = labels.reshape(lena.shape)
 
-################################################################################
+###############################################################################
 # Visualize the resulting regions
 pl.figure(figsize=(5, 5))
 pl.imshow(lena,   cmap=pl.cm.gray)
 for l in range(N_REGIONS):
     pl.contour(labels == l, contours=1,
-            colors=[pl.cm.spectral(l/float(N_REGIONS)), ])
+            colors=[pl.cm.spectral(l / float(N_REGIONS)), ])
 pl.xticks(())
 pl.yticks(())
 pl.show()
diff --git a/examples/cluster/plot_lena_ward_segmentation.py b/examples/cluster/plot_lena_ward_segmentation.py
index 58294f3076..bbe81a2492 100644
--- a/examples/cluster/plot_lena_ward_segmentation.py
+++ b/examples/cluster/plot_lena_ward_segmentation.py
@@ -36,7 +36,7 @@ connectivity = grid_to_graph(*lena.shape)
 # Compute clustering
 print "Compute structured hierarchical clustering..."
 st = time.time()
-n_clusters = 15 # number of regions
+n_clusters = 15  # number of regions
 ward = Ward(n_clusters=n_clusters, connectivity=connectivity).fit(X)
 label = np.reshape(ward.labels_, lena.shape)
 print "Elaspsed time: ", time.time() - st
@@ -49,7 +49,7 @@ pl.figure(figsize=(5, 5))
 pl.imshow(lena, cmap=pl.cm.gray)
 for l in range(n_clusters):
     pl.contour(label == l, contours=1,
-            colors=[pl.cm.spectral(l/float(n_clusters)), ])
+            colors=[pl.cm.spectral(l / float(n_clusters)), ])
 pl.xticks(())
 pl.yticks(())
 pl.show()
diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py
index 80cdd6893c..fc1b8cafc7 100644
--- a/examples/cluster/plot_segmentation_toy.py
+++ b/examples/cluster/plot_segmentation_toy.py
@@ -36,7 +36,7 @@ import pylab as pl
 from sklearn.feature_extraction import image
 from sklearn.cluster import spectral_clustering
 
-################################################################################
+###############################################################################
 l = 100
 x, y = np.indices((l, l))
 
@@ -47,18 +47,18 @@ center4 = (24, 70)
 
 radius1, radius2, radius3, radius4 = 16, 14, 15, 14
 
-circle1 = (x - center1[0])**2 + (y - center1[1])**2 < radius1**2
-circle2 = (x - center2[0])**2 + (y - center2[1])**2 < radius2**2
-circle3 = (x - center3[0])**2 + (y - center3[1])**2 < radius3**2
-circle4 = (x - center4[0])**2 + (y - center4[1])**2 < radius4**2
+circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
+circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2
+circle3 = (x - center3[0]) ** 2 + (y - center3[1]) ** 2 < radius3 ** 2
+circle4 = (x - center4[0]) ** 2 + (y - center4[1]) ** 2 < radius4 ** 2
 
-################################################################################
+###############################################################################
 # 4 circles
 img = circle1 + circle2 + circle3 + circle4
 mask = img.astype(bool)
 img = img.astype(float)
 
-img += 1 + 0.2*np.random.randn(*img.shape)
+img += 1 + 0.2 * np.random.randn(*img.shape)
 
 # Convert the image into a graph with the value of the gradient on the
 # edges.
@@ -66,7 +66,7 @@ graph = image.img_to_graph(img, mask=mask)
 
 # Take a decreasing function of the gradient: we take it weakly
 # dependant from the gradient the segmentation is close to a voronoi
-graph.data = np.exp(-graph.data/graph.data.std())
+graph.data = np.exp(-graph.data / graph.data.std())
 
 # Force the solver to be arpack, since amg is numerically
 # unstable on this example
@@ -77,16 +77,16 @@ label_im[mask] = labels
 pl.matshow(img)
 pl.matshow(label_im)
 
-################################################################################
+###############################################################################
 # 2 circles
 img = circle1 + circle2
 mask = img.astype(bool)
 img = img.astype(float)
 
-img += 1 + 0.2*np.random.randn(*img.shape)
+img += 1 + 0.2 * np.random.randn(*img.shape)
 
 graph = image.img_to_graph(img, mask=mask)
-graph.data = np.exp(-graph.data/graph.data.std())
+graph.data = np.exp(-graph.data / graph.data.std())
 
 labels = spectral_clustering(graph, k=2, mode='arpack')
 label_im = -np.ones(mask.shape)
diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index 14c92c637c..a430835376 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -79,7 +79,7 @@ pl.loglog(shrinkages, negative_logliks)
 # BUG: hlines(..., linestyle='--') breaks on some older versions of matplotlib
 #pl.hlines(loglik_real, pl.xlim()[0], pl.xlim()[1], color='red',
 #          label="real covariance likelihood", linestyle='--')
-pl.plot(pl.xlim(), 2*[loglik_real], '--r',
+pl.plot(pl.xlim(), 2 * [loglik_real], '--r',
         label="real covariance likelihood")
 
 # adjust view
diff --git a/examples/covariance/plot_outlier_detection.py b/examples/covariance/plot_outlier_detection.py
index 0de3ed1e74..75ab445c82 100644
--- a/examples/covariance/plot_outlier_detection.py
+++ b/examples/covariance/plot_outlier_detection.py
@@ -69,7 +69,7 @@ for i, offset in enumerate(clusters_separation):
         clf.fit(X)
         y_pred = clf.decision_function(X).ravel()
         threshold = stats.scoreatpercentile(y_pred,
-                                            100*outliers_fraction)
+                                            100 * outliers_fraction)
         y_pred = y_pred > threshold
         n_errors = (y_pred != ground_truth).sum()
         # plot the levels lines and the points
diff --git a/examples/gaussian_process/gp_diabetes_dataset.py b/examples/gaussian_process/gp_diabetes_dataset.py
index 4c0996826d..4ea4c4b0d9 100644
--- a/examples/gaussian_process/gp_diabetes_dataset.py
+++ b/examples/gaussian_process/gp_diabetes_dataset.py
@@ -40,12 +40,12 @@ gp = GaussianProcess(regr='constant', corr='absolute_exponential',
 gp.fit(X, y)
 
 # Deactivate maximum likelihood estimation for the cross-validation loop
-gp.theta0 = gp.theta # Given correlation parameter = MLE
-gp.thetaL, gp.thetaU = None, None # None bounds deactivate MLE
+gp.theta0 = gp.theta  # Given correlation parameter = MLE
+gp.thetaL, gp.thetaU = None, None  # None bounds deactivate MLE
 
 # Perform a cross-validation estimate of the coefficient of determination using
 # the cross_validation module using all CPUs available on the machine
-K = 20 # folds
+K = 20  # folds
 R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean()
 print("The %d-Folds estimate of the coefficient of determination is R2 = %s"
     % (K, R2))
diff --git a/examples/linear_model/lasso_and_elasticnet.py b/examples/linear_model/lasso_and_elasticnet.py
index 6dbdea3856..bcac07cb3a 100644
--- a/examples/linear_model/lasso_and_elasticnet.py
+++ b/examples/linear_model/lasso_and_elasticnet.py
@@ -8,24 +8,24 @@ print __doc__
 
 import numpy as np
 
-################################################################################
+###############################################################################
 # generate some sparse data to play with
 
 n_samples, n_features = 50, 200
 X = np.random.randn(n_samples, n_features)
-coef = 3*np.random.randn(n_features)
-coef[10:] = 0 # sparsify coef
+coef = 3 * np.random.randn(n_features)
+coef[10:] = 0  # sparsify coef
 y = np.dot(X, coef)
 
 # add noise
-y += 0.01*np.random.normal((n_samples,))
+y += 0.01 * np.random.normal((n_samples,))
 
 # Split data in train set and test set
 n_samples = X.shape[0]
-X_train, y_train = X[:n_samples/2], y[:n_samples/2]
-X_test, y_test = X[n_samples/2:], y[n_samples/2:]
+X_train, y_train = X[:n_samples / 2], y[:n_samples / 2]
+X_test, y_test = X[n_samples / 2:], y[n_samples / 2:]
 
-################################################################################
+###############################################################################
 # Lasso
 from sklearn.linear_model import Lasso
 
@@ -34,10 +34,10 @@ lasso = Lasso(alpha=alpha)
 
 y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
 print lasso
-print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_lasso)**2
-                                      / np.linalg.norm(y_test)**2)
+print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_lasso) ** 2
+                                      / np.linalg.norm(y_test) ** 2)
 
-################################################################################
+###############################################################################
 # ElasticNet
 from sklearn.linear_model import ElasticNet
 
@@ -45,5 +45,5 @@ enet = ElasticNet(alpha=alpha, rho=0.7)
 
 y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
 print enet
-print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_enet)**2
-                                      / np.linalg.norm(y_test)**2)
+print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_enet) ** 2
+                                      / np.linalg.norm(y_test) ** 2)
diff --git a/examples/linear_model/logistic_l1_l2_sparsity.py b/examples/linear_model/logistic_l1_l2_sparsity.py
index c74ebcf7dd..0ca916ad78 100644
--- a/examples/linear_model/logistic_l1_l2_sparsity.py
+++ b/examples/linear_model/logistic_l1_l2_sparsity.py
@@ -45,4 +45,3 @@ for C in (0.1, 1, 10):
     print "C=%f" % C
     print "Sparsity with L1 penalty: %f" % sparsity_l1_LR
     print "Sparsity with L2 penalty: %f" % sparsity_l2_LR
-
diff --git a/examples/linear_model/plot_lasso_coordinate_descent_path.py b/examples/linear_model/plot_lasso_coordinate_descent_path.py
index 60e6214e8f..f23f08c5e1 100644
--- a/examples/linear_model/plot_lasso_coordinate_descent_path.py
+++ b/examples/linear_model/plot_lasso_coordinate_descent_path.py
@@ -21,12 +21,12 @@ diabetes = datasets.load_diabetes()
 X = diabetes.data
 y = diabetes.target
 
-X /= X.std(0) # Standardize data (easier to set the rho parameter)
+X /= X.std(0)  # Standardize data (easier to set the rho parameter)
 
-################################################################################
+###############################################################################
 # Compute paths
 
-eps = 5e-3 # the smaller it is the longer is the path
+eps = 5e-3  # the smaller it is the longer is the path
 
 print "Computing regularization path using the lasso..."
 models = lasso_path(X, y, eps=eps)
@@ -38,7 +38,7 @@ models = enet_path(X, y, eps=eps, rho=0.8)
 alphas_enet = np.array([model.alpha for model in models])
 coefs_enet = np.array([model.coef_ for model in models])
 
-################################################################################
+###############################################################################
 # Display results
 
 ax = pl.gca()
@@ -52,4 +52,3 @@ pl.title('Lasso and Elastic-Net Paths')
 pl.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
 pl.axis('tight')
 pl.show()
-
diff --git a/examples/linear_model/plot_lasso_lars.py b/examples/linear_model/plot_lasso_lars.py
index dfd2c24061..8a0608ac1a 100644
--- a/examples/linear_model/plot_lasso_lars.py
+++ b/examples/linear_model/plot_lasso_lars.py
@@ -38,4 +38,3 @@ pl.ylabel('Coefficients')
 pl.title('LASSO Path')
 pl.axis('tight')
 pl.show()
-
diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py
index cf049e9aa7..a00e153f3b 100644
--- a/examples/linear_model/plot_lasso_model_selection.py
+++ b/examples/linear_model/plot_lasso_model_selection.py
@@ -93,7 +93,8 @@ pl.figure()
 plot_ic_criterion(model_aic, 'AIC', 'b')
 plot_ic_criterion(model_bic, 'BIC', 'r')
 pl.legend()
-pl.title('Information-criterion for model selection (training time %.3fs)' % t_bic)
+pl.title('Information-criterion for model selection (training time %.3fs)'
+         % t_bic)
 
 ##############################################################################
 # LassoCV: coordinate descent
@@ -119,8 +120,8 @@ pl.legend()
 
 pl.xlabel('-log(lambda)')
 pl.ylabel('Mean square error')
-pl.title('Mean square error on each fold: coordinate descent (train time: %.2fs)' %
-            t_lasso_cv)
+pl.title('Mean square error on each fold: coordinate descent '
+         '(train time: %.2fs)' % t_lasso_cv)
 pl.axis('tight')
 pl.ylim(ymin, ymax)
 
diff --git a/examples/linear_model/plot_logistic_path.py b/examples/linear_model/plot_logistic_path.py
index 47fed448c2..8f108f013d 100644
--- a/examples/linear_model/plot_logistic_path.py
+++ b/examples/linear_model/plot_logistic_path.py
@@ -29,7 +29,7 @@ y = y[y != 2]
 
 X -= np.mean(X, 0)
 
-################################################################################
+###############################################################################
 # Demo path functions
 
 cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3)
@@ -53,4 +53,3 @@ pl.ylabel('Coefficients')
 pl.title('Logistic Regression Path')
 pl.axis('tight')
 pl.show()
-
diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index 9d8ae6a6bb..bf91f479a9 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -31,4 +31,3 @@ pl.plot(X, clf.predict(X), color='blue', linewidth=3)
 pl.xticks(())
 pl.yticks(())
 pl.show()
-
diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py
index d3d9bae64a..a21977578d 100644
--- a/examples/linear_model/plot_ridge_path.py
+++ b/examples/linear_model/plot_ridge_path.py
@@ -24,7 +24,7 @@ from sklearn import linear_model
 X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis])
 y = np.ones(10)
 
-################################################################################
+###############################################################################
 # Compute paths
 
 n_alphas = 200
@@ -37,7 +37,7 @@ for a in alphas:
     clf.fit(X, y)
     coefs.append(clf.coef_)
 
-################################################################################
+###############################################################################
 # Display results
 
 ax = pl.gca()
@@ -45,10 +45,9 @@ ax.set_color_cycle(['b', 'r', 'g', 'c', 'k', 'y', 'm'])
 
 ax.plot(alphas, coefs)
 ax.set_xscale('log')
-ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis
+ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
 pl.xlabel('alpha')
 pl.ylabel('weights')
 pl.title('Ridge coefficients as a function of the regularization')
 pl.axis('tight')
 pl.show()
-
diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py
index c1a1039f15..4907cf9722 100644
--- a/examples/linear_model/plot_sgd_loss_functions.py
+++ b/examples/linear_model/plot_sgd_loss_functions.py
@@ -3,7 +3,8 @@
 SGD: Convex Loss Functions
 ==========================
 
-Plot the convex loss functions supported by `sklearn.linear_model.stochastic_gradient`.
+Plot the convex loss functions supported by
+`sklearn.linear_model.stochastic_gradient`.
 """
 print __doc__
 
@@ -25,17 +26,16 @@ squared_loss = SquaredLoss()
 xx = np.linspace(xmin, xmax, 100)
 pl.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], 'k-',
         label="Zero-one loss")
-pl.plot(xx, [hinge.loss(x,1) for x in xx], 'g-',
+pl.plot(xx, [hinge.loss(x, 1) for x in xx], 'g-',
         label="Hinge loss")
-pl.plot(xx, [log_loss(x,1) for x in xx], 'r-',
+pl.plot(xx, [log_loss(x, 1) for x in xx], 'r-',
         label="Log loss")
-pl.plot(xx, [modified_huber.loss(x,1) for x in xx], 'y-',
+pl.plot(xx, [modified_huber.loss(x, 1) for x in xx], 'y-',
         label="Modified huber loss")
-#pl.plot(xx, [2.0*squared_loss.loss(x,1) for x in xx], 'c-',
+#pl.plot(xx, [2.0 * squared_loss.loss(x, 1) for x in xx], 'c-',
 #        label="Squared loss")
 pl.ylim((0, 5))
 pl.legend(loc="upper right")
 pl.xlabel(r"$y \cdot f(x)$")
 pl.ylabel("$L(y, f(x))$")
 pl.show()
-
diff --git a/examples/linear_model/plot_sgd_ols.py b/examples/linear_model/plot_sgd_ols.py
index dde2030ee0..9749d77c53 100644
--- a/examples/linear_model/plot_sgd_ols.py
+++ b/examples/linear_model/plot_sgd_ols.py
@@ -30,4 +30,3 @@ clf.fit(X, Y)
 pl.scatter(X, Y, color='black')
 pl.plot(X, clf.predict(X), color='blue', linewidth=3)
 pl.show()
-
diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py
index 597e0b784b..40617a13bf 100644
--- a/examples/linear_model/plot_sgd_penalties.py
+++ b/examples/linear_model/plot_sgd_penalties.py
@@ -3,7 +3,8 @@
 SGD: Penalties
 ==============
 
-Plot the contours of the three penalties supported by `sklearn.linear_model.stochastic_gradient`.
+Plot the contours of the three penalties supported by
+`sklearn.linear_model.stochastic_gradient`.
 
 """
 from __future__ import division
@@ -12,40 +13,49 @@ print __doc__
 import numpy as np
 import pylab as pl
 
-def l1(xs): return np.array([np.sqrt((1 - np.sqrt(x**2.0))**2.0) for x in xs])
 
-def l2(xs): return np.array([np.sqrt(1.0-x**2.0) for x in xs])
+def l1(xs):
+    return np.array([np.sqrt((1 - np.sqrt(x ** 2.0)) ** 2.0) for x in xs])
+
+
+def l2(xs):
+    return np.array([np.sqrt(1.0 - x ** 2.0) for x in xs])
+
 
 def el(xs, z):
-    return np.array([(2 - 2*x - 2*z + 4*x*z -
-                   (4*z**2 - 8*x*z**2 + 8*x**2*z**2 -
-                    16*x**2*z**3 + 8*x*z**3 + 4*x**2*z**4)**(1/2) -
-                   2*x*z**2)/(2 - 4*z) for x in xs])
+    return np.array([(2 - 2 * x - 2 * z + 4 * x * z -
+                      (4 * z ** 2
+                       - 8 * x * z ** 2
+                       + 8 * x ** 2 * z ** 2
+                       - 16 * x ** 2 * z ** 3
+                       + 8 * x * z ** 3 + 4 * x ** 2 * z ** 4) ** (1. / 2)
+                      - 2 * x * z ** 2) / (2 - 4 * z) for x in xs])
+
 
 def cross(ext):
-    pl.plot([-ext,ext],[0,0], "k-")
-    pl.plot([0,0], [-ext,ext], "k-")
+    pl.plot([-ext, ext], [0, 0], "k-")
+    pl.plot([0, 0], [-ext, ext], "k-")
 
 xs = np.linspace(0, 1, 100)
 
-alpha = 0.501 # 0.5 division throuh zero
+alpha = 0.501  # 0.5 would cause division by zero
 
 cross(1.2)
 
 pl.plot(xs, l1(xs), "r-", label="L1")
-pl.plot(xs, -1.0*l1(xs), "r-")
-pl.plot(-1*xs, l1(xs), "r-")
-pl.plot(-1*xs, -1.0*l1(xs), "r-")
+pl.plot(xs, -1.0 * l1(xs), "r-")
+pl.plot(-1 * xs, l1(xs), "r-")
+pl.plot(-1 * xs, -1.0 * l1(xs), "r-")
 
 pl.plot(xs, l2(xs), "b-", label="L2")
 pl.plot(xs, -1.0 * l2(xs), "b-")
-pl.plot(-1*xs, l2(xs), "b-")
-pl.plot(-1*xs, -1.0 * l2(xs), "b-")
+pl.plot(-1 * xs, l2(xs), "b-")
+pl.plot(-1 * xs, -1.0 * l2(xs), "b-")
 
 pl.plot(xs, el(xs, alpha), "y-", label="Elastic Net")
 pl.plot(xs, -1.0 * el(xs, alpha), "y-")
-pl.plot(-1*xs, el(xs, alpha), "y-")
-pl.plot(-1*xs, -1.0 * el(xs, alpha), "y-")
+pl.plot(-1 * xs, el(xs, alpha), "y-")
+pl.plot(-1 * xs, -1.0 * el(xs, alpha), "y-")
 
 pl.xlabel(r"$w_0$")
 pl.ylabel(r"$w_1$")
@@ -53,4 +63,3 @@ pl.legend()
 
 pl.axis("equal")
 pl.show()
-
diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py
index b55b27dc91..902629e5f8 100644
--- a/examples/linear_model/plot_sgd_separating_hyperplane.py
+++ b/examples/linear_model/plot_sgd_separating_hyperplane.py
@@ -15,12 +15,11 @@ from sklearn.linear_model import SGDClassifier
 
 # we create 40 separable points
 np.random.seed(0)
-X = np.r_[np.random.randn(20, 2) - [2,2], np.random.randn(20, 2) + [2, 2]]
-Y = [0]*20 + [1]*20
+X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
+Y = [0] * 20 + [1] * 20
 
 # fit the model
-clf = SGDClassifier(loss="hinge", alpha = 0.01, n_iter=50,
-                    fit_intercept=True)
+clf = SGDClassifier(loss="hinge", alpha=0.01, n_iter=50, fit_intercept=True)
 clf.fit(X, Y)
 
 # plot the line, the points, and the nearest vectors to the plane
@@ -28,17 +27,17 @@ xx = np.linspace(-5, 5, 10)
 yy = np.linspace(-5, 5, 10)
 X1, X2 = np.meshgrid(xx, yy)
 Z = np.empty(X1.shape)
-for (i,j), val in np.ndenumerate(X1):
+for (i, j), val in np.ndenumerate(X1):
     x1 = val
-    x2 = X2[i,j]
+    x2 = X2[i, j]
     p = clf.decision_function([x1, x2])
-    Z[i,j] = p[0]
+    Z[i, j] = p[0]
 levels = [-1.0, 0.0, 1.0]
-linestyles = ['dashed','solid', 'dashed']
+linestyles = ['dashed', 'solid', 'dashed']
 colors = 'k'
 pl.set_cmap(pl.cm.Paired)
 pl.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
-pl.scatter(X[:,0], X[:,1], c=Y)
+pl.scatter(X[:, 0], X[:, 1], c=Y)
 
 pl.axis('tight')
 pl.show()
diff --git a/examples/linear_model/plot_sgd_weighted_classes.py b/examples/linear_model/plot_sgd_weighted_classes.py
index aab147d9a2..72ac092713 100644
--- a/examples/linear_model/plot_sgd_weighted_classes.py
+++ b/examples/linear_model/plot_sgd_weighted_classes.py
@@ -17,9 +17,9 @@ from sklearn.linear_model import SGDClassifier
 np.random.seed(0)
 n_samples_1 = 1000
 n_samples_2 = 100
-X = np.r_[1.5*np.random.randn(n_samples_1, 2),
-          0.5*np.random.randn(n_samples_2, 2) + [2, 2]]
-y = np.array([0]*(n_samples_1) + [1]*(n_samples_2), dtype=np.float64)
+X = np.r_[1.5 * np.random.randn(n_samples_1, 2),
+          0.5 * np.random.randn(n_samples_2, 2) + [2, 2]]
+y = np.array([0] * (n_samples_1) + [1] * (n_samples_2), dtype=np.float64)
 idx = np.arange(y.shape[0])
 np.random.shuffle(idx)
 X = X[idx]
@@ -50,9 +50,8 @@ wyy = wa * xx - wclf.intercept_ / ww[1]
 pl.set_cmap(pl.cm.Paired)
 h0 = pl.plot(xx, yy, 'k-')
 h1 = pl.plot(xx, wyy, 'k--')
-pl.scatter(X[:,0], X[:,1], c=y)
+pl.scatter(X[:, 0], X[:, 1], c=y)
 pl.legend((h0, h1), ('no weights', 'with weights'))
 
 pl.axis('tight')
 pl.show()
-
diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py
index c0cdcb3228..221f7f377e 100644
--- a/examples/linear_model/plot_sgd_weighted_samples.py
+++ b/examples/linear_model/plot_sgd_weighted_samples.py
@@ -15,7 +15,7 @@ from sklearn import linear_model
 # we create 20 points
 np.random.seed(0)
 X = np.r_[np.random.randn(10, 2) + [1, 1], np.random.randn(10, 2)]
-y = [1]*10 + [-1]*10
+y = [1] * 10 + [-1] * 10
 sample_weight = 100 * np.abs(np.random.randn(20))
 # and assign a bigger weight to the last 10 samples
 sample_weight[:10] *= 10
diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py
index 9e4298b5c4..2a0ec53015 100644
--- a/examples/manifold/plot_lle_digits.py
+++ b/examples/manifold/plot_lle_digits.py
@@ -63,10 +63,10 @@ def plot_embedding(X, title=None):
 N = 20
 img = np.zeros((10 * N, 10 * N))
 for i in range(N):
-    ix = 10*i + 1
+    ix = 10 * i + 1
     for j in range(N):
         iy = 10 * j + 1
-        img[ix:ix + 8, iy:iy + 8] = X[i*N+j].reshape((8,8))
+        img[ix:ix + 8, iy:iy + 8] = X[i * N + j].reshape((8, 8))
 pl.imshow(img, cmap=pl.cm.binary)
 pl.xticks([])
 pl.yticks([])
diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py
index 0ce92160d8..8659099c50 100644
--- a/examples/manifold/plot_swissroll.py
+++ b/examples/manifold/plot_swissroll.py
@@ -40,7 +40,7 @@ except:
 
 ax.set_title("Original data")
 ax = fig.add_subplot(212)
-ax.scatter(X_r[:,0], X_r[:,1], c=color, cmap=pl.cm.Spectral)
+ax.scatter(X_r[:, 0], X_r[:, 1], c=color, cmap=pl.cm.Spectral)
 pl.axis('tight')
 pl.xticks([]), pl.yticks([])
 pl.title('Projected data')
diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py
index 662675c481..ae3c547517 100644
--- a/examples/mixture/plot_gmm.py
+++ b/examples/mixture/plot_gmm.py
@@ -37,7 +37,7 @@ n_samples = 500
 np.random.seed(0)
 C = np.array([[0., -0.1], [1.7, .4]])
 X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
-          .7*np.random.randn(n_samples, 2) + np.array([-6, 3])]
+          .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])]
 
 # Fit a mixture of gaussians with EM using five components
 gmm = mixture.GMM(n_components=5, cvtype='full')
@@ -47,11 +47,11 @@ gmm.fit(X)
 dpgmm = mixture.DPGMM(n_components=5, cvtype='full')
 dpgmm.fit(X)
 
-color_iter = itertools.cycle (['r', 'g', 'b', 'c', 'm'])
+color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])
 
 for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                   (dpgmm, 'Dirichlet Process GMM')]):
-    splot = pl.subplot(2, 1, 1+i)
+    splot = pl.subplot(2, 1, 1 + i)
     Y_ = clf.predict(X)
     for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                                  color_iter)):
@@ -62,11 +62,11 @@ for i, (clf, title) in enumerate([(gmm, 'GMM'),
         # components.
         if not np.any(Y_ == i):
             continue
-        pl.scatter(X[Y_== i, 0], X[Y_== i, 1], .8, color=color)
+        pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
 
         # Plot an ellipse to show the Gaussian component
-        angle = np.arctan(u[1]/u[0])
-        angle = 180 * angle / np.pi # convert to degrees
+        angle = np.arctan(u[1] / u[0])
+        angle = 180 * angle / np.pi  # convert to degrees
         ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
         ell.set_clip_box(splot.bbox)
         ell.set_alpha(0.5)
@@ -79,4 +79,3 @@ for i, (clf, title) in enumerate([(gmm, 'GMM'),
     pl.title(title)
 
 pl.show()
-
diff --git a/examples/mixture/plot_gmm_classifier.py b/examples/mixture/plot_gmm_classifier.py
index e0be6e0164..5eb09b1a2c 100644
--- a/examples/mixture/plot_gmm_classifier.py
+++ b/examples/mixture/plot_gmm_classifier.py
@@ -34,12 +34,13 @@ from sklearn import datasets
 from sklearn.cross_validation import StratifiedKFold
 from sklearn.mixture import GMM
 
+
 def make_ellipses(gmm, ax):
     for n, color in enumerate('rgb'):
         v, w = np.linalg.eigh(gmm.covars[n][:2, :2])
         u = w[0] / np.linalg.norm(w[0])
-        angle = np.arctan(u[1]/u[0])
-        angle = 180 * angle / np.pi # convert to degrees
+        angle = np.arctan(u[1] / u[0])
+        angle = 180 * angle / np.pi  # convert to degrees
         v *= 9
         ell = mpl.patches.Ellipse(gmm.means[n, :2], v[0], v[1], 180 + angle,
                                   color=color)
@@ -69,7 +70,7 @@ classifiers = dict((x, GMM(n_components=n_classes, cvtype=x))
 
 n_classifiers = len(classifiers)
 
-pl.figure(figsize=(3*n_classifiers/2, 6))
+pl.figure(figsize=(3 * n_classifiers / 2, 6))
 pl.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
                    left=.01, right=.99)
 
@@ -83,12 +84,12 @@ for index, (name, classifier) in enumerate(classifiers.iteritems()):
     # Train the other parameters using the EM algorithm.
     classifier.fit(X_train, init_params='wc', n_iter=20)
 
-    h = pl.subplot(2, n_classifiers/2, index + 1)
+    h = pl.subplot(2, n_classifiers / 2, index + 1)
     make_ellipses(classifier, h)
 
     for n, color in enumerate('rgb'):
         data = iris.data[iris.target == n]
-        pl.scatter(data[:,0], data[:, 1], 0.8, color=color,
+        pl.scatter(data[:, 0], data[:, 1], 0.8, color=color,
                     label=iris.target_names[n])
     # Plot the test data with crosses
     for n, color in enumerate('rgb'):
@@ -96,12 +97,12 @@ for index, (name, classifier) in enumerate(classifiers.iteritems()):
         pl.plot(data[:, 0], data[:, 1], 'x', color=color)
 
     y_train_pred = classifier.predict(X_train)
-    train_accuracy  = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
+    train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
     pl.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
                     transform=h.transAxes)
 
     y_test_pred = classifier.predict(X_test)
-    test_accuracy  = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
+    test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
     pl.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
                     transform=h.transAxes)
 
diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py
index 396a550252..868b0069df 100644
--- a/examples/mixture/plot_gmm_pdf.py
+++ b/examples/mixture/plot_gmm_pdf.py
@@ -27,7 +27,7 @@ x = np.linspace(-20.0, 30.0)
 y = np.linspace(-20.0, 40.0)
 X, Y = np.meshgrid(x, y)
 XX = np.c_[X.ravel(), Y.ravel()]
-Z =  np.log(-clf.eval(XX)[0])
+Z = np.log(-clf.eval(XX)[0])
 Z = Z.reshape(X.shape)
 
 CS = pl.contour(X, Y, Z)
@@ -36,4 +36,3 @@ pl.scatter(X_train[:, 0], X_train[:, 1], .8)
 
 pl.axis('tight')
 pl.show()
-
diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py
index 078f121dd8..63962ad0b1 100644
--- a/examples/mixture/plot_gmm_sin.py
+++ b/examples/mixture/plot_gmm_sin.py
@@ -29,19 +29,20 @@ n_samples = 100
 # Generate random sample following a sine curve
 np.random.seed(0)
 X = np.zeros((n_samples, 2))
-step = 4*np.pi/n_samples
+step = 4 * np.pi / n_samples
 
 for i in xrange(X.shape[0]):
-    x = i*step-6
-    X[i,0] = x+np.random.normal(0, 0.1)
-    X[i,1] = 3*(np.sin(x)+np.random.normal(0, .2))
+    x = i * step - 6
+    X[i, 0] = x + np.random.normal(0, 0.1)
+    X[i, 1] = 3 * (np.sin(x) + np.random.normal(0, .2))
 
 
-color_iter = itertools.cycle (['r', 'g', 'b', 'c', 'm'])
+color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])
 
 
 for i, (clf, title) in enumerate([
-        (mixture.GMM(n_components=10, cvtype='diag'), "Expectation-maximization"),
+        (mixture.GMM(n_components=10, cvtype='diag'),
+         "Expectation-maximization"),
         (mixture.DPGMM(n_components=10, cvtype='diag', alpha=0.01),
          "Dirichlet Process,alpha=0.01"),
         (mixture.DPGMM(n_components=10, cvtype='diag', alpha=100.),
@@ -49,7 +50,7 @@ for i, (clf, title) in enumerate([
         ]):
 
     clf.fit(X, n_iter=100)
-    splot = pl.subplot(3, 1, 1+i)
+    splot = pl.subplot(3, 1, 1 + i)
     Y_ = clf.predict(X)
     for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                                  color_iter)):
@@ -60,19 +61,18 @@ for i, (clf, title) in enumerate([
         # components.
         if not np.any(Y_ == i):
             continue
-        pl.scatter(X[Y_== i, 0], X[Y_== i, 1], .8, color=color)
+        pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
 
         # Plot an ellipse to show the Gaussian component
-        angle = np.arctan(u[1]/u[0])
-        angle = 180 * angle / np.pi # convert to degrees
+        angle = np.arctan(u[1] / u[0])
+        angle = 180 * angle / np.pi  # convert to degrees
         ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
         ell.set_clip_box(splot.bbox)
         ell.set_alpha(0.5)
         splot.add_artist(ell)
 
-    pl.xlim(-6, 4*np.pi-6)
+    pl.xlim(-6, 4 * np.pi - 6)
     pl.ylim(-5, 5)
     pl.title(title)
 
 pl.show()
-
diff --git a/examples/plot_multilabel.py b/examples/plot_multilabel.py
index 63ca04c8e9..66f1c2f128 100644
--- a/examples/plot_multilabel.py
+++ b/examples/plot_multilabel.py
@@ -15,8 +15,8 @@ dataset is generated randomly based on the following process:
 
 In the above process, rejection sampling is used to make sure that n is more
 than 2, and that the document length is never zero. Likewise, we reject classes
-which have already been chosen.  The documents that are assigned to both classes
-are plotted surrounded by two colored circles.
+which have already been chosen.  The documents that are assigned to both
+classes are plotted surrounded by two colored circles.
 
 The classification is performed by projecting to the first two principal
 components found by PCA and CCA for visualisation purposes, followed by using
-- 
GitLab