diff --git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py
index efbd411adc877dcb4e1421a673a8edd7fd9f3b38..e0e0fe1a73624a002b2dc26c19416c37cca42760 100644
--- a/examples/decomposition/plot_sparse_coding.py
+++ b/examples/decomposition/plot_sparse_coding.py
@@ -6,7 +6,7 @@ Sparse coding with a precomputed dictionary
 Transform a signal as a sparse combination of Ricker wavelets. This example
 visually compares different sparse coding methods using the
 :class:`sklearn.decomposition.SparseCoder` estimator. The Ricker (also known
-as mexican hat or the second derivative of a Gaussian) is not a particularly
+as Mexican hat or the second derivative of a Gaussian) is not a particularly
-good kernel to represent piecewise constant signals like this one. It can
-therefore be seen how much adding different widths of atoms matters and it
-therefore motivates learning the dictionary to best fit your type of signals.
+good kernel to represent piecewise constant signals like this one. One can
+therefore see how much mixing atoms of different widths matters, which in
+turn motivates learning the dictionary to best fit your type of signals.
@@ -23,7 +23,7 @@ from sklearn.decomposition import SparseCoder
 
 
 def ricker_function(resolution, center, width):
-    """Discrete sub-sampled Ricker (mexican hat) wavelet"""
+    """Discrete sub-sampled Ricker (Mexican hat) wavelet"""
     x = np.linspace(0, resolution - 1, resolution)
-    x = ((2 / ((np.sqrt(3 * width) * np.pi ** 1 / 4)))
+    x = ((2 / ((np.sqrt(3 * width) * np.pi ** 0.25)))
          * (1 - ((x - center) ** 2 / width ** 2))
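
For reference (not part of the patch): the Ricker (Mexican hat) wavelet of
width `w` that `ricker_function` samples is conventionally written as

```latex
\psi_w(t) = \frac{2}{\sqrt{3w}\,\pi^{1/4}}
            \left(1 - \frac{t^2}{w^2}\right) e^{-t^2 / (2 w^2)}
```

Note the quarter power of pi in the normalization constant: in Python,
`np.pi ** 1 / 4` parses as `(np.pi ** 1) / 4` because exponentiation binds
tighter than division, so the constant has to be written `np.pi ** 0.25`
(or `np.pi ** (1. / 4)`).
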
@@ -32,7 +32,7 @@ def ricker_function(resolution, center, width):
 
 
 def ricker_matrix(width, resolution, n_components):
-    """Dictionary of Ricker (mexican hat) wavelets"""
+    """Dictionary of Ricker (Mexican hat) wavelets"""
     centers = np.linspace(0, resolution - 1, n_components)
     D = np.empty((n_components, resolution))
     for i, center in enumerate(centers):
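
For readers who want to run the comparison the docstring describes, a
minimal sketch along these lines works against the helpers above. It is not
part of the patch; the OMP settings and the single fixed width are
illustrative choices, and `ricker_matrix` is the helper defined in this file.

```python
import numpy as np
from sklearn.decomposition import SparseCoder

resolution = 1024
width = 100
n_components = resolution // 10

# Dictionary of fixed-width Ricker atoms (helper defined above).
D = ricker_matrix(width=width, resolution=resolution,
                  n_components=n_components)

# Piecewise-constant test signal, as in the example.
y = np.linspace(0, resolution - 1, resolution)
first_quarter = y < resolution / 4
y[first_quarter] = 3.
y[np.logical_not(first_quarter)] = -1.

# Sparse-code the signal with orthogonal matching pursuit, then
# reconstruct it from the estimated coefficients.
coder = SparseCoder(dictionary=D, transform_algorithm='omp',
                    transform_n_nonzero_coefs=5)
code = coder.transform(y.reshape(1, -1))
reconstruction = np.ravel(np.dot(code, D))
print('%d nonzero atoms, squared reconstruction error %.3f'
      % (np.count_nonzero(code), np.sum((y - reconstruction) ** 2)))
```

Swapping `transform_algorithm` for `'lasso_lars'` or `'threshold'` (with
`transform_alpha`) covers the other coding methods the plot compares.
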
diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py
index 65f21584cccd25886cb85d8be87e0f4ed5f3f578..1ce272706778ceb3696a95eabd022fcf0d6c8cb3 100644
--- a/examples/mixture/plot_gmm.py
+++ b/examples/mixture/plot_gmm.py
@@ -4,7 +4,7 @@ Gaussian Mixture Model Ellipsoids
 =================================
 
 Plot the confidence ellipsoids of a mixture of two Gaussians with EM
-and variational dirichlet process.
+and variational Dirichlet process.
 
 Both models have access to five components with which to fit the
 data. Note that the EM model will necessarily use all five components
@@ -15,7 +15,7 @@ is trying to fit too many components, while the Dirichlet Process model
-adapts it number of state automatically.
+adapts its number of states automatically.
 
 This example doesn't show it, as we're in a low-dimensional space, but
-another advantage of the dirichlet process model is that it can fit
+another advantage of the Dirichlet process model is that it can fit
-full covariance matrices effectively even when there are less examples
+full covariance matrices effectively even when there are fewer examples
 per cluster than there are dimensions in the data, due to
 regularization properties of the inference algorithm.
@@ -42,7 +42,7 @@ X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
 gmm = mixture.GMM(n_components=5, covariance_type='full')
 gmm.fit(X)
 
-# Fit a dirichlet process mixture of Gaussians using five components
+# Fit a Dirichlet process mixture of Gaussians using five components
 dpgmm = mixture.DPGMM(n_components=5, covariance_type='full')
 dpgmm.fit(X)
 
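
A quick way to check the claim above, that plain EM spreads the data over
all five components while the Dirichlet process model leaves unneeded ones
empty, is a sketch like the following. It is not part of the patch; it uses
the same deprecated-era `GMM`/`DPGMM` API as the example, and the blob sizes
are illustrative.

```python
import numpy as np
from sklearn import mixture

np.random.seed(0)

# Two Gaussian blobs: one stretched by C, one tighter and shifted,
# mirroring the data built in plot_gmm.py.
C = np.array([[0., -0.1], [1.7, .4]])
X = np.r_[np.dot(np.random.randn(300, 2), C),
          .7 * np.random.randn(300, 2) + np.array([-6, 3])]

for model in (mixture.GMM(n_components=5, covariance_type='full'),
              mixture.DPGMM(n_components=5, covariance_type='full')):
    model.fit(X)
    # Count how many of the five components actually claim points.
    n_used = np.unique(model.predict(X)).size
    print('%s assigns data to %d of 5 components'
          % (model.__class__.__name__, n_used))
```

With this data the `GMM` fit typically splits the two true clusters across
all five components, while the `DPGMM` fit concentrates on two.
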
diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py
index fc65d2f17b26778471f9923c93fd85d5f5bfe877..0fe87a3bd71e4b6622bbc5bc6ec83c620ccc632c 100644
--- a/examples/mixture/plot_gmm_sin.py
+++ b/examples/mixture/plot_gmm_sin.py
@@ -8,7 +8,7 @@ complexity control and dealing with sparse data. The dataset is formed
 by 100 points loosely spaced following a noisy sine curve. The fit by
 the GMM class, using the expectation-maximization algorithm to fit a
 mixture of 10 Gaussian components, finds too-small components and very
-little structure. The fits by the dirichlet process, however, show
+little structure. The fits by the Dirichlet process, however, show
 that the model can either learn a global structure for the data (small
 alpha) or easily interpolate to finding relevant local structure
 (large alpha), never falling into the problems shown by the GMM class.
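
The small-alpha versus large-alpha behaviour can be reproduced with a short
sketch like this one. It is not part of the patch; the data generation
mirrors the example, and the two alpha values are illustrative.

```python
import numpy as np
from sklearn import mixture

np.random.seed(0)

# 100 points loosely following a noisy sine curve, as in the example.
n_samples = 100
step = 4. * np.pi / n_samples
X = np.zeros((n_samples, 2))
X[:, 0] = np.arange(n_samples) * step - 6.
X[:, 1] = 3. * (np.sin(X[:, 0]) + 0.3 * np.random.randn(n_samples))

# Small alpha pushes the Dirichlet process toward few, global
# components; large alpha lets it keep more, local ones.
for alpha in (0.01, 100.):
    dpgmm = mixture.DPGMM(n_components=10, covariance_type='full',
                          alpha=alpha, n_iter=100)
    dpgmm.fit(X)
    print('alpha=%g: %d of 10 components in use'
          % (alpha, np.unique(dpgmm.predict(X)).size))
```
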