diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index ab472c206221fcb8da83af6cc1a34634ee392586..61b17a5a12ab290ba16c8a80ccb383d9317b5e9b 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -381,7 +381,7 @@ Signal Decomposition
    decomposition.SparsePCA
    decomposition.MiniBatchSparsePCA
    decomposition.DictionaryLearning
-   decomposition.DictionaryLearningOnline
+   decomposition.MiniBatchDictionaryLearning
 
 .. autosummary::
    :toctree: generated/
diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index 059f09cd45d79583280cb43a43d4fad602b55698..b94d3022acc4a6cbe270863c405dd3a26192a618 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -430,16 +430,16 @@ extracted from part of the image of Lena looks like.
     <http://www.di.ens.fr/sierra/pdfs/icml09.pdf>`_
     J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009
 
-.. _DictionaryLearningOnline
+.. _MiniBatchDictionaryLearning:
 
-Online dictionary learning
+Mini-batch dictionary learning
---------------------------
+------------------------------
 
-:class:`DictionaryLearningOnline` implements a faster, but less accurate
+:class:`MiniBatchDictionaryLearning` implements a faster, but less accurate
 version of the dictionary learning algorithm that is better suited for large
 datasets. 
 
-By default, :class:`DictionaryLearningOnline` divides the data into
+By default, :class:`MiniBatchDictionaryLearning` divides the data into
 mini-batches and optimizes in an online manner by cycling over the mini-batches
 for the specified number of iterations. However, at the moment it does not
 implement a stopping condition.
@@ -448,4 +448,3 @@ The estimator also implements `partial_fit`, which updates the dictionary by
 iterating only once over a mini-batch. This can be used for online learning
 when the data is not readily available from the start, or for when the data
 does not fit into the memory.
-
diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py
index 1cd41b9e9fc9a6180b9c2177913a5fdd154d0495..170eeb4e2c31da87363a222e1a00753780880e8b 100644
--- a/examples/decomposition/plot_faces_decomposition.py
+++ b/examples/decomposition/plot_faces_decomposition.py
@@ -82,8 +82,8 @@ estimators = [
                                       n_iter=100, chunk_size=3),
      True, False),
 
-    ('Dictionary atoms - DictionaryLearningOnline',
-    decomposition.DictionaryLearningOnline(n_atoms=n_components, alpha=1e-3,
+    ('Dict. atoms - MiniBatchDictionaryLearning',
+    decomposition.MiniBatchDictionaryLearning(n_atoms=n_components, alpha=1e-3,
                                            n_iter=100, chunk_size=3),
      True, False),
 
diff --git a/examples/decomposition/plot_img_denoising.py b/examples/decomposition/plot_img_denoising.py
index ceddb85a9bc274d03ac8198fcbbf7f9a8401a600..bc1dede7b37dd68e8bd8eda91907ac6ed94f818f 100644
--- a/examples/decomposition/plot_img_denoising.py
+++ b/examples/decomposition/plot_img_denoising.py
@@ -35,7 +35,7 @@ import pylab as pl
 import scipy as sp
 import numpy as np
 
-from sklearn.decomposition import DictionaryLearningOnline
+from sklearn.decomposition import MiniBatchDictionaryLearning
 from sklearn.feature_extraction.image import extract_patches_2d
 from sklearn.feature_extraction.image import reconstruct_from_patches_2d
 
@@ -69,7 +69,7 @@ print 'done in %.2fs.' % (time() - t0)
 
 print 'Learning the dictionary... '
 t0 = time()
-dico = DictionaryLearningOnline(n_atoms=100, alpha=1e-2, n_iter=500)
+dico = MiniBatchDictionaryLearning(n_atoms=100, alpha=1e-2, n_iter=500)
 V = dico.fit(data).components_
 dt = time() - t0
 print 'done in %.2fs.' % dt
diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py
index fbc9e2f33ad1d13bacfe9561452b41417b2703e8..748832041db71068999a6b3374e6bdb18593c401 100644
--- a/sklearn/decomposition/__init__.py
+++ b/sklearn/decomposition/__init__.py
@@ -8,4 +8,4 @@ from .kernel_pca import KernelPCA
 from .sparse_pca import SparsePCA, MiniBatchSparsePCA
 from .fastica_ import FastICA, fastica
 from .dict_learning import dict_learning, dict_learning_online, \
-                           DictionaryLearning, DictionaryLearningOnline
+                           DictionaryLearning, MiniBatchDictionaryLearning
diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
index 03365cde25078b4e587c0accabd0f456d0921a0e..b211c122e79c012ea6c0e13bfbc69e85509b1cd8 100644
--- a/sklearn/decomposition/dict_learning.py
+++ b/sklearn/decomposition/dict_learning.py
@@ -828,8 +828,8 @@ class DictionaryLearning(BaseDictionaryLearning):
         return self
 
 
-class DictionaryLearningOnline(BaseDictionaryLearning):
-    """ Online dictionary learning
+class MiniBatchDictionaryLearning(BaseDictionaryLearning):
+    """Mini-batch dictionary learning
 
     Finds a dictionary (a set of atoms) that can best be used to represent data
     using a sparse code.
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index fcde71495e44ed6a8f2c52b31f873e7591255b59..0f6a9ae116c1cc0fe44c5ec87bd7c7f20e6b0146 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -2,7 +2,7 @@ import numpy as np
 from numpy.testing import assert_array_almost_equal, assert_array_equal, \
                           assert_equal
 
-from .. import DictionaryLearning, DictionaryLearningOnline, \
+from .. import DictionaryLearning, MiniBatchDictionaryLearning, \
                dict_learning_online
 from ..dict_learning import sparse_encode, sparse_encode_parallel
 
@@ -74,20 +74,20 @@ def test_dict_learning_online_shapes():
 
 def test_dict_learning_online_estimator_shapes():
     n_atoms = 5
-    dico = DictionaryLearningOnline(n_atoms, n_iter=20).fit(X)
+    dico = MiniBatchDictionaryLearning(n_atoms, n_iter=20).fit(X)
     assert dico.components_.shape == (n_atoms, n_features)
 
 
 def test_dict_learning_online_overcomplete():
     n_atoms = 12
-    dico = DictionaryLearningOnline(n_atoms, n_iter=20).fit(X)
+    dico = MiniBatchDictionaryLearning(n_atoms, n_iter=20).fit(X)
     assert dico.components_.shape == (n_atoms, n_features)
 
 
 def test_dict_learning_online_initialization():
     n_atoms = 12
     V = rng.randn(n_atoms, n_features)
-    dico = DictionaryLearningOnline(n_atoms, n_iter=0, dict_init=V).fit(X)
+    dico = MiniBatchDictionaryLearning(n_atoms, n_iter=0, dict_init=V).fit(X)
     assert_array_equal(dico.components_, V)
 
 
@@ -96,13 +96,13 @@ def test_dict_learning_online_partial_fit():
     V = rng.randn(n_atoms, n_features)  # random init
     rng1 = np.random.RandomState(0)
     rng2 = np.random.RandomState(0)
-    dico1 = DictionaryLearningOnline(n_atoms, n_iter=10, chunk_size=1,
-                                     shuffle=False, dict_init=V,
-                                     transform_algorithm='threshold',
-                                     random_state=rng1).fit(X)
-    dico2 = DictionaryLearningOnline(n_atoms, n_iter=1, dict_init=V,
-                                     transform_algorithm='threshold',
-                                     random_state=rng2)
+    dico1 = MiniBatchDictionaryLearning(n_atoms, n_iter=10, chunk_size=1,
+                                        shuffle=False, dict_init=V,
+                                        transform_algorithm='threshold',
+                                        random_state=rng1).fit(X)
+    dico2 = MiniBatchDictionaryLearning(n_atoms, n_iter=1, dict_init=V,
+                                        transform_algorithm='threshold',
+                                        random_state=rng2)
     for ii, sample in enumerate(X):
         dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)