diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index ab472c206221fcb8da83af6cc1a34634ee392586..61b17a5a12ab290ba16c8a80ccb383d9317b5e9b 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -381,7 +381,7 @@ Signal Decomposition decomposition.SparsePCA decomposition.MiniBatchSparsePCA decomposition.DictionaryLearning - decomposition.DictionaryLearningOnline + decomposition.MiniBatchDictionaryLearning .. autosummary:: :toctree: generated/ diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index 059f09cd45d79583280cb43a43d4fad602b55698..b94d3022acc4a6cbe270863c405dd3a26192a618 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -430,16 +430,16 @@ extracted from part of the image of Lena looks like. <http://www.di.ens.fr/sierra/pdfs/icml09.pdf>`_ J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 -.. _DictionaryLearningOnline +.. _MiniBatchDictionaryLearning: -Online dictionary learning +Mini-batch dictionary learning --------------------------- +------------------------------ -:class:`DictionaryLearningOnline` implements a faster, but less accurate +:class:`MiniBatchDictionaryLearning` implements a faster, but less accurate version of the dictionary learning algorithm that is better suited for large datasets. -By default, :class:`DictionaryLearningOnline` divides the data into +By default, :class:`MiniBatchDictionaryLearning` divides the data into mini-batches and optimizes in an online manner by cycling over the mini-batches for the specified number of iterations. However, at the moment it does not implement a stopping condition. @@ -448,4 +448,3 @@ The estimator also implements `partial_fit`, which updates the dictionary by iterating only once over a mini-batch. This can be used for online learning when the data is not readily available from the start, or for when the data does not fit into the memory. 
- diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index 1cd41b9e9fc9a6180b9c2177913a5fdd154d0495..170eeb4e2c31da87363a222e1a00753780880e8b 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -82,8 +82,8 @@ estimators = [ n_iter=100, chunk_size=3), True, False), - ('Dictionary atoms - DictionaryLearningOnline', - decomposition.DictionaryLearningOnline(n_atoms=n_components, alpha=1e-3, + ('Dict. atoms - MiniBatchDictionaryLearning', + decomposition.MiniBatchDictionaryLearning(n_atoms=n_components, alpha=1e-3, n_iter=100, chunk_size=3), True, False), diff --git a/examples/decomposition/plot_img_denoising.py b/examples/decomposition/plot_img_denoising.py index ceddb85a9bc274d03ac8198fcbbf7f9a8401a600..bc1dede7b37dd68e8bd8eda91907ac6ed94f818f 100644 --- a/examples/decomposition/plot_img_denoising.py +++ b/examples/decomposition/plot_img_denoising.py @@ -35,7 +35,7 @@ import pylab as pl import scipy as sp import numpy as np -from sklearn.decomposition import DictionaryLearningOnline +from sklearn.decomposition import MiniBatchDictionaryLearning from sklearn.feature_extraction.image import extract_patches_2d from sklearn.feature_extraction.image import reconstruct_from_patches_2d @@ -69,7 +69,7 @@ print 'done in %.2fs.' % (time() - t0) print 'Learning the dictionary... ' t0 = time() -dico = DictionaryLearningOnline(n_atoms=100, alpha=1e-2, n_iter=500) +dico = MiniBatchDictionaryLearning(n_atoms=100, alpha=1e-2, n_iter=500) V = dico.fit(data).components_ dt = time() - t0 print 'done in %.2fs.' 
% dt diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index fbc9e2f33ad1d13bacfe9561452b41417b2703e8..748832041db71068999a6b3374e6bdb18593c401 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -8,4 +8,4 @@ from .kernel_pca import KernelPCA from .sparse_pca import SparsePCA, MiniBatchSparsePCA from .fastica_ import FastICA, fastica from .dict_learning import dict_learning, dict_learning_online, \ - DictionaryLearning, DictionaryLearningOnline + DictionaryLearning, MiniBatchDictionaryLearning diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 03365cde25078b4e587c0accabd0f456d0921a0e..b211c122e79c012ea6c0e13bfbc69e85509b1cd8 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -828,8 +828,8 @@ class DictionaryLearning(BaseDictionaryLearning): return self -class DictionaryLearningOnline(BaseDictionaryLearning): - """ Online dictionary learning +class MiniBatchDictionaryLearning(BaseDictionaryLearning): + """Mini-batch dictionary learning Finds a dictionary (a set of atoms) that can best be used to represent data using a sparse code. diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index fcde71495e44ed6a8f2c52b31f873e7591255b59..0f6a9ae116c1cc0fe44c5ec87bd7c7f20e6b0146 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -2,7 +2,7 @@ import numpy as np from numpy.testing import assert_array_almost_equal, assert_array_equal, \ assert_equal -from .. import DictionaryLearning, DictionaryLearningOnline, \ +from .. 
import DictionaryLearning, MiniBatchDictionaryLearning, \ dict_learning_online from ..dict_learning import sparse_encode, sparse_encode_parallel @@ -74,20 +74,20 @@ def test_dict_learning_online_shapes(): def test_dict_learning_online_estimator_shapes(): n_atoms = 5 - dico = DictionaryLearningOnline(n_atoms, n_iter=20).fit(X) + dico = MiniBatchDictionaryLearning(n_atoms, n_iter=20).fit(X) assert dico.components_.shape == (n_atoms, n_features) def test_dict_learning_online_overcomplete(): n_atoms = 12 - dico = DictionaryLearningOnline(n_atoms, n_iter=20).fit(X) + dico = MiniBatchDictionaryLearning(n_atoms, n_iter=20).fit(X) assert dico.components_.shape == (n_atoms, n_features) def test_dict_learning_online_initialization(): n_atoms = 12 V = rng.randn(n_atoms, n_features) - dico = DictionaryLearningOnline(n_atoms, n_iter=0, dict_init=V).fit(X) + dico = MiniBatchDictionaryLearning(n_atoms, n_iter=0, dict_init=V).fit(X) assert_array_equal(dico.components_, V) @@ -96,13 +96,13 @@ def test_dict_learning_online_partial_fit(): V = rng.randn(n_atoms, n_features) # random init rng1 = np.random.RandomState(0) rng2 = np.random.RandomState(0) - dico1 = DictionaryLearningOnline(n_atoms, n_iter=10, chunk_size=1, - shuffle=False, dict_init=V, - transform_algorithm='threshold', - random_state=rng1).fit(X) - dico2 = DictionaryLearningOnline(n_atoms, n_iter=1, dict_init=V, - transform_algorithm='threshold', - random_state=rng2) + dico1 = MiniBatchDictionaryLearning(n_atoms, n_iter=10, chunk_size=1, + shuffle=False, dict_init=V, + transform_algorithm='threshold', + random_state=rng1).fit(X) + dico2 = MiniBatchDictionaryLearning(n_atoms, n_iter=1, dict_init=V, + transform_algorithm='threshold', + random_state=rng2) for ii, sample in enumerate(X): dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)