diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py
index 4dccd0c73ceff5b3bfd29bd6911094b180b360b8..290dc8dd2a6feb1d2c006322478fad16e6da96bf 100644
--- a/scikits/learn/datasets/base.py
+++ b/scikits/learn/datasets/base.py
@@ -40,11 +40,14 @@ def load_iris():
     Let's say you are interested in the samples 10, 25, and 50, and want to
     know their class name.
 
+    >>> from scikits.learn.datasets import load_iris
     >>> data = load_iris()
-    >>> print data.target[[10, 25, 50]]
-    [0 0 1]
-    >>> print data.target_names
-    ['setosa' 'versicolor' 'virginica']
+    >>> data.target[[10, 25, 50]]
+    array([0, 0, 1])
+    >>> data.target_names
+    array(['setosa', 'versicolor', 'virginica'],
+          dtype='|S10')
+
     """
     
     data_file = csv.reader(open(os.path.dirname(__file__) 
diff --git a/scikits/learn/glm/ridge.py b/scikits/learn/glm/ridge.py
index 80127b08be7f79b16a6911c3ff02f675ddf6b3e3..6836759b791f017d6327b8062c972674109276cc 100644
--- a/scikits/learn/glm/ridge.py
+++ b/scikits/learn/glm/ridge.py
@@ -1,3 +1,7 @@
+"""
+Ridge regression
+"""
+
 import numpy as np
 from scipy import linalg
 
@@ -19,6 +23,7 @@ class Ridge(LinearModel):
 
     Examples
     --------
+    >>> from scikits.learn.glm import Ridge
     >>> import numpy as np
     >>> n_samples, n_features = 10, 5
     >>> np.random.seed(0)
diff --git a/scikits/learn/gmm.py b/scikits/learn/gmm.py
index 12adc5118adffa37e6e42501db4bed348175fb45..9011387b2cc7968f85c31cb8ddb0408fd370c7d1 100644
--- a/scikits/learn/gmm.py
+++ b/scikits/learn/gmm.py
@@ -1,6 +1,7 @@
-#
-# Gaussian Mixture Models
-#
+"""
+Gaussian Mixture Models
+"""
+
 # Author: Ron Weiss <ronweiss@gmail.com>
 #         Fabian Pedregosa <fabian.pedregosa@inria.fr>
 #
@@ -184,43 +185,56 @@ class GMM(BaseEstimator):
     Examples
     --------
     >>> import numpy as np
+    >>> from scikits.learn.gmm import GMM
     >>> g = GMM(n_states=2, n_dim=1)
     >>> # The initial parameters are fixed.
-    >>> print np.round(g.weights, 2)
-    [ 0.5  0.5]
-    >>> print np.round(g.means, 2)
-    [[ 0.]
-     [ 0.]]
-    >>> print np.round(g.covars, 2)
-    [[[ 1.]]
+    >>> np.round(g.weights, 2)
+    array([ 0.5,  0.5])
+    >>> np.round(g.means, 2)
+    array([[ 0.],
+           [ 0.]])
+    >>> np.round(g.covars, 2)
+    array([[[ 1.]],
     <BLANKLINE>
-     [[ 1.]]]
+           [[ 1.]]])
 
     >>> # Generate random observations with two modes centered on 0
     >>> # and 10 to use for training.
     >>> np.random.seed(0)
     >>> obs = np.concatenate((np.random.randn(100, 1),
     ...                       10 + np.random.randn(300, 1)))
-    >>> _ = g.fit(obs)
-    >>> print np.round(g.weights, 2)
-    [ 0.75  0.25]
-    >>> print np.round(g.means, 2)
-    [[ 9.94]
-     [ 0.06]]
-    >>> print np.round(g.covars, 2)
-    [[[ 0.96]]
+    >>> g.fit(obs)
+    GMM(n_dim=1, cvtype='diag',
+        means=array([[ 9.94199],
+            [ 0.05981]]),
+        covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2,
+        weights=array([ 0.75,  0.25]))
+
+    >>> np.round(g.weights, 2)
+    array([ 0.75,  0.25])
+    >>> np.round(g.means, 2)
+    array([[ 9.94],
+           [ 0.06]])
+    >>> np.round(g.covars, 2)
+    array([[[ 0.96]],
     <BLANKLINE>
-     [[ 1.02]]]
-    >>> print g.predict([[0], [2], [9], [10]])
-    [1 1 0 0]
-    >>> print np.round(g.score([[0], [2], [9], [10]]), 2)
-    [-2.32 -4.16 -1.65 -1.19]
+           [[ 1.02]]])
+    >>> g.predict([[0], [2], [9], [10]])
+    array([1, 1, 0, 0])
+    >>> np.round(g.score([[0], [2], [9], [10]]), 2)
+    array([-2.32, -4.16, -1.65, -1.19])
 
     >>> # Refit the model on new data (initial parameters remain the
     >>> # same), this time with an even split between the two modes.
-    >>> _ = g.fit(20 * [[0]] +  20 * [[10]])
-    >>> print np.round(g.weights, 2)
-    [ 0.5  0.5]
+    >>> g.fit(20 * [[0]] + 20 * [[10]])
+    GMM(n_dim=1, cvtype='diag',
+        means=array([[ 10.],
+            [  0.]]),
+        covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2,
+        weights=array([ 0.5,  0.5]))
+
+    >>> np.round(g.weights, 2)
+    array([ 0.5,  0.5])
     """
 
     def __init__(self, n_states=1, n_dim=1, cvtype='diag', weights=None,
diff --git a/scikits/learn/grid_search.py b/scikits/learn/grid_search.py
index 6032ae6047c348d442499861dffc97b1197d5047..bed1bc5893c084c4355cbcd6fa5ae92ec5161fc2 100644
--- a/scikits/learn/grid_search.py
+++ b/scikits/learn/grid_search.py
@@ -41,9 +41,11 @@ def iter_grid(param_grid):
 
         Examples
         ---------
+        >>> from scikits.learn.grid_search import iter_grid
         >>> param_grid = {'a':[1, 2], 'b':[True, False]}
         >>> list(iter_grid(param_grid))
         [{'a': 1, 'b': True}, {'a': 1, 'b': False}, {'a': 2, 'b': True}, {'a': 2, 'b': False}]
+
     """
     if hasattr(param_grid, 'has_key'):
         param_grid = [param_grid]
@@ -79,6 +81,7 @@ def fit_grid_point(X, y, base_clf, clf_params, cv, loss_func, iid,
     return clf, score
 
 
+################################################################################
 class GridSearchCV(object):
     """
     Grid search on the parameters of a classifier.
@@ -126,13 +129,14 @@ class GridSearchCV(object):
     >>> import numpy as np
     >>> from scikits.learn.cross_val import LeaveOneOut
     >>> from scikits.learn.svm import SVR
+    >>> from scikits.learn.grid_search import GridSearchCV
     >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
     >>> y = np.array([1, 1, 2, 2])
     >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
     >>> svr = SVR()
     >>> clf = GridSearchCV(svr, parameters, n_jobs=1)
-    >>> print clf.fit(X, y).predict([[-0.8, -1]])
-    [ 1.]
+    >>> clf.fit(X, y).predict([[-0.8, -1]])
+    array([ 1.])
     """
 
     def __init__(self, estimator, param_grid, loss_func=None,
diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py
index fb26abb714a2d97cd9548b35dc4de38c0f7714ae..ca5e60b03f57e01f1f375dc3b458a3909edaef1a 100644
--- a/scikits/learn/hmm.py
+++ b/scikits/learn/hmm.py
@@ -569,7 +569,18 @@ class GaussianHMM(_BaseHMM):
 
     Examples
     --------
-    >>> ghmm = GaussianHMM(n_states=2, n_dim=1)
+    >>> from scikits.learn.hmm import GaussianHMM
+    >>> GaussianHMM(n_states=2, n_dim=1)
+    GaussianHMM(n_dim=1, cvtype='diag', n_states=2, means_weight=0,
+          means=array([[ 0.],
+           [ 0.]]),
+          covars=array([[ 1.],
+           [ 1.]]), labels=[None, None],
+          startprob=array([ 0.5,  0.5]),
+          transmat=array([[ 0.5,  0.5],
+           [ 0.5,  0.5]]),
+          startprob_prior=1.0, transmat_prior=1.0, means_prior=None,
+          covars_weight=1, covars_prior=0.01)
 
     See Also
     --------
@@ -808,8 +819,17 @@ class MultinomialHMM(_BaseHMM):
 
     Examples
     --------
-    >>> mhmm = MultinomialHMM(n_states=2, nsymbols=3)
-
+    >>> from scikits.learn.hmm import MultinomialHMM
+    >>> MultinomialHMM(n_states=2, nsymbols=3)
+    MultinomialHMM(n_states=2,
+                emissionprob=array([[ 0.3663 ,  0.12783,  0.50587],
+               [ 0.35851,  0.21559,  0.42589]]),
+                labels=[None, None], startprob_prior=1.0,
+                startprob=array([ 0.5,  0.5]),
+                transmat=array([[ 0.5,  0.5],
+               [ 0.5,  0.5]]), nsymbols=3,
+                transmat_prior=1.0)
+
     See Also
     --------
     GaussianHMM : HMM with Gaussian emissions
@@ -931,7 +951,9 @@ class GMMHMM(_BaseHMM):
 
     Examples
     --------
-    >>> hmm = GMMHMM(n_states=2, n_mix=10, n_dim=3)
+    >>> from scikits.learn.hmm import GMMHMM
+    >>> GMMHMM(n_states=2, n_mix=10, n_dim=3) # doctest: +ELLIPSIS
+    GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None, labels=[None, None], ...)
 
     See Also
     --------
@@ -957,6 +979,10 @@ class GMMHMM(_BaseHMM):
 
         self._n_dim = n_dim
 
+        # XXX: Hotfix for n_mix, which is incompatible with the scikit's
+        # BaseEstimator API
+        self.n_mix = n_mix
+        self.cvtype = cvtype
         if gmms is None:
             gmms = []
             for x in xrange(self.n_states):
diff --git a/scikits/learn/naive_bayes.py b/scikits/learn/naive_bayes.py
index 50d0a595b64d6c1a317a5693502fa64fc35f537a..a4635827f87addbb43ef360e0174f8281440920c 100644
--- a/scikits/learn/naive_bayes.py
+++ b/scikits/learn/naive_bayes.py
@@ -1,3 +1,6 @@
+""" Naives Bayes classifiers.
+"""
+
 # Author: Vincent Michel <vincent.michel@inria.fr>
 # License: BSD Style.
 import numpy as np
@@ -39,8 +42,10 @@ class GNB(BaseEstimator, ClassifierMixin):
 
     Examples
     --------
+    >>> import numpy as np
     >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
     >>> Y = np.array([1, 1, 1, 2, 2, 2])
+    >>> from scikits.learn.naive_bayes import GNB
     >>> clf = GNB()
     >>> clf.fit(X, Y)
     GNB()
diff --git a/scikits/learn/neighbors.py b/scikits/learn/neighbors.py
index 4fc9b451e1ab72ab4bc3803fb34caa1546d64ea1..06385b50ff324fab34e2aea5e2d86bf88f3097bd 100644
--- a/scikits/learn/neighbors.py
+++ b/scikits/learn/neighbors.py
@@ -31,6 +31,7 @@ class Neighbors(BaseEstimator, ClassifierMixin):
   --------
   >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
   >>> labels = [0,0,1,1]
+  >>> from scikits.learn.neighbors import Neighbors
   >>> neigh = Neighbors(k=3)
   >>> neigh.fit(samples, labels)
   Neighbors(k=3, window_size=1)
@@ -78,9 +79,9 @@ class Neighbors(BaseEstimator, ClassifierMixin):
     array representing our data set and ask who's the closest point to
     [1,1,1]
 
-    >>> import numpy as np
     >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
     >>> labels = [0, 0, 1]
+    >>> from scikits.learn.neighbors import Neighbors
     >>> neigh = Neighbors(k=1)
     >>> neigh.fit(samples, labels)
     Neighbors(k=1, window_size=1)
@@ -119,9 +120,9 @@ class Neighbors(BaseEstimator, ClassifierMixin):
 
     Examples
     --------
-    >>> import numpy as np
     >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
     >>> labels = [0, 0, 1]
+    >>> from scikits.learn.neighbors import Neighbors
     >>> neigh = Neighbors(k=1)
     >>> neigh.fit(samples, labels)
     Neighbors(k=1, window_size=1)
diff --git a/scikits/learn/pca.py b/scikits/learn/pca.py
index f111962da734731e380a98adf39300b26b264a5f..c7428bb0e6b63867ecfbe924ff3ce2db2562419a 100644
--- a/scikits/learn/pca.py
+++ b/scikits/learn/pca.py
@@ -113,7 +113,9 @@ class PCA(BaseEstimator):
 
     Examples
     --------
+    >>> import numpy as np
     >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    >>> from scikits.learn.pca import PCA
     >>> pca = PCA(n_comp=2)
     >>> pca.fit(X)
     PCA(n_comp=2, copy=True)
diff --git a/scikits/learn/pipeline.py b/scikits/learn/pipeline.py
index c611adc2708988bb2ab8c845de9300397a8445e9..ea4ce2ea72a026cc96366868bd1c4c31dff9f0f7 100644
--- a/scikits/learn/pipeline.py
+++ b/scikits/learn/pipeline.py
@@ -62,7 +62,9 @@ class Pipeline(BaseEstimator):
         >>> # You can set the parameters using the names issued
         >>> # For instance, fit using a k of 10 in the SelectKBest
         >>> # and a parameter 'C' of the svm
-        >>> _ = anova_svm.fit(X, y, anova__k=10, svc__C=.1)
+        >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1) # doctest: +ELLIPSIS
+        Pipeline(steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at ...>)), ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3, coef0=0.0, eps=0.001,
+        cache_size=100.0, shrinking=True, gamma=0.01))])
 
         >>> prediction = anova_svm.predict(X)
         >>> score = anova_svm.score(X)
diff --git a/scikits/learn/svm.py b/scikits/learn/svm.py
index 32b16b9b33ffa26d6b698fb3a87732a1af600a18..a8226b7fa1887345aa58b1238a5bca46aabf5850 100644
--- a/scikits/learn/svm.py
+++ b/scikits/learn/svm.py
@@ -416,8 +416,10 @@ class SVC(_BaseLibSVM, ClassifierMixin):
 
     Examples
     --------
+    >>> import numpy as np
     >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
     >>> Y = np.array([1, 1, 2, 2])
+    >>> from scikits.learn.svm import SVC
     >>> clf = SVC()
     >>> clf.fit(X, Y)
     SVC(kernel='rbf', C=1.0, probability=False, degree=3, coef0=0.0, eps=0.001,
@@ -516,8 +518,10 @@ class NuSVC(_BaseLibSVM, ClassifierMixin):
 
     Examples
     --------
+    >>> import numpy as np
     >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
     >>> Y = np.array([1, 1, 2, 2])
+    >>> from scikits.learn.svm import NuSVC
     >>> clf = NuSVC()
     >>> clf.fit(X, Y)
     NuSVC(kernel='rbf', probability=False, degree=3, coef0=0.0, eps=0.001,
diff --git a/scikits/learn/utils/graph.py b/scikits/learn/utils/graph.py
index a25f0cc4512fea699751af16d20e9b727111311c..96dae729d58fcf8a3d373f00c9cb434e17626d54 100644
--- a/scikits/learn/utils/graph.py
+++ b/scikits/learn/utils/graph.py
@@ -33,6 +33,7 @@ def single_source_shortest_path_length(graph, source, cutoff=None):
 
     Examples
     --------
+    >>> from scikits.learn.utils.graph import single_source_shortest_path_length
     >>> import numpy as np
     >>> graph = np.array([[ 0, 1, 0, 0],
     ...                   [ 1, 0, 1, 0],
@@ -127,6 +128,17 @@ def _graph_laplacian_dense(graph, normed=False, return_diag=False):
     
 
 def graph_laplacian(graph, normed=False, return_diag=False):
+    """ Return the Laplacian of the given graph.
+    """
     if normed and (np.issubdtype(graph.dtype, np.int)
                     or np.issubdtype(graph.dtype, np.uint)):
         graph = graph.astype(np.float)