diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py
index 4dccd0c73ceff5b3bfd29bd6911094b180b360b8..290dc8dd2a6feb1d2c006322478fad16e6da96bf 100644
--- a/scikits/learn/datasets/base.py
+++ b/scikits/learn/datasets/base.py
@@ -40,11 +40,14 @@ def load_iris():
 
     Let's say you are interested in the samples 10, 25, and 50, and want
     to know their class name.
 
+    >>> from scikits.learn.datasets import load_iris
     >>> data = load_iris()
-    >>> print data.target[[10, 25, 50]]
-    [0 0 1]
-    >>> print data.target_names
-    ['setosa' 'versicolor' 'virginica']
+    >>> data.target[[10, 25, 50]]
+    array([0, 0, 1])
+    >>> data.target_names
+    array(['setosa', 'versicolor', 'virginica'],
+          dtype='|S10')
+
     """
     data_file = csv.reader(open(os.path.dirname(__file__)
diff --git a/scikits/learn/glm/ridge.py b/scikits/learn/glm/ridge.py
index 80127b08be7f79b16a6911c3ff02f675ddf6b3e3..6836759b791f017d6327b8062c972674109276cc 100644
--- a/scikits/learn/glm/ridge.py
+++ b/scikits/learn/glm/ridge.py
@@ -1,3 +1,7 @@
+"""
+Ridge regression
+"""
+
 import numpy as np
 from scipy import linalg
 
@@ -19,6 +23,7 @@ class Ridge(LinearModel):
 
     Examples
     --------
+    >>> from scikits.learn.glm import Ridge
     >>> import numpy as np
     >>> n_samples, n_features = 10, 5
     >>> np.random.seed(0)
diff --git a/scikits/learn/gmm.py b/scikits/learn/gmm.py
index 12adc5118adffa37e6e42501db4bed348175fb45..9011387b2cc7968f85c31cb8ddb0408fd370c7d1 100644
--- a/scikits/learn/gmm.py
+++ b/scikits/learn/gmm.py
@@ -1,6 +1,7 @@
-#
-# Gaussian Mixture Models
-#
+"""
+Gaussian Mixture Models
+"""
+
 # Author: Ron Weiss <ronweiss@gmail.com>
 #         Fabian Pedregosa <fabian.pedregosa@inria.fr>
 #
@@ -184,43 +185,56 @@ class GMM(BaseEstimator):
     Examples
     --------
     >>> import numpy as np
+    >>> from scikits.learn.gmm import GMM
    >>> g = GMM(n_states=2, n_dim=1)
    >>> # The initial parameters are fixed.
-    >>> print np.round(g.weights, 2)
-    [ 0.5 0.5]
-    >>> print np.round(g.means, 2)
-    [[ 0.]
-     [ 0.]]
-    >>> print np.round(g.covars, 2)
-    [[[ 1.]]
+    >>> np.round(g.weights, 2)
+    array([ 0.5, 0.5])
+    >>> np.round(g.means, 2)
+    array([[ 0.],
+           [ 0.]])
+    >>> np.round(g.covars, 2)
+    array([[[ 1.]],
     <BLANKLINE>
-     [[ 1.]]]
+           [[ 1.]]])
 
    >>> # Generate random observations with two modes centered on 0
    >>> # and 10 to use for training.
    >>> np.random.seed(0)
    >>> obs = np.concatenate((np.random.randn(100, 1),
    ...                       10 + np.random.randn(300, 1)))
-    >>> _ = g.fit(obs)
-    >>> print np.round(g.weights, 2)
-    [ 0.75 0.25]
-    >>> print np.round(g.means, 2)
-    [[ 9.94]
-     [ 0.06]]
-    >>> print np.round(g.covars, 2)
-    [[[ 0.96]]
+    >>> g.fit(obs)
+    GMM(n_dim=1, cvtype='diag',
+        means=array([[ 9.94199],
+                     [ 0.05981]]),
+        covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2,
+        weights=array([ 0.75, 0.25]))
+
+    >>> np.round(g.weights, 2)
+    array([ 0.75, 0.25])
+    >>> np.round(g.means, 2)
+    array([[ 9.94],
+           [ 0.06]])
+    >>> np.round(g.covars, 2)
+    array([[[ 0.96]],
     <BLANKLINE>
-     [[ 1.02]]]
-    >>> print g.predict([[0], [2], [9], [10]])
-    [1 1 0 0]
-    >>> print np.round(g.score([[0], [2], [9], [10]]), 2)
-    [-2.32 -4.16 -1.65 -1.19]
+           [[ 1.02]]])
+    >>> g.predict([[0], [2], [9], [10]])
+    array([1, 1, 0, 0])
+    >>> np.round(g.score([[0], [2], [9], [10]]), 2)
+    array([-2.32, -4.16, -1.65, -1.19])
 
    >>> # Refit the model on new data (initial parameters remain the
    >>> #same), this time with an even split between the two modes.
-    >>> _ = g.fit(20 * [[0]] + 20 * [[10]])
-    >>> print np.round(g.weights, 2)
-    [ 0.5 0.5]
+    >>> g.fit(20 * [[0]] + 20 * [[10]])
+    GMM(n_dim=1, cvtype='diag',
+        means=array([[ 10.],
+                     [  0.]]),
+        covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2,
+        weights=array([ 0.5, 0.5]))
+
+    >>> np.round(g.weights, 2)
+    array([ 0.5, 0.5])
     """
 
     def __init__(self, n_states=1, n_dim=1, cvtype='diag', weights=None,
diff --git a/scikits/learn/grid_search.py b/scikits/learn/grid_search.py
index 6032ae6047c348d442499861dffc97b1197d5047..bed1bc5893c084c4355cbcd6fa5ae92ec5161fc2 100644
--- a/scikits/learn/grid_search.py
+++ b/scikits/learn/grid_search.py
@@ -41,9 +41,11 @@ def iter_grid(param_grid):
 
     Examples
     ---------
+    >>> from scikits.learn.grid_search import iter_grid
    >>> param_grid = {'a':[1, 2], 'b':[True, False]}
    >>> list(iter_grid(param_grid))
    [{'a': 1, 'b': True}, {'a': 1, 'b': False}, {'a': 2, 'b': True}, {'a': 2, 'b': False}]
+
     """
     if hasattr(param_grid, 'has_key'):
         param_grid = [param_grid]
@@ -79,6 +81,7 @@ def fit_grid_point(X, y, base_clf, clf_params, cv, loss_func, iid,
     return clf, score
 
 
+################################################################################
 class GridSearchCV(object):
     """
     Grid search on the parameters of a classifier.
@@ -126,13 +129,14 @@ class GridSearchCV(object):
    >>> import numpy as np
    >>> from scikits.learn.cross_val import LeaveOneOut
    >>> from scikits.learn.svm import SVR
+    >>> from scikits.learn.grid_search import GridSearchCV
    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    >>> y = np.array([1, 1, 2, 2])
    >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
    >>> svr = SVR()
    >>> clf = GridSearchCV(svr, parameters, n_jobs=1)
-    >>> print clf.fit(X, y).predict([[-0.8, -1]])
-    [ 1.]
+    >>> clf.fit(X, y).predict([[-0.8, -1]])
+    array([ 1.])
     """
 
     def __init__(self, estimator, param_grid, loss_func=None,
diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py
index fb26abb714a2d97cd9548b35dc4de38c0f7714ae..ca5e60b03f57e01f1f375dc3b458a3909edaef1a 100644
--- a/scikits/learn/hmm.py
+++ b/scikits/learn/hmm.py
@@ -569,7 +569,18 @@ class GaussianHMM(_BaseHMM):
 
     Examples
     --------
-    >>> ghmm = GaussianHMM(n_states=2, n_dim=1)
+    >>> from scikits.learn.hmm import GaussianHMM
+    >>> GaussianHMM(n_states=2, n_dim=1)
+    GaussianHMM(n_dim=1, cvtype='diag', n_states=2, means_weight=0,
+          means=array([[ 0.],
+                       [ 0.]]),
+          covars=array([[ 1.],
+                        [ 1.]]), labels=[None, None],
+          startprob=array([ 0.5, 0.5]),
+          transmat=array([[ 0.5, 0.5],
+                          [ 0.5, 0.5]]),
+          startprob_prior=1.0, transmat_prior=1.0, means_prior=None,
+          covars_weight=1, covars_prior=0.01)
 
     See Also
     --------
@@ -808,8 +819,17 @@ class MultinomialHMM(_BaseHMM):
     Examples
     --------
-    >>> mhmm = MultinomialHMM(n_states=2, nsymbols=3)
-
+    >>> from scikits.learn.hmm import MultinomialHMM
+    >>> MultinomialHMM(n_states=2, nsymbols=3)
+    MultinomialHMM(n_states=2,
+          emissionprob=array([[ 0.3663 , 0.12783, 0.50587],
+                              [ 0.35851, 0.21559, 0.42589]]),
+          labels=[None, None], startprob_prior=1.0,
+          startprob=array([ 0.5, 0.5]),
+          transmat=array([[ 0.5, 0.5],
+                          [ 0.5, 0.5]]), nsymbols=3,
+          transmat_prior=1.0)
+
     See Also
     --------
     GaussianHMM : HMM with Gaussian emissions
@@ -931,7 +951,9 @@ class GMMHMM(_BaseHMM):
 
     Examples
     --------
-    >>> hmm = GMMHMM(n_states=2, n_mix=10, n_dim=3)
+    >>> from scikits.learn.hmm import GMMHMM
+    >>> GMMHMM(n_states=2, n_mix=10, n_dim=3) # doctest: +ELLIPSIS
+    GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None, labels=[None, None], ...)
 
     See Also
     --------
@@ -957,6 +979,10 @@ class GMMHMM(_BaseHMM):
 
         self._n_dim = n_dim
 
+        # XXX: Hotfix for n_mix that is incompatible with the scikit's
+        # BaseEstimator API
+        self.n_mix = n_mix
+        self.cvtype = cvtype
         if gmms is None:
             gmms = []
             for x in xrange(self.n_states):
diff --git a/scikits/learn/naive_bayes.py b/scikits/learn/naive_bayes.py
index 50d0a595b64d6c1a317a5693502fa64fc35f537a..a4635827f87addbb43ef360e0174f8281440920c 100644
--- a/scikits/learn/naive_bayes.py
+++ b/scikits/learn/naive_bayes.py
@@ -1,3 +1,6 @@
+""" Naive Bayes classifiers.
+"""
+
 # Author: Vincent Michel <vincent.michel@inria.fr>
 # License: BSD Style.
 import numpy as np
@@ -39,8 +42,10 @@ class GNB(BaseEstimator, ClassifierMixin):
 
     Examples
     --------
+    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> Y = np.array([1, 1, 1, 2, 2, 2])
+    >>> from scikits.learn.naive_bayes import GNB
    >>> clf = GNB()
    >>> clf.fit(X, Y)
    GNB()
diff --git a/scikits/learn/neighbors.py b/scikits/learn/neighbors.py
index 4fc9b451e1ab72ab4bc3803fb34caa1546d64ea1..06385b50ff324fab34e2aea5e2d86bf88f3097bd 100644
--- a/scikits/learn/neighbors.py
+++ b/scikits/learn/neighbors.py
@@ -31,6 +31,7 @@ class Neighbors(BaseEstimator, ClassifierMixin):
     --------
    >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> labels = [0,0,1,1]
+    >>> from scikits.learn.neighbors import Neighbors
    >>> neigh = Neighbors(k=3)
    >>> neigh.fit(samples, labels)
    Neighbors(k=3, window_size=1)
@@ -78,9 +79,9 @@ class Neighbors(BaseEstimator, ClassifierMixin):
         array representing our data set and ask who's the closest point to
         [1,1,1]
 
-        >>> import numpy as np
        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
+        >>> from scikits.learn.neighbors import Neighbors
        >>> neigh = Neighbors(k=1)
        >>> neigh.fit(samples, labels)
        Neighbors(k=1, window_size=1)
@@ -119,9 +120,9 @@ class Neighbors(BaseEstimator, ClassifierMixin):
 
         Examples
         --------
-        >>> import numpy as np
        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
+        >>> from scikits.learn.neighbors import Neighbors
        >>> neigh = Neighbors(k=1)
        >>> neigh.fit(samples, labels)
        Neighbors(k=1, window_size=1)
diff --git a/scikits/learn/pca.py b/scikits/learn/pca.py
index f111962da734731e380a98adf39300b26b264a5f..c7428bb0e6b63867ecfbe924ff3ce2db2562419a 100644
--- a/scikits/learn/pca.py
+++ b/scikits/learn/pca.py
@@ -113,7 +113,9 @@ class PCA(BaseEstimator):
 
     Examples
     --------
+    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    >>> from scikits.learn.pca import PCA
    >>> pca = PCA(n_comp=2)
    >>> pca.fit(X)
    PCA(n_comp=2, copy=True)
diff --git a/scikits/learn/pipeline.py b/scikits/learn/pipeline.py
index c611adc2708988bb2ab8c845de9300397a8445e9..ea4ce2ea72a026cc96366868bd1c4c31dff9f0f7 100644
--- a/scikits/learn/pipeline.py
+++ b/scikits/learn/pipeline.py
@@ -62,7 +62,9 @@ class Pipeline(BaseEstimator):
    >>> # You can set the parameters using the names issued
    >>> # For instance, fit using a k of 10 in the SelectKBest
    >>> # and a parameter 'C' of the svn
-    >>> _ = anova_svm.fit(X, y, anova__k=10, svc__C=.1)
+    >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1) #doctest: +ELLIPSIS
+    Pipeline(steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at ...>)), ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3, coef0=0.0, eps=0.001,
+    cache_size=100.0, shrinking=True, gamma=0.01))])
 
    >>> prediction = anova_svm.predict(X)
    >>> score = anova_svm.score(X)
diff --git a/scikits/learn/svm.py b/scikits/learn/svm.py
index 32b16b9b33ffa26d6b698fb3a87732a1af600a18..a8226b7fa1887345aa58b1238a5bca46aabf5850 100644
--- a/scikits/learn/svm.py
+++ b/scikits/learn/svm.py
@@ -416,8 +416,10 @@ class SVC(_BaseLibSVM, ClassifierMixin):
 
     Examples
     --------
+    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    >>> Y = np.array([1, 1, 2, 2])
+    >>> from scikits.learn.svm import SVC
    >>> clf = SVC()
    >>> clf.fit(X, Y)
    SVC(kernel='rbf', C=1.0, probability=False, degree=3, coef0=0.0, eps=0.001,
@@ -516,8 +518,10 @@ class NuSVC(_BaseLibSVM, ClassifierMixin):
 
     Examples
     --------
+    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    >>> Y = np.array([1, 1, 2, 2])
+    >>> from scikits.learn.svm import NuSVC
    >>> clf = NuSVC()
    >>> clf.fit(X, Y)
    NuSVC(kernel='rbf', probability=False, degree=3, coef0=0.0, eps=0.001,
diff --git a/scikits/learn/utils/graph.py b/scikits/learn/utils/graph.py
index a25f0cc4512fea699751af16d20e9b727111311c..96dae729d58fcf8a3d373f00c9cb434e17626d54 100644
--- a/scikits/learn/utils/graph.py
+++ b/scikits/learn/utils/graph.py
@@ -33,6 +33,7 @@ def single_source_shortest_path_length(graph, source, cutoff=None):
 
     Examples
     --------
+    >>> from scikits.learn.utils.graph import single_source_shortest_path_length
    >>> import numpy as np
    >>> graph = np.array([[ 0, 1, 0, 0],
    ...                   [ 1, 0, 1, 0],
@@ -127,6 +128,8 @@ def _graph_laplacian_dense(graph, normed=False, return_diag=False):
 
 
 def graph_laplacian(graph, normed=False, return_diag=False):
+    """ Return the Laplacian of the given graph.
+    """
     if normed and (np.issubdtype(graph.dtype, np.int)
                    or np.issubdtype(graph.dtype, np.uint)):
         graph = graph.astype(np.float)