From 34c2758c6a90aab30b7ec33a25a4d2dd9e152b01 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 14 Sep 2010 14:48:21 +0200
Subject: [PATCH] fix NORMALIZE_WHITESPACE issues in doctests

---
 scikits/learn/datasets/base.py | 27 ++++++++++++++-------------
 scikits/learn/gmm.py           | 12 +++++++-----
 scikits/learn/hmm.py           | 14 ++++++++------
 scikits/learn/pipeline.py      | 10 +++++++---
 4 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py
index 290dc8dd2a..580f5c8f28 100644
--- a/scikits/learn/datasets/base.py
+++ b/scikits/learn/datasets/base.py
@@ -26,12 +26,12 @@ class Bunch(dict):
 
 def load_iris():
     """load the iris dataset and returns it.
-    
+
     Returns
     -------
     data : Bunch
         Dictionnary-like object, the interesting attributes are:
-        'data', the data to learn, 'target', the classification labels, 
+        'data', the data to learn, 'target', the classification labels,
         'target_names', the meaning of the labels, and 'DESCR', the
         full description of the dataset.
 
@@ -45,14 +45,15 @@ def load_iris():
     >>> data.target[[10, 25, 50]]
     array([0, 0, 1])
    >>> data.target_names
+    ... #doctest: +NORMALIZE_WHITESPACE
     array(['setosa', 'versicolor', 'virginica'], dtype='|S10')
 
     """
-    
-    data_file = csv.reader(open(os.path.dirname(__file__) + 
+
+    data_file = csv.reader(open(os.path.dirname(__file__) +
                            '/data/iris.csv'))
-    fdescr = open(os.path.dirname(__file__) 
+    fdescr = open(os.path.dirname(__file__)
                   + '/descr/iris.rst')
     temp = data_file.next()
     n_samples = int(temp[0])
@@ -63,20 +64,20 @@ def load_iris():
     for i, ir in enumerate(data_file):
         data[i] = np.asanyarray(ir[:-1], dtype=np.float)
         target[i] = np.asanyarray(ir[-1], dtype=np.int)
-    return Bunch(data=data, target=target, target_names=target_names, 
+    return Bunch(data=data, target=target, target_names=target_names,
                  DESCR=fdescr.read())
 
 
 def load_digits():
     """load the digits dataset and returns it.
-    
+
     Returns
     -------
     data : Bunch
         Dictionnary-like object, the interesting attributes are:
         'data', the data to learn, `images`, the images corresponding
         to each sample, 'target', the classification labels for each
-        sample, 'target_names', the meaning of the labels, and 'DESCR', 
+        sample, 'target_names', the meaning of the labels, and 'DESCR',
         the full description of the dataset.
 
     Example
@@ -90,17 +91,17 @@ def load_digits():
     pl.matshow(digits.raw_data[0])
 
     """
-    
-    data = np.loadtxt(os.path.join(os.path.dirname(__file__) + 
+
+    data = np.loadtxt(os.path.join(os.path.dirname(__file__) +
                       '/data/digits.csv.gz'), delimiter=',')
-    fdescr = open(os.path.join(os.path.dirname(__file__) 
+    fdescr = open(os.path.join(os.path.dirname(__file__)
                   + '/descr/digits.rst'))
     target = data[:, -1]
     flat_data = data[:, :-1]
     images = flat_data.view()
     images.shape = (-1, 8, 8)
-    return Bunch(data=flat_data, target=target.astype(np.int), 
-                 target_names=np.arange(10), 
+    return Bunch(data=flat_data, target=target.astype(np.int),
+                 target_names=np.arange(10),
                  images=images, DESCR=fdescr.read())
 
 
diff --git a/scikits/learn/gmm.py b/scikits/learn/gmm.py
index 9011387b2c..f2cb630a82 100644
--- a/scikits/learn/gmm.py
+++ b/scikits/learn/gmm.py
@@ -194,9 +194,9 @@ class GMM(BaseEstimator):
     array([[ 0.],
            [ 0.]])
     >>> np.round(g.covars, 2)
+    ... #doctest: +NORMALIZE_WHITESPACE
     array([[[ 1.]],
-    <BLANKLINE>
-           [[ 1.]]])
+           [[ 1.]]])
 
     >>> # Generate random observations with two modes centered on 0
     >>> # and 10 to use for training.
@@ -204,9 +204,10 @@
     >>> obs = np.concatenate((np.random.randn(100, 1),
     ...                       10 + np.random.randn(300, 1)))
     >>> g.fit(obs)
+    ... #doctest: +NORMALIZE_WHITESPACE
     GMM(n_dim=1, cvtype='diag',
         means=array([[ 9.94199],
-                     [ 0.05981]]), 
+                     [ 0.05981]]),
         covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2,
         weights=array([ 0.75, 0.25]))
 
@@ -216,8 +217,8 @@ class GMM(BaseEstimator):
     array([[ 9.94],
            [ 0.06]])
     >>> np.round(g.covars, 2)
+    ... #doctest: +NORMALIZE_WHITESPACE
     array([[[ 0.96]],
-    <BLANKLINE>
            [[ 1.02]]])
     >>> g.predict([[0], [2], [9], [10]])
     array([1, 1, 0, 0])
@@ -227,9 +228,10 @@
     >>> # Refit the model on new data (initial parameters remain the
     >>> #same), this time with an even split between the two modes.
     >>> g.fit(20 * [[0]] + 20 * [[10]])
+    ... #doctest: +NORMALIZE_WHITESPACE
     GMM(n_dim=1, cvtype='diag',
         means=array([[ 10.],
-                     [ 0.]]), 
+                     [ 0.]]),
         covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2,
         weights=array([ 0.5, 0.5]))
 
diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py
index ca5e60b03f..ac0f04a6e9 100644
--- a/scikits/learn/hmm.py
+++ b/scikits/learn/hmm.py
@@ -78,7 +78,7 @@ class _BaseHMM(BaseEstimator):
         if startprob_prior is None:
             startprob_prior = 1.0
         self.startprob_prior = startprob_prior
-        
+
         if transmat is None:
             transmat = np.tile(1.0 / n_states, (n_states, n_states))
         self.transmat = transmat
@@ -486,7 +486,7 @@ class _BaseHMM(BaseEstimator):
         self.transmat[:] = 1.0 / self._n_states
 
     # Methods used by self.fit()
-    
+
     def _initialize_sufficient_statistics(self):
         stats = {'nobs': 0,
                  'start': np.zeros(self._n_states),
@@ -586,7 +586,7 @@ class GaussianHMM(_BaseHMM):
     --------
     GMM : Gaussian mixture model
     """
-    
+
     def __init__(self, n_states=1, n_dim=1, cvtype='diag',
                  startprob=None, transmat=None, labels=None,
                  means=None, covars=None,
                  startprob_prior=None, transmat_prior=None,
@@ -821,6 +821,7 @@ class MultinomialHMM(_BaseHMM):
     --------
     >>> from scikits.learn.hmm import MultinomialHMM
     >>> MultinomialHMM(n_states=2, nsymbols=3)
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
     MultinomialHMM(n_states=2,
             emissionprob=array([[ 0.3663 , 0.12783, 0.50587],
                                 [ 0.35851, 0.21559, 0.42589]]),
@@ -829,7 +830,7 @@
             transmat=array([[ 0.5, 0.5],
                             [ 0.5, 0.5]]),
             nsymbols=3, transmat_prior=1.0)
-    
+
     See Also
     --------
     GaussianHMM : HMM with Gaussian emissions
@@ -952,7 +953,8 @@ class GMMHMM(_BaseHMM):
     Examples
     --------
     >>> from scikits.learn.hmm import GMMHMM
-    >>> GMMHMM(n_states=2, n_mix=10, n_dim=3) # doctest: +ELLIPSIS
+    >>> GMMHMM(n_states=2, n_mix=10, n_dim=3)
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
     GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None,
            labels=[None, None], ...)
 
     See Also
@@ -1025,7 +1027,7 @@ class GMMHMM(_BaseHMM):
         super(GMMHMM, self)._accumulate_sufficient_statistics(
             stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
             params)
-        
+
         for state,g in enumerate(self.gmms):
             gmm_logprob, gmm_posteriors = g.eval(obs)
             gmm_posteriors *= posteriors[:,state][:,np.newaxis]
diff --git a/scikits/learn/pipeline.py b/scikits/learn/pipeline.py
index ea4ce2ea72..1c7cb5dd3a 100644
--- a/scikits/learn/pipeline.py
+++ b/scikits/learn/pipeline.py
@@ -62,9 +62,13 @@ class Pipeline(BaseEstimator):
     >>> # You can set the parameters using the names issued
     >>> # For instance, fit using a k of 10 in the SelectKBest
     >>> # and a parameter 'C' of the svn
-    >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1) #doctest: +ELLIPSIS
-    Pipeline(steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at ...>)), ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3, coef0=0.0, eps=0.001,
-    cache_size=100.0, shrinking=True, gamma=0.01))])
+    >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1)
+    ... #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    Pipeline(steps=[('anova', SelectKBest(k=10,
+        score_func=<function f_regression at ...>)),
+        ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3,
+        coef0=0.0, eps=0.001, cache_size=100.0, shrinking=True,
+        gamma=0.01))])
 
     >>> prediction = anova_svm.predict(X)
     >>> score = anova_svm.score(X)
-- 
GitLab
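P.S. (placed below the signature separator, so not part of the patch itself):
for reviewers who have not used the directive before, +NORMALIZE_WHITESPACE
tells doctest to collapse every run of spaces and newlines to a single space
in both the expected and the actual output before comparing, which is what
lets the wrapped multi-line array reprs above keep matching. Below is a
minimal standalone sketch of the same trick the patch uses, putting the
directive on a '...' continuation line so it stays out of the rendered
output; the `squares` helper is invented for the illustration.

    import doctest

    def squares():
        """Return the first four square numbers.

        The directive rides on a '...' continuation line, the same
        trick this patch uses, so it applies to this example only.

        >>> squares()
        ... #doctest: +NORMALIZE_WHITESPACE
        [0, 1,
         4, 9]
        """
        return [n * n for n in range(4)]

    if __name__ == '__main__':
        # Prints nothing when the normalized outputs match; without the
        # directive, the two-line expected output above would fail
        # against the one-line repr '[0, 1, 4, 9]'.
        doctest.testmod()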