From 431d8f4c4dfde2c1ea47c5e89cbf2c4b494799fe Mon Sep 17 00:00:00 2001 From: Ron Weiss <ronweiss@gmail.com> Date: Sun, 7 Nov 2010 16:21:09 -0500 Subject: [PATCH] rename HMM.n_dim to HMM.n_features to be consistent with the rest of the scikit --- scikits/learn/hmm.py | 72 +++++++++++++++++---------------- scikits/learn/tests/test_hmm.py | 50 +++++++++++------------ 2 files changed, 62 insertions(+), 60 deletions(-) diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py index 1191e5932d..f748d4108d 100644 --- a/scikits/learn/hmm.py +++ b/scikits/learn/hmm.py @@ -93,8 +93,8 @@ class _BaseHMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_dim) - Sequence of n_dim-dimensional data points. Each row + obs : array_like, shape (n, n_features) + Sequence of n_features-dimensional data points. Each row corresponds to a single point in the sequence. maxrank : int Maximum rank to evaluate for rank pruning. If not None, @@ -138,8 +138,8 @@ class _BaseHMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_dim) - Sequence of n_dim-dimensional data points. Each row + obs : array_like, shape (n, n_features) + Sequence of n_features-dimensional data points. Each row corresponds to a single data point. maxrank : int Maximum rank to evaluate for rank pruning. If not None, @@ -174,9 +174,9 @@ class _BaseHMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_dim) - List of n_dim-dimensional data points. Each row corresponds to a - single data point. + obs : array_like, shape (n, n_features) + List of n_features-dimensional data points. Each row + corresponds to a single data point. maxrank : int Maximum rank to evaluate for rank pruning. If not None, only consider the top `maxrank` states in the inner @@ -210,9 +210,9 @@ class _BaseHMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_dim) - List of n_dim-dimensional data points. Each row corresponds to a - single data point. + obs : array_like, shape (n, n_features) + List of n_features-dimensional data points. Each row + corresponds to a single data point. maxrank : int Maximum rank to evaluate for rank pruning. If not None, only consider the top `maxrank` states in the inner @@ -236,9 +236,9 @@ class _BaseHMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_dim) - List of n_dim-dimensional data points. Each row corresponds to a - single data point. + obs : array_like, shape (n, n_features) + List of n_features-dimensional data points. Each row + corresponds to a single data point. See eval() for a list of accepted keyword arguments. @@ -295,7 +295,7 @@ class _BaseHMM(BaseEstimator): Parameters ---------- obs : list - List of array-like observation sequences (shape (n_i, n_dim)). + List of array-like observation sequences (shape (n_i, n_features)). n_iter : int, optional Number of iterations to perform. thresh : float, optional @@ -542,7 +542,7 @@ class GaussianHMM(_BaseHMM): cvtype : string (read-only) String describing the type of covariance parameters used by the model. Must be one of 'spherical', 'tied', 'diag', 'full'. - n_dim : int (read-only) + n_features : int (read-only) Dimensionality of the Gaussian emissions. n_states : int (read-only) Number of states in the model. @@ -550,15 +550,15 @@ class GaussianHMM(_BaseHMM): Matrix of transition probabilities between states. startprob : array, shape ('n_states`,) Initial state occupation distribution. - means : array, shape (`n_states`, `n_dim`) + means : array, shape (`n_states`, `n_features`) Mean parameters for each state. covars : array Covariance parameters for each state. The shape depends on `cvtype`: (`n_states`,) if 'spherical', - (`n_dim`, `n_dim`) if 'tied', - (`n_states`, `n_dim`) if 'diag', - (`n_states`, `n_dim`, `n_dim`) if 'full' + (`n_features`, `n_features`) if 'tied', + (`n_states`, `n_features`) if 'diag', + (`n_states`, `n_features`, `n_features`) if 'full' Methods ------- @@ -641,11 +641,11 @@ class GaussianHMM(_BaseHMM): def _set_means(self, means): means = np.asanyarray(means) - if hasattr(self, 'n_dim') and \ - means.shape != (self._n_states, self.n_dim): - raise ValueError('means must have shape (n_states, n_dim)') + if hasattr(self, 'n_features') and \ + means.shape != (self._n_states, self.n_features): + raise ValueError('means must have shape (n_states, n_features)') self._means = means.copy() - self.n_dim = self._means.shape[1] + self.n_features = self._means.shape[1] means = property(_get_means, _set_means) @@ -658,11 +658,11 @@ class GaussianHMM(_BaseHMM): elif self.cvtype == 'tied': return [self._covars] * self._n_states elif self.cvtype == 'spherical': - return [np.eye(self.n_dim) * f for f in self._covars] + return [np.eye(self.n_features) * f for f in self._covars] def _set_covars(self, covars): covars = np.asanyarray(covars) - _validate_covars(covars, self._cvtype, self._n_states, self.n_dim) + _validate_covars(covars, self._cvtype, self._n_states, self.n_features) self._covars = covars.copy() covars = property(_get_covars, _set_covars) @@ -680,11 +680,11 @@ class GaussianHMM(_BaseHMM): def _init(self, obs, params='stmc'): super(GaussianHMM, self)._init(obs, params=params) - if hasattr(self, 'n_dim') and self.n_dim != obs.shape[2]: + if hasattr(self, 'n_features') and self.n_features != obs.shape[2]: raise ValueError('Unexpected number of dimensions, got %s but ' - 'expected %s' % (obs.shape[2], self.n_dim)) + 'expected %s' % (obs.shape[2], self.n_features)) - self.n_dim = obs.shape[2] + self.n_features = obs.shape[2] if 'm' in params: self._means = cluster.KMeans( @@ -699,10 +699,10 @@ class GaussianHMM(_BaseHMM): def _initialize_sufficient_statistics(self): stats = super(GaussianHMM, self)._initialize_sufficient_statistics() stats['post'] = np.zeros(self._n_states) - stats['obs'] = np.zeros((self._n_states, self.n_dim)) - stats['obs**2'] = np.zeros((self._n_states, self.n_dim)) - stats['obs*obs.T'] = np.zeros((self._n_states, self.n_dim, - self.n_dim)) + stats['obs'] = np.zeros((self._n_states, self.n_features)) + stats['obs**2'] = np.zeros((self._n_states, self.n_features)) + stats['obs*obs.T'] = np.zeros((self._n_states, self.n_features, + self.n_features)) return stats def _accumulate_sufficient_statistics(self, stats, obs, framelogprob, @@ -765,7 +765,8 @@ class GaussianHMM(_BaseHMM): elif self._cvtype == 'diag': self._covars = (covars_prior + cv_num) / cv_den elif self._cvtype in ('tied', 'full'): - cvnum = np.empty((self._n_states, self.n_dim, self.n_dim)) + cvnum = np.empty((self._n_states, self.n_features, + self.n_features)) for c in xrange(self._n_states): obsmean = np.outer(stats['obs'][c], self._means[c]) @@ -775,7 +776,7 @@ class GaussianHMM(_BaseHMM): - obsmean - obsmean.T + np.outer(self._means[c], self._means[c]) * stats['post'][c]) - cvweight = max(covars_weight - self.n_dim, 0) + cvweight = max(covars_weight - self.n_features, 0) if self._cvtype == 'tied': self._covars = ((covars_prior + cvnum.sum(axis=0)) / (cvweight + stats['post'].sum())) @@ -945,7 +946,8 @@ class GMMHMM(_BaseHMM): transmat=array([[ 0.5, 0.5], [ 0.5, 0.5]]), transmat_prior=1.0, - gmms=[GMM(cvtype='diag', n_states=10), GMM(cvtype='diag', n_states=10)]) + gmms=[GMM(cvtype='diag', n_states=10), GMM(cvtype='diag', + n_states=10)]) See Also -------- diff --git a/scikits/learn/tests/test_hmm.py b/scikits/learn/tests/test_hmm.py index 69c9bdf74e..5a80dacdc6 100644 --- a/scikits/learn/tests/test_hmm.py +++ b/scikits/learn/tests/test_hmm.py @@ -215,18 +215,18 @@ def train_hmm_and_keep_track_of_log_likelihood(hmm, obs, n_iter=1, **kwargs): class GaussianHMMParams(object): n_states = 3 - n_dim = 3 + n_features = 3 startprob = np.random.rand(n_states) startprob = startprob / startprob.sum() transmat = np.random.rand(n_states, n_states) transmat /= np.tile(transmat.sum(axis=1)[:,np.newaxis], (1, n_states)) - means = np.random.randint(-20, 20, (n_states, n_dim)) + means = np.random.randint(-20, 20, (n_states, n_features)) covars = {'spherical': (1.0 + 2 * np.random.rand(n_states))**2, - 'tied': _generate_random_spd_matrix(n_dim) + np.eye(n_dim), - 'diag': (1.0 + 2 * np.random.rand(n_states, n_dim))**2, - 'full': np.array([_generate_random_spd_matrix(n_dim) - + np.eye(n_dim) for x in xrange(n_states)])} - expanded_covars = {'spherical': [np.eye(n_dim) * cov + 'tied': _generate_random_spd_matrix(n_features) + np.eye(n_features), + 'diag': (1.0 + 2 * np.random.rand(n_states, n_features))**2, + 'full': np.array([_generate_random_spd_matrix(n_features) + + np.eye(n_features) for x in xrange(n_states)])} + expanded_covars = {'spherical': [np.eye(n_features) * cov for cov in covars['spherical']], 'diag': [np.diag(cov) for cov in covars['diag']], 'tied': [covars['tied']] * n_states, @@ -251,7 +251,7 @@ class GaussianHMMTester(GaussianHMMParams): 2 * self.startprob) self.assertRaises(ValueError, h.__setattr__, 'startprob', []) self.assertRaises(ValueError, h.__setattr__, 'startprob', - np.zeros((self.n_states - 2, self.n_dim))) + np.zeros((self.n_states - 2, self.n_features))) h.transmat = self.transmat assert_array_almost_equal(h.transmat, self.transmat) @@ -263,16 +263,16 @@ class GaussianHMMTester(GaussianHMMParams): h.means = self.means assert_array_almost_equal(h.means, self.means) - self.assertEquals(h.n_dim, self.n_dim) + self.assertEquals(h.n_features, self.n_features) self.assertRaises(ValueError, h.__setattr__, 'means', []) self.assertRaises(ValueError, h.__setattr__, 'means', - np.zeros((self.n_states - 2, self.n_dim))) + np.zeros((self.n_states - 2, self.n_features))) h.covars = self.covars[self.cvtype] assert_array_almost_equal(h.covars, self.expanded_covars[self.cvtype]) #self.assertRaises(ValueError, h.__setattr__, 'covars', []) #self.assertRaises(ValueError, h.__setattr__, 'covars', - # np.zeros((self.n_states - 2, self.n_dim))) + # np.zeros((self.n_states - 2, self.n_features))) def test_eval_and_decode(self): h = hmm.GaussianHMM(self.n_states, self.cvtype) @@ -285,7 +285,7 @@ class GaussianHMMTester(GaussianHMMParams): gaussidx = np.repeat(range(self.n_states), 5) nobs = len(gaussidx) - obs = np.random.randn(nobs, self.n_dim) + h.means[gaussidx] + obs = np.random.randn(nobs, self.n_features) + h.means[gaussidx] ll, posteriors = h.eval(obs) @@ -304,7 +304,7 @@ class GaussianHMMTester(GaussianHMMParams): h.startprob = self.startprob samples = h.rvs(n) - self.assertEquals(samples.shape, (n, self.n_dim)) + self.assertEquals(samples.shape, (n, self.n_features)) def test_fit(self, params='stmc', n_iter=15, verbose=False, **kwargs): h = hmm.GaussianHMM(self.n_states, self.cvtype) @@ -337,7 +337,7 @@ class GaussianHMMTester(GaussianHMMParams): means_weight = 2.0 covars_weight = 2.0 if self.cvtype in ('full', 'tied'): - covars_weight += self.n_dim + covars_weight += self.n_features covars_prior = self.covars[self.cvtype] h = hmm.GaussianHMM(self.n_states, self.cvtype) @@ -502,7 +502,7 @@ class TestMultinomialHMM(MultinomialHMMParams, class GMMHMMParams(object): n_states = 3 n_mix = 2 - n_dim = 2 + n_features = 2 cvtype = 'diag' startprob = np.random.rand(n_states) startprob = startprob / startprob.sum() @@ -510,18 +510,18 @@ class GMMHMMParams(object): transmat /= np.tile(transmat.sum(axis=1)[:,np.newaxis], (1, n_states)) @staticmethod - def create_random_gmm(n_mix, n_dim, cvtype): + def create_random_gmm(n_mix, n_features, cvtype): from scikits.learn import gmm g = gmm.GMM(n_mix, cvtype=cvtype) - g.means = np.random.randint(-20, 20, (n_mix, n_dim)) + g.means = np.random.randint(-20, 20, (n_mix, n_features)) mincv = 0.1 g.covars = {'spherical': (mincv + mincv * np.random.rand(n_mix))**2, - 'tied': _generate_random_spd_matrix(n_dim) - + mincv * np.eye(n_dim), - 'diag': (mincv + mincv * np.random.rand(n_mix, n_dim))**2, - 'full': np.array([_generate_random_spd_matrix(n_dim) - + mincv * np.eye(n_dim) + 'tied': _generate_random_spd_matrix(n_features) + + mincv * np.eye(n_features), + 'diag': (mincv + mincv * np.random.rand(n_mix, n_features))**2, + 'full': np.array([_generate_random_spd_matrix(n_features) + + mincv * np.eye(n_features) for x in xrange(n_mix)])}[cvtype] g.weights = hmm.normalize(np.random.rand(n_mix)) @@ -534,7 +534,7 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase): np.random.seed(self.seed) self.gmms = [] for state in xrange(self.n_states): - self.gmms.append(self.create_random_gmm(self.n_mix, self.n_dim, + self.gmms.append(self.create_random_gmm(self.n_mix, self.n_features, self.cvtype)) def test_attributes(self): @@ -548,7 +548,7 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase): 2 * self.startprob) self.assertRaises(ValueError, h.__setattr__, 'startprob', []) self.assertRaises(ValueError, h.__setattr__, 'startprob', - np.zeros((self.n_states - 2, self.n_dim))) + np.zeros((self.n_states - 2, self.n_features))) h.transmat = self.transmat assert_array_almost_equal(h.transmat, self.transmat) @@ -582,7 +582,7 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase): startprob=self.startprob, transmat=self.transmat, gmms=self.gmms) samples = h.rvs(n) - self.assertEquals(samples.shape, (n, self.n_dim)) + self.assertEquals(samples.shape, (n, self.n_features)) def test_fit(self, params='stmwc', n_iter=5, verbose=True, **kwargs): h = hmm.GMMHMM(self.n_states) -- GitLab