From 431d8f4c4dfde2c1ea47c5e89cbf2c4b494799fe Mon Sep 17 00:00:00 2001
From: Ron Weiss <ronweiss@gmail.com>
Date: Sun, 7 Nov 2010 16:21:09 -0500
Subject: [PATCH] rename HMM.n_dim to HMM.n_features to be consistent with the
 rest of the scikit

---
 scikits/learn/hmm.py            | 72 +++++++++++++++++----------------
 scikits/learn/tests/test_hmm.py | 50 +++++++++++------------
 2 files changed, 62 insertions(+), 60 deletions(-)

diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py
index 1191e5932d..f748d4108d 100644
--- a/scikits/learn/hmm.py
+++ b/scikits/learn/hmm.py
@@ -93,8 +93,8 @@ class _BaseHMM(BaseEstimator):
 
         Parameters
         ----------
-        obs : array_like, shape (n, n_dim)
-            Sequence of n_dim-dimensional data points.  Each row
+        obs : array_like, shape (n, n_features)
+            Sequence of n_features-dimensional data points.  Each row
             corresponds to a single point in the sequence.
         maxrank : int
             Maximum rank to evaluate for rank pruning.  If not None,
@@ -138,8 +138,8 @@ class _BaseHMM(BaseEstimator):
 
         Parameters
         ----------
-        obs : array_like, shape (n, n_dim)
-            Sequence of n_dim-dimensional data points.  Each row
+        obs : array_like, shape (n, n_features)
+            Sequence of n_features-dimensional data points.  Each row
             corresponds to a single data point.
         maxrank : int
             Maximum rank to evaluate for rank pruning.  If not None,
@@ -174,9 +174,9 @@ class _BaseHMM(BaseEstimator):
 
         Parameters
         ----------
-        obs : array_like, shape (n, n_dim)
-            List of n_dim-dimensional data points.  Each row corresponds to a
-            single data point.
+        obs : array_like, shape (n, n_features)
+            List of n_features-dimensional data points.  Each row
+            corresponds to a single data point.
         maxrank : int
             Maximum rank to evaluate for rank pruning.  If not None,
             only consider the top `maxrank` states in the inner
@@ -210,9 +210,9 @@ class _BaseHMM(BaseEstimator):
 
         Parameters
         ----------
-        obs : array_like, shape (n, n_dim)
-            List of n_dim-dimensional data points.  Each row corresponds to a
-            single data point.
+        obs : array_like, shape (n, n_features)
+            List of n_features-dimensional data points.  Each row
+            corresponds to a single data point.
         maxrank : int
             Maximum rank to evaluate for rank pruning.  If not None,
             only consider the top `maxrank` states in the inner
@@ -236,9 +236,9 @@ class _BaseHMM(BaseEstimator):
 
         Parameters
         ----------
-        obs : array_like, shape (n, n_dim)
-            List of n_dim-dimensional data points.  Each row corresponds to a
-            single data point.
+        obs : array_like, shape (n, n_features)
+            List of n_features-dimensional data points.  Each row
+            corresponds to a single data point.
 
         See eval() for a list of accepted keyword arguments.
 
@@ -295,7 +295,7 @@ class _BaseHMM(BaseEstimator):
         Parameters
         ----------
         obs : list
-            List of array-like observation sequences (shape (n_i, n_dim)).
+            List of array-like observation sequences (shape (n_i, n_features)).
         n_iter : int, optional
             Number of iterations to perform.
         thresh : float, optional
@@ -542,7 +542,7 @@ class GaussianHMM(_BaseHMM):
     cvtype : string (read-only)
         String describing the type of covariance parameters used by
         the model.  Must be one of 'spherical', 'tied', 'diag', 'full'.
-    n_dim : int (read-only)
+    n_features : int (read-only)
         Dimensionality of the Gaussian emissions.
     n_states : int (read-only)
         Number of states in the model.
@@ -550,15 +550,15 @@ class GaussianHMM(_BaseHMM):
         Matrix of transition probabilities between states.
     startprob : array, shape ('n_states`,)
         Initial state occupation distribution.
-    means : array, shape (`n_states`, `n_dim`)
+    means : array, shape (`n_states`, `n_features`)
         Mean parameters for each state.
     covars : array
         Covariance parameters for each state.  The shape depends on
         `cvtype`:
             (`n_states`,)                   if 'spherical',
-            (`n_dim`, `n_dim`)              if 'tied',
-            (`n_states`, `n_dim`)           if 'diag',
-            (`n_states`, `n_dim`, `n_dim`)  if 'full'
+            (`n_features`, `n_features`)             if 'tied',
+            (`n_states`, `n_features`)               if 'diag',
+            (`n_states`, `n_features`, `n_features`) if 'full'
 
     Methods
     -------
@@ -641,11 +641,11 @@ class GaussianHMM(_BaseHMM):
 
     def _set_means(self, means):
         means = np.asanyarray(means)
-        if hasattr(self, 'n_dim') and \
-               means.shape != (self._n_states, self.n_dim):
-            raise ValueError('means must have shape (n_states, n_dim)')
+        if hasattr(self, 'n_features') and \
+               means.shape != (self._n_states, self.n_features):
+            raise ValueError('means must have shape (n_states, n_features)')
         self._means = means.copy()
-        self.n_dim = self._means.shape[1]
+        self.n_features = self._means.shape[1]
 
     means = property(_get_means, _set_means)
 
@@ -658,11 +658,11 @@ class GaussianHMM(_BaseHMM):
         elif self.cvtype == 'tied':
             return [self._covars] * self._n_states
         elif self.cvtype == 'spherical':
-            return [np.eye(self.n_dim) * f for f in self._covars]
+            return [np.eye(self.n_features) * f for f in self._covars]
 
     def _set_covars(self, covars):
         covars = np.asanyarray(covars)
-        _validate_covars(covars, self._cvtype, self._n_states, self.n_dim)
+        _validate_covars(covars, self._cvtype, self._n_states, self.n_features)
         self._covars = covars.copy()
 
     covars = property(_get_covars, _set_covars)
@@ -680,11 +680,11 @@ class GaussianHMM(_BaseHMM):
     def _init(self, obs, params='stmc'):
         super(GaussianHMM, self)._init(obs, params=params)
 
-        if hasattr(self, 'n_dim') and self.n_dim != obs.shape[2]:
+        if hasattr(self, 'n_features') and self.n_features != obs.shape[2]:
             raise ValueError('Unexpected number of dimensions, got %s but '
-                             'expected %s' % (obs.shape[2], self.n_dim))
+                             'expected %s' % (obs.shape[2], self.n_features))
 
-        self.n_dim = obs.shape[2]
+        self.n_features = obs.shape[2]
 
         if 'm' in params:
             self._means = cluster.KMeans(
@@ -699,10 +699,10 @@ class GaussianHMM(_BaseHMM):
     def _initialize_sufficient_statistics(self):
         stats = super(GaussianHMM, self)._initialize_sufficient_statistics()
         stats['post'] = np.zeros(self._n_states)
-        stats['obs'] = np.zeros((self._n_states, self.n_dim))
-        stats['obs**2'] = np.zeros((self._n_states, self.n_dim))
-        stats['obs*obs.T'] = np.zeros((self._n_states, self.n_dim,
-                                       self.n_dim))
+        stats['obs'] = np.zeros((self._n_states, self.n_features))
+        stats['obs**2'] = np.zeros((self._n_states, self.n_features))
+        stats['obs*obs.T'] = np.zeros((self._n_states, self.n_features,
+                                       self.n_features))
         return stats
 
     def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
@@ -765,7 +765,8 @@ class GaussianHMM(_BaseHMM):
                 elif self._cvtype == 'diag':
                     self._covars = (covars_prior + cv_num) / cv_den
             elif self._cvtype in ('tied', 'full'):
-                cvnum = np.empty((self._n_states, self.n_dim, self.n_dim))
+                cvnum = np.empty((self._n_states, self.n_features,
+                                  self.n_features))
                 for c in xrange(self._n_states):
                     obsmean = np.outer(stats['obs'][c], self._means[c])
 
@@ -775,7 +776,7 @@ class GaussianHMM(_BaseHMM):
                                 - obsmean - obsmean.T
                                 + np.outer(self._means[c], self._means[c])
                                 * stats['post'][c])
-                cvweight = max(covars_weight - self.n_dim, 0)
+                cvweight = max(covars_weight - self.n_features, 0)
                 if self._cvtype == 'tied':
                     self._covars = ((covars_prior + cvnum.sum(axis=0))
                                     / (cvweight + stats['post'].sum()))
@@ -945,7 +946,8 @@ class GMMHMM(_BaseHMM):
         transmat=array([[ 0.5,  0.5],
            [ 0.5,  0.5]]),
         transmat_prior=1.0,
-        gmms=[GMM(cvtype='diag', n_states=10), GMM(cvtype='diag', n_states=10)])
+        gmms=[GMM(cvtype='diag', n_states=10), GMM(cvtype='diag',
+              n_states=10)])
 
     See Also
     --------
diff --git a/scikits/learn/tests/test_hmm.py b/scikits/learn/tests/test_hmm.py
index 69c9bdf74e..5a80dacdc6 100644
--- a/scikits/learn/tests/test_hmm.py
+++ b/scikits/learn/tests/test_hmm.py
@@ -215,18 +215,18 @@ def train_hmm_and_keep_track_of_log_likelihood(hmm, obs, n_iter=1, **kwargs):
 
 class GaussianHMMParams(object):
     n_states = 3
-    n_dim = 3
+    n_features = 3
     startprob = np.random.rand(n_states)
     startprob = startprob / startprob.sum()
     transmat = np.random.rand(n_states, n_states)
     transmat /= np.tile(transmat.sum(axis=1)[:,np.newaxis], (1, n_states))
-    means = np.random.randint(-20, 20, (n_states, n_dim))
+    means = np.random.randint(-20, 20, (n_states, n_features))
     covars = {'spherical': (1.0 + 2 * np.random.rand(n_states))**2,
-              'tied': _generate_random_spd_matrix(n_dim) + np.eye(n_dim),
-              'diag': (1.0 + 2 * np.random.rand(n_states, n_dim))**2,
-              'full': np.array([_generate_random_spd_matrix(n_dim)
-                                + np.eye(n_dim) for x in xrange(n_states)])}
-    expanded_covars = {'spherical': [np.eye(n_dim) * cov
+              'tied': _generate_random_spd_matrix(n_features) + np.eye(n_features),
+              'diag': (1.0 + 2 * np.random.rand(n_states, n_features))**2,
+              'full': np.array([_generate_random_spd_matrix(n_features)
+                                + np.eye(n_features) for x in xrange(n_states)])}
+    expanded_covars = {'spherical': [np.eye(n_features) * cov
                                      for cov in covars['spherical']],
                        'diag': [np.diag(cov) for cov in covars['diag']],
                        'tied': [covars['tied']] * n_states,
@@ -251,7 +251,7 @@ class GaussianHMMTester(GaussianHMMParams):
                           2 * self.startprob)
         self.assertRaises(ValueError, h.__setattr__, 'startprob', [])
         self.assertRaises(ValueError, h.__setattr__, 'startprob',
-                          np.zeros((self.n_states - 2, self.n_dim)))
+                          np.zeros((self.n_states - 2, self.n_features)))
 
         h.transmat = self.transmat
         assert_array_almost_equal(h.transmat, self.transmat)
@@ -263,16 +263,16 @@ class GaussianHMMTester(GaussianHMMParams):
 
         h.means = self.means
         assert_array_almost_equal(h.means, self.means)
-        self.assertEquals(h.n_dim, self.n_dim)
+        self.assertEquals(h.n_features, self.n_features)
         self.assertRaises(ValueError, h.__setattr__, 'means', [])
         self.assertRaises(ValueError, h.__setattr__, 'means',
-                          np.zeros((self.n_states - 2, self.n_dim)))
+                          np.zeros((self.n_states - 2, self.n_features)))
 
         h.covars = self.covars[self.cvtype]
         assert_array_almost_equal(h.covars, self.expanded_covars[self.cvtype])
         #self.assertRaises(ValueError, h.__setattr__, 'covars', [])
         #self.assertRaises(ValueError, h.__setattr__, 'covars',
-        #                  np.zeros((self.n_states - 2, self.n_dim)))
+        #                  np.zeros((self.n_states - 2, self.n_features)))
 
     def test_eval_and_decode(self):
         h = hmm.GaussianHMM(self.n_states, self.cvtype)
@@ -285,7 +285,7 @@ class GaussianHMMTester(GaussianHMMParams):
 
         gaussidx = np.repeat(range(self.n_states), 5)
         nobs = len(gaussidx)
-        obs = np.random.randn(nobs, self.n_dim) + h.means[gaussidx]
+        obs = np.random.randn(nobs, self.n_features) + h.means[gaussidx]
 
         ll, posteriors = h.eval(obs)
 
@@ -304,7 +304,7 @@ class GaussianHMMTester(GaussianHMMParams):
         h.startprob = self.startprob
 
         samples = h.rvs(n)
-        self.assertEquals(samples.shape, (n, self.n_dim))
+        self.assertEquals(samples.shape, (n, self.n_features))
 
     def test_fit(self, params='stmc', n_iter=15, verbose=False, **kwargs):
         h = hmm.GaussianHMM(self.n_states, self.cvtype)
@@ -337,7 +337,7 @@ class GaussianHMMTester(GaussianHMMParams):
         means_weight = 2.0
         covars_weight = 2.0
         if self.cvtype in ('full', 'tied'):
-            covars_weight += self.n_dim
+            covars_weight += self.n_features
         covars_prior = self.covars[self.cvtype]
 
         h = hmm.GaussianHMM(self.n_states, self.cvtype)
@@ -502,7 +502,7 @@ class TestMultinomialHMM(MultinomialHMMParams,
 class GMMHMMParams(object):
     n_states = 3
     n_mix = 2
-    n_dim = 2
+    n_features = 2
     cvtype = 'diag'
     startprob = np.random.rand(n_states)
     startprob = startprob / startprob.sum()
@@ -510,18 +510,18 @@ class GMMHMMParams(object):
     transmat /= np.tile(transmat.sum(axis=1)[:,np.newaxis], (1, n_states))
 
     @staticmethod
-    def create_random_gmm(n_mix, n_dim, cvtype):
+    def create_random_gmm(n_mix, n_features, cvtype):
         from scikits.learn import gmm
 
         g = gmm.GMM(n_mix, cvtype=cvtype)
-        g.means = np.random.randint(-20, 20, (n_mix, n_dim))
+        g.means = np.random.randint(-20, 20, (n_mix, n_features))
         mincv = 0.1
         g.covars = {'spherical': (mincv + mincv * np.random.rand(n_mix))**2,
-                    'tied': _generate_random_spd_matrix(n_dim)
-                           + mincv * np.eye(n_dim),
-                    'diag': (mincv + mincv * np.random.rand(n_mix, n_dim))**2,
-                    'full': np.array([_generate_random_spd_matrix(n_dim)
-                                      + mincv * np.eye(n_dim)
+                    'tied': _generate_random_spd_matrix(n_features)
+                           + mincv * np.eye(n_features),
+                    'diag': (mincv + mincv * np.random.rand(n_mix, n_features))**2,
+                    'full': np.array([_generate_random_spd_matrix(n_features)
+                                      + mincv * np.eye(n_features)
                                       for x in xrange(n_mix)])}[cvtype]
         g.weights = hmm.normalize(np.random.rand(n_mix))
 
@@ -534,7 +534,7 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase):
         np.random.seed(self.seed)
         self.gmms = []
         for state in xrange(self.n_states):
-            self.gmms.append(self.create_random_gmm(self.n_mix, self.n_dim,
+            self.gmms.append(self.create_random_gmm(self.n_mix, self.n_features,
                                                     self.cvtype))
 
     def test_attributes(self):
@@ -548,7 +548,7 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase):
                           2 * self.startprob)
         self.assertRaises(ValueError, h.__setattr__, 'startprob', [])
         self.assertRaises(ValueError, h.__setattr__, 'startprob',
-                          np.zeros((self.n_states - 2, self.n_dim)))
+                          np.zeros((self.n_states - 2, self.n_features)))
 
         h.transmat = self.transmat
         assert_array_almost_equal(h.transmat, self.transmat)
@@ -582,7 +582,7 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase):
                        startprob=self.startprob, transmat=self.transmat,
                        gmms=self.gmms)
         samples = h.rvs(n)
-        self.assertEquals(samples.shape, (n, self.n_dim))
+        self.assertEquals(samples.shape, (n, self.n_features))
 
     def test_fit(self, params='stmwc', n_iter=5, verbose=True, **kwargs):
         h = hmm.GMMHMM(self.n_states)
-- 
GitLab