From 68088f71bccf06802da34c1bed9b34c9d46ceaed Mon Sep 17 00:00:00 2001
From: Gilles Louppe <g.louppe@gmail.com>
Date: Sat, 6 Aug 2011 19:54:01 +0200
Subject: [PATCH] Added tests for the samples generator module.

---
 .../datasets/tests/test_samples_generator.py  | 120 +++++++++++++++++-
 1 file changed, 116 insertions(+), 4 deletions(-)

diff --git a/scikits/learn/datasets/tests/test_samples_generator.py b/scikits/learn/datasets/tests/test_samples_generator.py
index 5372a36096..15ec7c32fd 100644
--- a/scikits/learn/datasets/tests/test_samples_generator.py
+++ b/scikits/learn/datasets/tests/test_samples_generator.py
@@ -1,18 +1,130 @@
 import numpy as np
-from numpy.testing import assert_equal, assert_array_almost_equal
+from numpy.testing import assert_equal, assert_almost_equal, \
+                          assert_array_almost_equal, assert_array_less
 
+from .. import make_classification
+from .. import make_regression
+from .. import make_blobs
+from .. import make_friedman1
+from .. import make_friedman2
+from .. import make_friedman3
+from .. import make_low_rank_matrix
 from .. import make_sparse_coded_signal
+from .. import make_sparse_uncorrelated
+from .. import make_spd_matrix
+from .. import make_swiss_roll
+from .. import make_s_curve
 
+def test_make_classification():
+    X, y = make_classification(n_samples=100, n_features=20, n_informative=5, 
+                               n_classes=3, n_clusters_per_class=1, 
+                               weights=[0.1, 0.25, 0.65], random_state=0)
+
+    assert_equal(X.shape, (100, 20), "X shape mismatch")
+    assert_equal(y.shape, (100,), "y shape mismatch")
+    assert_equal(np.unique(y).shape, (3,), "Unexpected number of classes")
+    assert_equal(sum(y == 0), 10, "Unexpected number of samples in class #0")
+    assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1")
+    assert_equal(sum(y == 2), 65, "Unexpected number of samples in class #2")
+
+def test_make_regression():
+    X, y, c = make_regression(n_samples=50, n_features=10, n_informative=3, 
+                              coef=True, bias=0.0, random_state=0)
+
+    assert_equal(X.shape, (50, 10), "X shape mismatch")
+    assert_equal(y.shape, (50,), "y shape mismatch")
+    assert_equal(c.shape, (10,), "coef shape mismatch")
+    assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features")
+    assert_array_almost_equal(y, np.dot(X, c))
+
+def test_make_blobs():
+    X, y = make_blobs(n_samples=50, n_features=5, centers=3, random_state=0)
+
+    assert_equal(X.shape, (50, 5), "X shape mismatch")
+    assert_equal(y.shape, (50,), "y shape mismatch")
+    assert_equal(np.unique(y).shape, (3,), "Unexpected number of blobs")
+
+def test_make_friedman1():
+    X, y = make_friedman1(n_samples=5, n_features=10, noise=0.0, random_state=0)
+
+    assert_equal(X.shape, (5, 10), "X shape mismatch")
+    assert_equal(y.shape, (5,), "y shape mismatch")
+
+    assert_array_almost_equal(y, 10 * np.sin(np.pi * X[:, 0] * X[:, 1]) 
+                                 + 20 * (X[:, 2] - 0.5) ** 2 \
+                                 + 10 * X[:, 3] + 5 * X[:, 4])
+
+def test_make_friedman2():
+    X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0)
+
+    assert_equal(X.shape, (5, 4), "X shape mismatch")
+    assert_equal(y.shape, (5,), "y shape mismatch")
+
+    assert_array_almost_equal(y, (X[:, 0] ** 2 
+                                 + (X[:, 1] * X[:, 2] 
+                                    - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5)
+
+def test_make_friedman3():
+    X, y = make_friedman3(n_samples=5, noise=0.0, random_state=0)
+
+    assert_equal(X.shape, (5, 4), "X shape mismatch")
+    assert_equal(y.shape, (5,), "y shape mismatch")
+
+    assert_array_almost_equal(y, np.arctan((X[:, 1] * X[:, 2] 
+                                            - 1 / (X[:, 1] * X[:, 3])) 
+                                           / X[:, 0]))
+
+def test_make_low_rank_matrix():
+    X = make_low_rank_matrix(n_samples=50, n_features=25, effective_rank=5, 
+                             tail_strength=0.01, random_state=0)
+
+    assert_equal(X.shape, (50, 25), "X shape mismatch")
+
+    from numpy.linalg import svd
+    u, s, v = svd(X)
+    assert sum(s) - 5 < 0.1, "X rank is not approximately 5"
 
 def test_make_sparse_coded_signal():
     Y, D, X = make_sparse_coded_signal(n_samples=5, n_components=8,
                                            n_features=10, n_nonzero_coefs=3,
                                            random_state=0)
-    assert_equal(Y.shape, (10, 5), 'Data shape mismatch')
-    assert_equal(D.shape, (10, 8), 'Dictionary shape mismatch')
-    assert_equal(X.shape, (8, 5), 'Code shape mismatch')
+    assert_equal(Y.shape, (10, 5), "Y shape mismatch")
+    assert_equal(D.shape, (10, 8), "D shape mismatch")
+    assert_equal(X.shape, (8, 5), "X shape mismatch")
     for col in X.T:
         assert_equal(len(np.flatnonzero(col)), 3, 'Non-zero coefs mismatch')
     assert_equal(np.dot(D, X), Y)
     assert_array_almost_equal(np.sqrt((D ** 2).sum(axis=0)),
                               np.ones(D.shape[1]))
+
+def test_make_sparse_uncorrelated():
+    X, y = make_sparse_uncorrelated(n_samples=5, n_features=10, random_state=0)
+
+    assert_equal(X.shape, (5, 10), "X shape mismatch")
+    assert_equal(y.shape, (5,), "y shape mismatch")
+
+def test_make_spd_matrix():
+    X = make_spd_matrix(n_dim=5, random_state=0)
+
+    assert_equal(X.shape, (5, 5), "X shape mismatch")
+    assert_array_almost_equal(X, X.T)
+
+    from numpy.linalg import eig
+    eigenvalues, _ = eig(X)
+    assert_equal(eigenvalues > 0, np.array([True] * 5), "X is not positive-definite")    
+
+def test_make_swiss_roll():
+    X, t = make_swiss_roll(n_samples=5, noise=0.0, random_state=0)
+
+    assert_equal(X.shape, (5, 3), "X shape mismatch")
+    assert_equal(t.shape, (5,), "t shape mismatch")
+    assert_equal(X[:, 0], t * np.cos(t))
+    assert_equal(X[:, 2], t * np.sin(t))
+
+def test_make_s_curve():
+    X, t = make_s_curve(n_samples=5, noise=0.0, random_state=0)
+
+    assert_equal(X.shape, (5, 3), "X shape mismatch")
+    assert_equal(t.shape, (5,), "t shape mismatch")
+    assert_equal(X[:, 0], np.sin(t))
+    assert_equal(X[:, 2], np.sign(t) * (np.cos(t) - 1))
-- 
GitLab