diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 3ba9dfd4878689a150a85918ea96ff73a71efccc..82ae355a7f4f265e1ea3b8e6ee0a21c7dab38d69 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -25,7 +25,7 @@ def _generate_hypercube(samples, dimensions, rng): """Returns distinct binary samples of length dimensions """ if dimensions > 30: - return np.hstack([_generate_hypercube(samples, dimensions - 30, rng), + return np.hstack([rng.randint(2, size=(samples, dimensions - 30)), _generate_hypercube(samples, 30, rng)]) out = sample_without_replacement(2 ** dimensions, samples, random_state=rng).astype(dtype='>u4', diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index cd4d4148c07cc2ea5367f7446e69b4658ac1743b..7e0bcff90d66b741a04eb3513b47de4ba96e1469 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -50,6 +50,17 @@ def test_make_classification(): assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1") assert_equal(sum(y == 2), 65, "Unexpected number of samples in class #2") + # Test for n_features > 30 + X, y = make_classification(n_samples=2000, n_features=31, n_informative=31, + n_redundant=0, n_repeated=0, hypercube=True, + scale=0.5, random_state=0) + + assert_equal(X.shape, (2000, 31), "X shape mismatch") + assert_equal(y.shape, (2000,), "y shape mismatch") + assert_equal(np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) + .reshape(-1, X.shape[1]).shape[0], 2000, + "Unexpected number of unique rows") + def test_make_classification_informative_features(): """Test the construction of informative features in make_classification