diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py
index 00085a32af94f0c33d8b42902438ced863213356..0c58d04c275811b3b086d1a2018f53a8c5ca132b 100644
--- a/sklearn/tests/test_multioutput.py
+++ b/sklearn/tests/test_multioutput.py
@@ -356,7 +356,8 @@ def generate_multilabel_dataset_with_correlations():
     X, y = make_classification(n_samples=1000,
                                n_features=100,
                                n_classes=16,
-                               n_informative=10)
+                               n_informative=10,
+                               random_state=0)
 
     Y_multi = np.array([[int(yyy) for yyy in format(yy, '#06b')[2:]]
                         for yy in y])
@@ -470,22 +471,17 @@ def test_classifier_chain_vs_independent_models():
     # Verify that an ensemble of classifier chains (each of length
     # N) can achieve a higher Jaccard similarity score than N independent
     # models
-    yeast = fetch_mldata('yeast')
-    X = yeast['data']
-    Y = yeast['target'].transpose().toarray()
-    X_train = X[:2000, :]
-    X_test = X[2000:, :]
-    Y_train = Y[:2000, :]
-    Y_test = Y[2000:, :]
+    X, Y = generate_multilabel_dataset_with_correlations()
+    X_train = X[:600, :]
+    X_test = X[600:, :]
+    Y_train = Y[:600, :]
+    Y_test = Y[600:, :]
 
     ovr = OneVsRestClassifier(LogisticRegression())
     ovr.fit(X_train, Y_train)
     Y_pred_ovr = ovr.predict(X_test)
 
-    chain = ClassifierChain(LogisticRegression(),
-                            order=np.array([0, 2, 4, 6, 8, 10,
-                                            12, 1, 3, 5, 7, 9,
-                                            11, 13]))
+    chain = ClassifierChain(LogisticRegression())
     chain.fit(X_train, Y_train)
     Y_pred_chain = chain.predict(X_test)