diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 01c2a53ff77e5401068b3991fe052ff5559ddfcb..e0e845a04f53992cb59e1e49b4bc046c8f3dd6e0 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -132,8 +132,8 @@ which is fast to train and achieves a decent F-score::
   >>> clf = MultinomialNB(alpha=.01)
   >>> clf.fit(vectors, newsgroups_train.target)
   >>> pred = clf.predict(vectors_test)
-  >>> metrics.f1_score(newsgroups_test.target, pred, average='weighted')
-  0.88251152461278892
+  >>> metrics.f1_score(newsgroups_test.target, pred, average='macro')
+  0.88213592402729568
 
 (The example :ref:`example_text_document_classification_20newsgroups.py` shuffles
 the training and test data, instead of segmenting by time, and in that case
@@ -182,8 +182,8 @@ blocks, and quotation blocks respectively.
   ...                                      categories=categories)
   >>> vectors_test = vectorizer.transform(newsgroups_test.data)
   >>> pred = clf.predict(vectors_test)
-  >>> metrics.f1_score(pred, newsgroups_test.target, average='weighted')
-  0.78409163025839435
+  >>> metrics.f1_score(pred, newsgroups_test.target, average='macro')
+  0.77310350681274775
 
 This classifier lost over a lot of its F-score, just because we removed
 metadata that has little to do with topic classification.
diff --git a/examples/model_selection/grid_search_digits.py b/examples/model_selection/grid_search_digits.py
index 40ed573247efd0f61ec9de000ea5c2c8d3adc160..13755b0bc8c105ac1dd5f636dee405bb8ba523b1 100644
--- a/examples/model_selection/grid_search_digits.py
+++ b/examples/model_selection/grid_search_digits.py
@@ -51,7 +51,7 @@ for score in scores:
     print()
 
     clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5,
-                       scoring='%s_weighted' % score)
+                       scoring='%s_macro' % score)
     clf.fit(X_train, y_train)
 
     print("Best parameters set found on development set:")
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index f28e6cc77093b12a9db2dff23bdbc05565dd15fb..2794948ce93d5c3c4758656906af1c364f0802fc 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -469,7 +469,7 @@ def test_precision_recall_f1_score_multiclass_pos_label_none():
     # compute scores with default labels introspection
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                  pos_label=None,
-                                                 average='weighted')
+                                                 average='binary')
 
 
 def test_zero_precision_recall():
@@ -482,10 +482,10 @@ def test_zero_precision_recall():
         y_pred = np.array([2, 0, 1, 1, 2, 0])
 
         assert_almost_equal(precision_score(y_true, y_pred,
-                                            average='weighted'), 0.0, 2)
-        assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
+                                            average='macro'), 0.0, 2)
+        assert_almost_equal(recall_score(y_true, y_pred, average='macro'),
                             0.0, 2)
-        assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
+        assert_almost_equal(f1_score(y_true, y_pred, average='macro'),
                             0.0, 2)
 
     finally:
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 670180695e4523d619354d1c4b35ead034be5cd5..df9f6f988c0c50080ecf1f2c9c07133704f8cba1 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -439,9 +439,9 @@ def test_auto_weight():
         y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
         clf.set_params(class_weight='balanced')
         y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],).predict(X)
-        assert_true(metrics.f1_score(y, y_pred, average='weighted')
+        assert_true(metrics.f1_score(y, y_pred, average='macro')
                     <= metrics.f1_score(y, y_pred_balanced,
-                                        average='weighted'))
+                                        average='macro'))
 
 
 def test_bad_input():
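
Context for the swap (a minimal sketch, not part of the patch; the toy labels below are invented for illustration): average='weighted' weights each class's F1 by its support, so a dominant class can hide a complete miss on a rare class, while average='macro' gives every class equal weight.

    from sklearn.metrics import f1_score

    y_true = [0, 0, 0, 0, 0, 0, 1, 1]  # class 0 dominates
    y_pred = [0, 0, 0, 0, 0, 0, 0, 0]  # class 1 is never predicted

    # Per-class F1: class 0 ~= 0.86, class 1 = 0 (sklearn warns about the
    # undefined precision for class 1 and scores it as 0).
    print(f1_score(y_true, y_pred, average='weighted'))  # ~0.64, inflated by class 0
    print(f1_score(y_true, y_pred, average='macro'))     # ~0.43, penalizes missing class 1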