diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index e0e845a04f53992cb59e1e49b4bc046c8f3dd6e0..55fe227682190547eb5944f2b84931ab14dba0f4 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -197,8 +197,8 @@ It loses even more if we also strip this metadata from the training data:
   >>> clf.fit(vectors, newsgroups_train.target)
   >>> vectors_test = vectorizer.transform(newsgroups_test.data)
   >>> pred = clf.predict(vectors_test)
-  >>> metrics.f1_score(newsgroups_test.target, pred, average='weighted')
-  0.73160869205141166
+  >>> metrics.f1_score(newsgroups_test.target, pred, average='macro')
+  0.65437545099490202
 
 Some other classifiers cope better with this harder version of the task. Try running
 :ref:`example_model_selection_grid_search_text_feature_extraction.py` with and without
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 2794948ce93d5c3c4758656906af1c364f0802fc..5f93333e585cff750d9fe6f265a0b92dd8f0cade 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -469,7 +469,7 @@ def test_precision_recall_f1_score_multiclass_pos_label_none():
     # compute scores with default labels introspection
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                  pos_label=None,
-                                                 average='binary')
+                                                 average='macro')
 
 
 def test_zero_precision_recall():
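
Note (not part of the patch): the documented score drops because 'weighted'
averaging weights each class's F1 by its support, while 'macro' averaging gives
every class equal weight, so rare, poorly classified classes pull the macro
score down more. A minimal sketch of that distinction, using invented toy
labels rather than the newsgroups data:

    import numpy as np
    from sklearn import metrics

    # class 0 dominates and is classified well; class 2 is rare and noisy
    y_true = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2])
    y_pred = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2, 0])

    # support-weighted mean of per-class F1 scores (pulled up by class 0)
    print(metrics.f1_score(y_true, y_pred, average='weighted'))
    # unweighted mean of per-class F1 scores (pulled down by class 2)
    print(metrics.f1_score(y_true, y_pred, average='macro'))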