diff --git a/scikits/learn/metrics.py b/scikits/learn/metrics.py
index 1b3ac7762783db61c36fd66e72ec6d78de221e66..ab5401067a008018c5848d29dc0219c345afd741 100644
--- a/scikits/learn/metrics.py
+++ b/scikits/learn/metrics.py
@@ -133,7 +133,7 @@ def auc(x, y):
     return area


-def precision(y_true, y_pred):
+def precision_score(y_true, y_pred, pos_label=1):
    """Compute the precision

    The precision is the ratio :math:`tp / (tp + fp)` where tp is the number of
@@ -151,14 +151,24 @@ def precision(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         predicted targets

+    pos_label : int
+        in the binary classification case, give the label of the positive
+        class (default is 1)
+
     Returns
     =======
     precision : float
-    """
-    return precision_recall_fscore_support(y_true, y_pred)[0]
+        precision of the positive class in binary classification or weighted
+        average of the precision of each class for the multiclass task
+    """
+    p, _, _, s = precision_recall_fscore_support(y_true, y_pred)
+    if p.shape[0] == 2:
+        return p[pos_label]
+    else:
+        return np.average(p, weights=s)


-def recall(y_true, y_pred):
+def recall_score(y_true, y_pred, pos_label=1):
    """Compute the recall

    The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of
@@ -175,14 +185,24 @@ def recall(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         predicted targets

+    pos_label : int
+        in the binary classification case, give the label of the positive
+        class (default is 1)
+
     Returns
     =======
-    recall : array, shape = [n_unique_labels], dtype = np.double
+    recall : float
+        recall of the positive class in binary classification or weighted
+        average of the recall of each class for the multiclass task
     """
-    return precision_recall_fscore_support(y_true, y_pred)[1]
+    _, r, _, s = precision_recall_fscore_support(y_true, y_pred)
+    if r.shape[0] == 2:
+        return r[pos_label]
+    else:
+        return np.average(r, weights=s)


-def fbeta_score(y_true, y_pred, beta):
+def fbeta_score(y_true, y_pred, beta, pos_label=1):
    """Compute fbeta score

    The F_beta score can be interpreted as a weighted average of the precision
@@ -203,14 +223,25 @@ def fbeta_score(y_true, y_pred, beta):

     beta: float

+    pos_label : int
+        in the binary classification case, give the label of the positive
+        class (default is 1)
+
     Returns
     =======
-    fbeta_score : array, shape = [n_unique_labels], dtype = np.double
+    fbeta_score : float
+        fbeta_score of the positive class in binary classification or weighted
+        average of the fbeta_score of each class for the multiclass task
+
     """
-    return precision_recall_fscore(y_true, y_pred, beta=beta)[2]
+    _, _, f, s = precision_recall_fscore_support(y_true, y_pred, beta=beta)
+    if f.shape[0] == 2:
+        return f[pos_label]
+    else:
+        return np.average(f, weights=s)


-def f1_score(y_true, y_pred):
+def f1_score(y_true, y_pred, pos_label=1):
    """Compute f1 score

    The F1 score can be interpreted as a weighted average of the precision
@@ -222,6 +253,9 @@ def f1_score(y_true, y_pred):

     See: http://en.wikipedia.org/wiki/F1_score

+    In the multi-class case, this is the weighted average of the f1-score of
+    each class.
+
     Parameters
     ==========
     y_true : array, shape = [n_samples]
@@ -230,15 +264,21 @@ def f1_score(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         predicted targets

+    pos_label : int
+        in the binary classification case, give the label of the positive class
+        (default is 1)
+
     Returns
     =======
-    f1_score : array, shape = [n_unique_labels], dtype = np.double
+    f1_score : float
+        f1_score of the positive class in binary classification or weighted
+        average of the f1_scores of each class for the multiclass task

     References
     ==========
     http://en.wikipedia.org/wiki/F1_score
     """
-    return fbeta_score(y_true, y_pred, 1)
+    return fbeta_score(y_true, y_pred, 1, pos_label=pos_label)


 def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None):
diff --git a/scikits/learn/tests/test_metrics.py b/scikits/learn/tests/test_metrics.py
index 4810405a30697122df378474c87b27d510b1bbbc..bd7a85f919286699d534eaefe5f5945c085a3730 100644
--- a/scikits/learn/tests/test_metrics.py
+++ b/scikits/learn/tests/test_metrics.py
@@ -15,10 +15,10 @@
 from ..metrics import confusion_matrix
 from ..metrics import explained_variance
 from ..metrics import f1_score
 from ..metrics import mean_square_error
-from ..metrics import precision
 from ..metrics import precision_recall_curve
 from ..metrics import precision_recall_fscore_support
-from ..metrics import recall
+from ..metrics import precision_score
+from ..metrics import recall_score
 from ..metrics import roc_curve
 from ..metrics import zero_one
@@ -80,12 +80,25 @@ def test_precision_recall_f1_score_binary():
     """Test Precision Recall and F1 Score for binary classification task"""
     y_true, y_pred, _ = make_prediction(binary=True)

+    # detailed measures for each class
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred)
     assert_array_almost_equal(p, [0.73, 0.75], 2)
     assert_array_almost_equal(r, [0.76, 0.72], 2)
     assert_array_almost_equal(f, [0.75, 0.74], 2)
     assert_array_equal(s, [25, 25])

+    # individual scoring function that can be used for grid search: in the
+    # binary class case the score is the value of the measure for the positive
+    # class (e.g. label == 1)
+    ps = precision_score(y_true, y_pred)
+    assert_array_almost_equal(ps, 0.75, 2)
+
+    rs = recall_score(y_true, y_pred)
+    assert_array_almost_equal(rs, 0.72, 2)
+
+    fs = f1_score(y_true, y_pred)
+    assert_array_almost_equal(fs, 0.74, 2)
+

 def test_confusion_matrix_binary():
     """Test confusion matrix - binary classification case"""
     y_true, y_pred, _ = make_prediction(binary=True)
@@ -106,6 +119,19 @@ def test_precision_recall_f1_score_multiclass():
     assert_array_almost_equal(f, [0.87, 0.26, 0.62], 2)
     assert_array_equal(s, [25, 30, 20])

+    # individual scoring function that can be used for grid search: in the
+    # multiclass case the score is the weighted average of the individual
+    # class values, hence f1_score is not necessarily between precision_score
+    # and recall_score
+    ps = precision_score(y_true, y_pred)
+    assert_array_almost_equal(ps, 0.62, 2)
+
+    rs = recall_score(y_true, y_pred)
+    assert_array_almost_equal(rs, 0.61, 2)
+
+    fs = f1_score(y_true, y_pred)
+    assert_array_almost_equal(fs, 0.56, 2)
+
     # same prediction but with and explicit label ordering
     p, r, f, s = precision_recall_fscore_support(
         y_true, y_pred, labels=[0, 2, 1])
@@ -166,7 +192,6 @@ avg / total       0.62      0.61      0.56        75

     assert_equal(report, expected_report)

-
 def test_precision_recall_curve():
     """Test Precision-Recall and aread under PR curve"""
     y_true, _, probas_pred = make_prediction(binary=True)
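
Usage sketch (not part of the patch): the snippet below shows how the renamed scorers behave after this change, assuming the patched scikits.learn package is importable (in today's scikit-learn the equivalents live in sklearn.metrics with an `average` parameter); the toy arrays are illustrative only.

    import numpy as np
    from scikits.learn.metrics import precision_score, recall_score, f1_score

    # Binary task: each scorer returns the measure for the positive class
    # (pos_label=1 by default) as a single float usable for grid search.
    y_true = np.array([0, 1, 1, 0, 1, 0, 1, 1])
    y_pred = np.array([0, 1, 0, 0, 1, 1, 1, 1])
    print(precision_score(y_true, y_pred))   # tp / (tp + fp) for label 1
    print(recall_score(y_true, y_pred))      # tp / (tp + fn) for label 1
    print(f1_score(y_true, y_pred))          # harmonic mean of the two above

    # Multiclass task: each scorer returns the support-weighted average of
    # the per-class values computed by precision_recall_fscore_support.
    y_true = np.array([0, 1, 2, 0, 1, 2, 0, 2])
    y_pred = np.array([0, 2, 1, 0, 0, 2, 0, 2])
    print(precision_score(y_true, y_pred))
    print(recall_score(y_true, y_pred))
    print(f1_score(y_true, y_pred))

The support-weighted average is what collapses the multiclass case to a single float, which is the motivation stated in the test comments: a scalar score that grid search can optimize directly.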