diff --git a/scikits/learn/metrics.py b/scikits/learn/metrics.py
index 1b3ac7762783db61c36fd66e72ec6d78de221e66..ab5401067a008018c5848d29dc0219c345afd741 100644
--- a/scikits/learn/metrics.py
+++ b/scikits/learn/metrics.py
@@ -133,7 +133,7 @@ def auc(x, y):
     return area
 
 
-def precision(y_true, y_pred):
+def precision_score(y_true, y_pred, pos_label=1):
     """Compute the precision
 
     The precision is the ratio :math:`tp / (tp + fp)` where tp is the number of
@@ -151,14 +151,24 @@ def precision(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         predicted targets
 
+    pos_label : int
+        label of the positive class in the binary classification case
+        (default is 1)
+
     Returns
     =======
     precision : float
-    """
-    return precision_recall_fscore_support(y_true, y_pred)[0]
+        precision of the positive class in binary classification or weighted
+        average of the precision of each class for the multiclass task
+    """
+    p, _, _, s = precision_recall_fscore_support(y_true, y_pred)
+    if p.shape[0] == 2:
+        return p[pos_label]
+    else:
+        return np.average(p, weights=s)
 
 
-def recall(y_true, y_pred):
+def recall_score(y_true, y_pred, pos_label=1):
     """Compute the recall
 
     The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of
@@ -175,14 +185,24 @@ def recall(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         predicted targets
 
+    pos_label : int
+        label of the positive class in the binary classification case
+        (default is 1)
+
     Returns
     =======
-    recall : array, shape = [n_unique_labels], dtype = np.double
+    recall : float
+        recall of the positive class in binary classification or weighted
+        average of the recall of each class for the multiclass task
     """
-    return precision_recall_fscore_support(y_true, y_pred)[1]
+    _, r, _, s = precision_recall_fscore_support(y_true, y_pred)
+    if r.shape[0] == 2:
+        return r[pos_label]
+    else:
+        return np.average(r, weights=s)
 
 
-def fbeta_score(y_true, y_pred, beta):
+def fbeta_score(y_true, y_pred, beta, pos_label=1):
     """Compute fbeta score
 
     The F_beta score can be interpreted as a weighted average of the precision
@@ -203,14 +223,25 @@ def fbeta_score(y_true, y_pred, beta):
 
     beta: float
 
+    pos_label : int
+        label of the positive class in the binary classification case
+        (default is 1)
+
     Returns
     =======
-    fbeta_score : array, shape = [n_unique_labels], dtype = np.double
+    fbeta_score : float
+        fbeta_score of the positive class in binary classification or weighted
+        average of the fbeta_score of each class for the multiclass task
+
     """
-    return precision_recall_fscore(y_true, y_pred, beta=beta)[2]
+    _, _, f, s = precision_recall_fscore_support(y_true, y_pred, beta=beta)
+    if f.shape[0] == 2:
+        return f[pos_label]
+    else:
+        return np.average(f, weights=s)
 
 
-def f1_score(y_true, y_pred):
+def f1_score(y_true, y_pred, pos_label=1):
     """Compute f1 score
 
     The F1 score can be interpreted as a weighted average of the precision
@@ -222,6 +253,9 @@ def f1_score(y_true, y_pred):
 
     See: http://en.wikipedia.org/wiki/F1_score
 
+    In the multi-class case, this is the weighted average of the f1-score of
+    each class.
+
     Parameters
     ==========
     y_true : array, shape = [n_samples]
@@ -230,15 +264,21 @@ def f1_score(y_true, y_pred):
     y_pred : array, shape = [n_samples]
         predicted targets
 
+    pos_label : int
+        label of the positive class in the binary classification case
+        (default is 1)
+
     Returns
     =======
-    f1_score : array, shape = [n_unique_labels], dtype = np.double
+    f1_score : float
+        f1_score of the positive class in binary classification or weighted
+        average of the f1_scores of each class for the multiclass task
 
     References
     ==========
     http://en.wikipedia.org/wiki/F1_score
     """
-    return fbeta_score(y_true, y_pred, 1)
+    return fbeta_score(y_true, y_pred, 1, pos_label=pos_label)
 
 
 def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None):
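To illustrate the behaviour introduced above, a minimal sketch of how the renamed scorers could be called (the toy label arrays below are illustrative and not part of the patch; the expected figures follow from the tp/fp/fn counts in the docstring formulas):

    import numpy as np
    from scikits.learn.metrics import precision_score, recall_score, f1_score

    # Binary case: each function returns the score of the positive class,
    # selected by pos_label (label 1 by default).
    y_true = np.array([0, 0, 0, 1, 1, 1])
    y_pred = np.array([0, 0, 1, 1, 1, 1])
    precision_score(y_true, y_pred)               # tp=3, fp=1 -> 0.75
    recall_score(y_true, y_pred)                  # tp=3, fn=0 -> 1.0
    precision_score(y_true, y_pred, pos_label=0)  # precision of class 0 -> 1.0
    f1_score(y_true, y_pred)                      # 2 * 0.75 * 1.0 / (0.75 + 1.0) ~= 0.86

    # Multiclass case: each function returns a single float, the
    # support-weighted average of the per-class values.
    y_true = np.array([0, 1, 2, 0, 1, 2])
    y_pred = np.array([0, 2, 1, 0, 0, 1])
    f1_score(y_true, y_pred)
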
diff --git a/scikits/learn/tests/test_metrics.py b/scikits/learn/tests/test_metrics.py
index 4810405a30697122df378474c87b27d510b1bbbc..bd7a85f919286699d534eaefe5f5945c085a3730 100644
--- a/scikits/learn/tests/test_metrics.py
+++ b/scikits/learn/tests/test_metrics.py
@@ -15,10 +15,10 @@ from ..metrics import confusion_matrix
 from ..metrics import explained_variance
 from ..metrics import f1_score
 from ..metrics import mean_square_error
-from ..metrics import precision
 from ..metrics import precision_recall_curve
 from ..metrics import precision_recall_fscore_support
-from ..metrics import recall
+from ..metrics import precision_score
+from ..metrics import recall_score
 from ..metrics import roc_curve
 from ..metrics import zero_one
 
@@ -80,12 +80,25 @@ def test_precision_recall_f1_score_binary():
     """Test Precision Recall and F1 Score for binary classification task"""
     y_true, y_pred, _ = make_prediction(binary=True)
 
+    # detailed measures for each class
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred)
     assert_array_almost_equal(p, [0.73, 0.75], 2)
     assert_array_almost_equal(r, [0.76, 0.72], 2)
     assert_array_almost_equal(f, [0.75, 0.74], 2)
     assert_array_equal(s, [25, 25])
 
+    # individual scoring functions that can be used for grid search: in the
+    # binary classification case the score is the value of the measure for
+    # the positive class (by default the class with label == 1)
+    ps = precision_score(y_true, y_pred)
+    assert_array_almost_equal(ps, 0.75, 2)
+
+    rs = recall_score(y_true, y_pred)
+    assert_array_almost_equal(rs, 0.72, 2)
+
+    fs = f1_score(y_true, y_pred)
+    assert_array_almost_equal(fs, 0.74, 2)
+
 
 def test_confusion_matrix_binary():
     """Test confusion matrix - binary classification case"""
@@ -106,6 +119,19 @@ def test_precision_recall_f1_score_multiclass():
     assert_array_almost_equal(f, [0.87, 0.26, 0.62], 2)
     assert_array_equal(s, [25, 30, 20])
 
+    # individual scoring functions that can be used for grid search: in the
+    # multiclass case the score is the support-weighted average of the
+    # individual class values, hence f1_score is not necessarily between
+    # precision_score and recall_score
+    ps = precision_score(y_true, y_pred)
+    assert_array_almost_equal(ps, 0.62, 2)
+
+    rs = recall_score(y_true, y_pred)
+    assert_array_almost_equal(rs, 0.61, 2)
+
+    fs = f1_score(y_true, y_pred)
+    assert_array_almost_equal(fs, 0.56, 2)
+
     # same prediction but with and explicit label ordering
     p, r, f, s = precision_recall_fscore_support(
         y_true, y_pred, labels=[0, 2, 1])
@@ -166,7 +192,6 @@ avg / total       0.62      0.61      0.56        75
     assert_equal(report, expected_report)
 
 
-
 def test_precision_recall_curve():
     """Test Precision-Recall and aread under PR curve"""
     y_true, _, probas_pred = make_prediction(binary=True)
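
The comments in the new tests point out why a single float return value matters: it lets these functions serve directly as a model selection criterion. A hypothetical sketch of that use, assuming an SVC classifier and a held-out validation split (the data and candidate values of C below are illustrative, not part of the patch):

    import numpy as np
    from scikits.learn.svm import SVC
    from scikits.learn.metrics import f1_score

    # Toy binary problem standing in for a real train/validation split.
    rng = np.random.RandomState(0)
    X = rng.randn(200, 5)
    y = (X[:, 0] + 0.5 * rng.randn(200) > 0).astype(int)
    X_train, y_train, X_val, y_val = X[:100], y[:100], X[100:], y[100:]

    # Keep the value of C whose predictions score best on the validation set;
    # f1_score returns one float per candidate, which is all that is needed here.
    best_C, best_f1 = None, -1.0
    for C in [0.1, 1.0, 10.0]:
        clf = SVC(kernel='linear', C=C)
        clf.fit(X_train, y_train)
        score = f1_score(y_val, clf.predict(X_val))
        if score > best_f1:
            best_C, best_f1 = C, score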