Update compute_score.py

compute_score.py  CHANGED  (+25 -3)
@@ -26,6 +26,25 @@ def normalize_answer(s):
 
     return white_space_fix(remove_articles(remove_punc(lower(s))))
 
+def precision_score(prediction, ground_truth):
+    prediction_tokens = normalize_answer(prediction).split()
+    ground_truth_tokens = normalize_answer(ground_truth).split()
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+    num_same = sum(common.values())
+    if num_same == 0:
+        return 0
+    precision = 1.0 * num_same / len(prediction_tokens)
+    return precision
+
+def recall_score(prediction, ground_truth):
+    prediction_tokens = normalize_answer(prediction).split()
+    ground_truth_tokens = normalize_answer(ground_truth).split()
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+    num_same = sum(common.values())
+    if num_same == 0:
+        return 0
+    recall = 1.0 * num_same / len(ground_truth_tokens)
+    return recall
 
 def f1_score(prediction, ground_truth):
     prediction_tokens = normalize_answer(prediction).split()
@@ -53,7 +72,7 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
 
 
 def compute_score(dataset, predictions):
-    f1 = exact_match = total = 0
+    precision = recall = f1 = exact_match = total = 0
     for article in dataset:
         for paragraph in article["paragraphs"]:
             for qa in paragraph["qas"]:
@@ -66,11 +85,14 @@ def compute_score(dataset, predictions):
             prediction = predictions[qa["id"]]
             exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
             f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
-
+            precision += metric_max_over_ground_truths(precision_score, prediction, ground_truths)
+            recall += metric_max_over_ground_truths(recall_score, prediction, ground_truths)
     exact_match = 100.0 * exact_match / total
     f1 = 100.0 * f1 / total
+    recall = 100.0 * recall / total
+    precision = 100.0 * precision / total
 
-    return {"exact_match": exact_match, "f1": f1}
+    return {"exact_match": exact_match, "f1": f1, "precision": precision, "recall": recall}
 
 
 if __name__ == "__main__":
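The two new metrics follow the same token-overlap recipe as the existing f1_score: both strings go through normalize_answer (lowercasing, stripping articles and punctuation), are split into tokens, and a Counter intersection counts shared tokens with multiplicity; precision divides that count by the number of predicted tokens, recall by the number of reference tokens. A minimal sketch of the behavior, assuming compute_score.py is importable from the working directory (the example strings are invented):

    from compute_score import f1_score, precision_score, recall_score

    pred, gold = "the cat sat", "the cat"
    # normalize_answer strips the article "the", so the token lists are
    # prediction = ["cat", "sat"] and reference = ["cat"]
    print(precision_score(pred, gold))  # 0.5    (1 shared token / 2 predicted tokens)
    print(recall_score(pred, gold))     # 1.0    (1 shared token / 1 reference token)
    print(f1_score(pred, gold))         # ~0.667 (harmonic mean of the two)

Both functions return 0 outright when there is no token overlap, matching the guard already used in f1_score.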
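With this change compute_score aggregates four metrics. For each question it takes the best score over all ground-truth answers via metric_max_over_ground_truths, sums across questions, then divides by total and scales to a 0-100 range. An end-to-end sketch on a hypothetical one-question dataset in SQuAD v1.1 layout (ids and strings invented for illustration):

    from compute_score import compute_score

    # Hypothetical SQuAD-style input: articles -> paragraphs -> qas
    # (the list usually passed in from a SQuAD file's "data" field).
    dataset = [{
        "paragraphs": [{
            "qas": [{
                "id": "q1",
                "answers": [{"text": "Denver Broncos"}],
            }]
        }]
    }]
    predictions = {"q1": "the Denver Broncos"}

    print(compute_score(dataset, predictions))
    # {'exact_match': 100.0, 'f1': 100.0, 'precision': 100.0, 'recall': 100.0}
    # normalization drops the article "the", so prediction and answer match exactly

Assuming the unchanged loop body matches the upstream SQuAD script (ids missing from predictions are skipped after total is incremented), the new precision and recall averages treat unanswered questions as zeros, just as exact_match and f1 already do.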