Spaces:

aauss
/

tcp_accuracy

Sleeping

aauss committed on Jan 20

Commit

98a831a

1 Parent(s): 78587a7

Add return average flag.

Files changed (2) hide show

tcp_accuracy.py CHANGED Viewed

@@ -93,9 +93,14 @@ class TCPAccuracy(evaluate.Metric):
             return match.group(1).replace("GMT", "").strip()
         return None
-    def _compute(self, predictions, references, subset: str | list[str]):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
         if isinstance(subset, str):
             subset = [subset] * len(predictions)
         predictions = [self.extract_boxed_answer(p) for p in predictions]
@@ -104,6 +109,6 @@ class TCPAccuracy(evaluate.Metric):
             for r, s in zip(references, subset)
         ]
         accuracy = [int(i == j) for i, j in zip(predictions, references)]
-        return {
-            "accuracy": accuracy,
-        }

             return match.group(1).replace("GMT", "").strip()
         return None
+    def _compute(
+        self,
+        predictions,
+        references,
+        subset: str | list[str],
+        return_average: bool = True,
+    ):
         """Returns the scores"""
         if isinstance(subset, str):
             subset = [subset] * len(predictions)
         predictions = [self.extract_boxed_answer(p) for p in predictions]
             for r, s in zip(references, subset)
         ]
         accuracy = [int(i == j) for i, j in zip(predictions, references)]
+        if return_average:
+            return {"accuracy": sum(accuracy) / len(accuracy)}
+        return {"accuracy": accuracy}

tests/test_metric.py CHANGED Viewed

@@ -21,5 +21,13 @@ def test_metric():
         predictions=[response_1, response_2, response_3],
         references=references,
         subset=subsets,
     )
     assert results["accuracy"] == [1, 0, 1]

         predictions=[response_1, response_2, response_3],
         references=references,
         subset=subsets,
+        return_average=False,
     )
     assert results["accuracy"] == [1, 0, 1]
+    metric = TCPAccuracy()
+    results = metric.compute(
+        predictions=[response_1, response_2, response_3],
+        references=references,
+        subset=subsets,
+    )
+    assert results["accuracy"] == 2/3