update jer metric to add equality operation argument
jer.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 """TODO: Add a description here."""
 
+from operator import eq
 from typing import Iterable
 
 import evaluate
@@ -43,17 +44,16 @@ Args:
         should be a string with tokens separated by spaces.
     references: list of reference for each prediction. Each
         reference should be a string with tokens separated by spaces.
+    eq_fn: function to compare two items. Defaults to the equality operator.
 Returns:
-
-
+    recall:
+    precision:
+    f1:
 Examples:
-
-
-
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+    >>> jer = evaluate.load("jer")
+    >>> results = jer.compute(references=[["Baris | play | tennis", "Deniz | travel | London"]], predictions=[["Baris | play | tennis"]])
     >>> print(results)
-    {'
+    {'recall': 0.5, 'precision': 1.0, 'f1': 0.6666666666666666}
 """
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
@@ -85,7 +85,7 @@ class jer(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
-    def _compute(self, predictions, references):
+    def _compute(self, predictions, references, eq_fn=eq):
         """Returns the scores"""
         score_dicts = [
             self._compute_single(prediction=prediction, reference=reference)
@@ -93,22 +93,28 @@ class jer(evaluate.Metric):
         ]
         return {('mean_' + key): np.mean([scores[key] for scores in score_dicts]) for key in score_dicts[0].keys()}
 
-    def _compute_single(self, *, prediction: Iterable[str | tuple | int], reference: Iterable[str | tuple | int]):
+    def _compute_single(self, *, prediction: Iterable[str | tuple | int], reference: Iterable[str | tuple | int], eq_fn=eq):
         reference_set = set(reference)
         assert len(reference) == len(reference_set), f"Duplicates found in the reference list {reference}"
         prediction_set = set(prediction)
 
-
-
-
+        tp = sum(int(is_in(item, prediction, eq_fn=eq_fn)) for item in reference)
+        fp = len(prediction_set) - tp
+        fn = len(reference_set) - tp
 
         # Calculate metrics
-        precision =
-        recall =
+        precision = tp / (tp + fp) if tp + fp > 0 else 0
+        recall = tp / (tp + fn) if tp + fn > 0 else 0
        f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
 
         return {
             'precision': precision,
             'recall': recall,
             'f1': f1_score
-        }
+        }
+
+def is_in(target, collection: Iterable, eq_fn=eq) -> bool:
+    for item in collection:
+        if eq_fn(item, target):
+            return True
+    return False
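The output in the new doctest follows from the counting added in _compute_single: of the two reference triples one is predicted (tp = 1, fn = 1) and the single predicted triple is correct (fp = 0), giving recall = 1 / 2 = 0.5, precision = 1 / 1 = 1.0 and f1 = 2 * (1.0 * 0.5) / (1.0 + 0.5) = 0.6666666666666666.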
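To illustrate what the new eq_fn argument enables, here is a minimal self-contained sketch of the same per-example tp/fp/fn counting. prf is a hypothetical stand-in for _compute_single, is_in mirrors the helper added in this commit, and the case-insensitive lambda is only an example comparison, not part of the module.

from operator import eq
from typing import Iterable

def is_in(target, collection: Iterable, eq_fn=eq) -> bool:
    # True if any item of `collection` compares equal to `target` under `eq_fn`.
    return any(eq_fn(item, target) for item in collection)

def prf(prediction: list[str], reference: list[str], eq_fn=eq) -> dict:
    # Per-example precision/recall/F1: a reference item counts as a true
    # positive when it matches some predicted item under `eq_fn`.
    tp = sum(int(is_in(item, prediction, eq_fn=eq_fn)) for item in reference)
    fp = len(set(prediction)) - tp
    fn = len(set(reference)) - tp
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    return {"precision": precision, "recall": recall, "f1": f1}

reference = ["Baris | play | tennis", "Deniz | travel | London"]
prediction = ["baris | play | Tennis"]

# Default equality: the casing differs, so nothing matches.
print(prf(prediction, reference))  # {'precision': 0.0, 'recall': 0.0, 'f1': 0}
# A case-insensitive comparison recovers the match.
print(prf(prediction, reference, eq_fn=lambda a, b: a.lower() == b.lower()))
# {'precision': 1.0, 'recall': 0.5, 'f1': 0.6666666666666666}

With the default operator.eq the counting reduces to plain exact matching.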
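Across examples, _compute then averages the per-example dictionaries under a mean_ prefix; a short sketch of that aggregation step with two made-up score dicts:

import numpy as np

score_dicts = [
    {"precision": 1.0, "recall": 0.5, "f1": 0.6666666666666666},
    {"precision": 0.5, "recall": 1.0, "f1": 0.6666666666666666},
]
# Same dict comprehension as _compute: each key is averaged across examples
# and returned under a 'mean_' prefix.
result = {("mean_" + key): np.mean([scores[key] for scores in score_dicts]) for key in score_dicts[0].keys()}
print(result)  # mean_precision and mean_recall come out as 0.75, mean_f1 stays at 2/3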