Spaces:

marksverdhei
/

errant_gec

No application file

marksverdhei committed on Dec 4, 2025

Commit

6e6f86e

verified ·

1 Parent(s): 44444ec

Upload errant_gec.py with huggingface_hub

Files changed (1) hide show

errant_gec.py CHANGED Viewed

@@ -138,12 +138,23 @@ class Errant(evaluate.Metric):
         self._annotators[lang] = annotator
         return annotator
-    def _get_edits(self, annotator, orig_doc, cor_doc):
         """Extract edits between original and corrected documents.
         Returns a set of (o_start, o_end, o_str, c_str) tuples.
         """
-        edits = annotator.annotate(orig_doc, cor_doc)
         edit_set = set()
         for edit in edits:
             # Skip noop edits (no actual change)
@@ -210,8 +221,8 @@ class Errant(evaluate.Metric):
             ref_doc = annotator.parse(reference)
             # Get edit sets
-            hyp_edits = self._get_edits(annotator, orig_doc, hyp_doc)
-            ref_edits = self._get_edits(annotator, orig_doc, ref_doc)
             # Compute TP, FP, FN for this sample
             tp = len(ref_edits & hyp_edits)

         self._annotators[lang] = annotator
         return annotator
+    def _get_edits(self, annotator, orig_doc, cor_doc, lang: str = "en"):
         """Extract edits between original and corrected documents.
         Returns a set of (o_start, o_end, o_str, c_str) tuples.
+        For non-English languages, we skip classification since ERRANT's
+        classifier uses English-specific POS tag mappings.
         """
+        # Use align and merge without classification for non-English
+        # This matches the behavior of errant_parallel with -lev flag
+        alignment = annotator.align(orig_doc, cor_doc, lev=True)
+        edits = annotator.merge(alignment)
+        # Only classify for English (classifier uses English POS tags)
+        if lang == "en":
+            edits = [annotator.classify(edit) for edit in edits]
         edit_set = set()
         for edit in edits:
             # Skip noop edits (no actual change)
             ref_doc = annotator.parse(reference)
             # Get edit sets
+            hyp_edits = self._get_edits(annotator, orig_doc, hyp_doc, lang)
+            ref_edits = self._get_edits(annotator, orig_doc, ref_doc, lang)
             # Compute TP, FP, FN for this sample
             tp = len(ref_edits & hyp_edits)