Spaces:

Gaoussin
/

bm-translator

Running

Gaoussin committed on Dec 13, 2025

Commit

894584d

verified ·

1 Parent(s): 50a21cf

Upload normalize_bm_output.py

Files changed (1) hide show

normalize_bm_output.py ADDED Viewed

"""Post-processing for translator output text.

Applies three clean-ups: spacing before question marks, a fixed table of
word-pair contractions, and capitalisation of the first character.
NOTE(review): "bm" in the file name presumably means Bambara -- confirm.
"""
import re

# Lowercase word sequences mapped to their contracted spelling.  Lookups are
# done with the lowercased match, so keys must stay lowercase.
CONTRACTIONS = {
    "ka a": "k'a",
    "be a": "b'a",
    "ne be": "n'be",
    "taa a": "ta'a",
    "ko o": "k'o",
}

# Alternation of the escaped keys, longest first so a longer phrase is tried
# before any shorter one.  `[^\W\d_]` is "a letter"; the negative lookarounds
# therefore require that the phrase is not glued to a letter on either side
# (e.g. "kaka a" must not be rewritten).
PATTERN = re.compile(
    r'(?<![^\W\d_])('
    + '|'.join(sorted(map(re.escape, CONTRACTIONS), key=len, reverse=True))
    + r')(?![^\W\d_])',
    flags=re.IGNORECASE | re.UNICODE,
)

# Any run of whitespace (possibly empty) followed by a question mark.
SPACE_QUESTION_RE = re.compile(r'\s*\?')


def _contract(match: re.Match) -> str:
    """Return the contraction for *match*, keeping the casing of the input.

    The pattern matches case-insensitively, so the matched phrase may be
    capitalised ("Ne be") or upper-case ("NE BE"); the table only stores the
    lowercase form.
    """
    phrase = match.group(0)
    contracted = CONTRACTIONS[phrase.lower()]
    if phrase.isupper():
        return contracted.upper()
    if phrase[:1].isupper():
        return contracted[:1].upper() + contracted[1:]
    return contracted


def normalize_output(text: str) -> str:
    """Normalize a translated sentence.

    Args:
        text: Raw text to clean up.

    Returns:
        The text with exactly one space before every ``?``, the phrases in
        ``CONTRACTIONS`` replaced by their contracted form (preserving a
        leading capital / all-caps), and the first character upper-cased.
        An empty string is returned unchanged.
    """
    # 1) exactly one space before every question mark
    text = SPACE_QUESTION_RE.sub(' ?', text)
    # 2) apply contractions
    text = PATTERN.sub(_contract, text)
    # 3) capitalize the first character; slicing keeps this safe for ""
    return text[:1].upper() + text[1:]


if __name__ == "__main__":
    # Small demo, only when run as a script (not on import).
    sentence = "a be ka a di ne ma ne be taa a fɛ?"
    print(normalize_output(sentence))