Delete normalize_bm_words.py

#4
by Gaoussin - opened
Files changed (1) hide show
  1. normalize_bm_words.py +0 -38
normalize_bm_words.py DELETED
@@ -1,38 +0,0 @@
1
"""Text normalizer: fixes spacing before '?', expands contractions, capitalizes.

NOTE(review): the file name suggests the word list targets Bambara text --
confirm with the author before extending the table.
"""

import re

# Lookup table: contracted / variant spelling -> expanded spelling.
# Keys must be written in lowercase, because matching is case-insensitive and
# the matched text is lowercased before it is looked up in this table.
CONTRACTIONS = {
    "a'": "aw",
    "an": "anw",
    "n'": "ne",
    "n": "ne",
    "ulu": "u",
    "b'a": "be a",
    "bɛ": "be",
    "nka": "nga",
    "loru": "duru",
}

# Single alternation over every (escaped) key.
#  * Keys are sorted longest-first so that a longer key wins over a shorter
#    key that is a prefix of it (e.g. "n'" is tried before "n").
#  * `[^\W\d_]` is "a word character that is neither a decimal digit nor an
#    underscore"; the negative look-behind / look-ahead therefore reject a
#    match that is glued to such a character on either side.
# NOTE(review): because of the trailing guard, a key ending in an apostrophe
# does not match when a letter follows it; "n'i" is rewritten through the
# bare "n" key and becomes "ne'i". Confirm whether that is intended.
PATTERN = re.compile(
    r'(?<![^\W\d_])('
    + '|'.join(sorted(map(re.escape, CONTRACTIONS), key=len, reverse=True))
    + r')(?![^\W\d_])',
    flags=re.IGNORECASE | re.UNICODE
)

# Any run of whitespace (possibly empty) followed by a question mark.
SPACE_QUESTION_RE = re.compile(r'\s*\?')


def normalize_text(text: str) -> str:
    """Return *text* with question-mark spacing, contractions and case fixed.

    Steps, in order:
      1. Every '?' ends up preceded by exactly one space (existing whitespace
         in front of it is collapsed; a space is inserted if there was none).
      2. Every standalone occurrence of a CONTRACTIONS key (matched
         case-insensitively) is replaced by its lowercase expansion.
      3. The first character of the result is uppercased; the rest is left
         as is. An empty string is returned unchanged.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # 1) normalize spacing before '?'
    spaced = SPACE_QUESTION_RE.sub(' ?', text)

    # 2) expand contractions; lowercase the match so it hits the table keys
    expanded = PATTERN.sub(
        lambda match: CONTRACTIONS[match.group(0).lower()], spaced
    )

    # 3) slicing (rather than indexing) keeps the empty string safe
    return expanded[:1].upper() + expanded[1:]


def main() -> None:
    """Print the normalized form of a sample sentence (manual smoke check)."""
    sentence = "bbk'a b'a di n'i sonna. na a sɔɔni? a be na'a nɔfɛ?"
    print(normalize_text(sentence))


# Run the demo only when executed as a script, so importing this module
# no longer prints to stdout.
if __name__ == "__main__":
    main()