Delete normalize_bm_output.py

#3
by Gaoussin - opened
Files changed (1) hide show
  1. normalize_bm_output.py +0 -67
normalize_bm_output.py DELETED
@@ -1,67 +0,0 @@
1
- import re
2
-
3
- # Define the contractions dictionary
4
# Lookup table of contractions: each key is the uncontracted, lowercase form
# and each value is the contracted spelling that replaces it.
# Entry order matters: keys of equal length are applied in the order listed.
CONTRACTIONS = {
    # Multi-word entries: the key is a space-separated phrase.
    "ka a": "k'a",
    "a be a": "a b'a",
    "be a": "b'a",
    "ko o": "k'o",
    "di i": "d'i",
    "be i": "b'i",
    # Single-word entries are keys without a space. Disabled example:
    # "kaa": "k'aa",
}
15
-
16
def normalize_bm_output(text: str) -> str:
    """Lowercase *text*, apply the CONTRACTIONS table, and capitalize the start.

    Processing order:

    1. The whole text is lowercased.
    2. Every multi-word key of ``CONTRACTIONS`` (a key containing a space)
       is replaced by its contracted form wherever it occurs as a whole
       phrase. Longer keys are applied before shorter ones; keys of equal
       length are applied in table order.
    3. Every whole word that is a single-word key of ``CONTRACTIONS`` is
       replaced by its contracted form.
    4. The first character of the result is uppercased.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string. An empty input yields an empty string.
    """
    text = text.lower()

    # Split the table once into phrase rules (key contains a space) and
    # single-word rules, instead of filtering the whole table twice.
    phrase_rules = []
    word_rules = {}
    for source, contracted in CONTRACTIONS.items():
        if " " in source:
            phrase_rules.append((source, contracted))
        else:
            word_rules[source] = contracted

    # Longest phrase first, so a short key cannot consume part of a longer
    # one. The sort is stable, so equal-length keys keep their table order.
    phrase_rules.sort(key=lambda rule: len(rule[0]), reverse=True)

    for phrase, contracted in phrase_rules:
        # \b on both sides restricts the match to whole words; re.escape
        # keeps any regex metacharacters in the key literal.
        pattern = r"\b" + re.escape(phrase) + r"\b"
        # IGNORECASE is retained so a key written with capitals still
        # matches the lowercased text.
        text = re.sub(pattern, contracted, text, flags=re.IGNORECASE)

    # Visiting every word is only useful when single-word rules exist;
    # with none, the substitution would return each word unchanged.
    if word_rules:
        text = re.sub(
            r"\b\w+\b",
            lambda match: word_rules.get(match.group(0), match.group(0)),
            text,
        )

    # Slicing rather than indexing keeps the empty string safe.
    return text[:1].upper() + text[1:]
60
-
61
- # --- Example Usage with both types of contractions ---
62
-
63
- #input_text_4 = "ka a di a be i fɛ kɔgɔ ne be a fɔ."
64
-
65
- #print(f"Original Text: {input_text_4}")
66
- #normalized_4 = normalize_bm_output(input_text_4)
67
- #print(f"Normalized Text: {normalized_4}\n")