# Spaces: Running
import re
# Contraction table used by normalize_bm_output(): each key is the phrase as
# written out in full, each value is its contracted (apostrophe) form.
CONTRACTIONS = {
    # Multi-word contractions (keys are space-separated).
    # NOTE(review): "a be a" looks redundant with "be a" (both turn "be a"
    # into "b'a"); kept so the table's contents are unchanged.
    "ka a": "k'a",
    "a be a": "a b'a",
    "be a": "b'a",
    "ko o": "k'o",
    "di i": "d'i",
    "be i": "b'i",
    # Single-word contractions (keys without a space) can be added here.
    # Example, currently disabled:
    # "kaa": "k'aa",
}
def normalize_bm_output(text: str) -> str:
    """Apply the CONTRACTIONS table to *text* and capitalize its first character.

    The text is lowercased first. Keys of CONTRACTIONS that contain a space
    are then replaced as whole phrases (word-boundary delimited), longest key
    first so a longer phrase is handled before any shorter one. After that,
    every remaining whole word that is a space-free key of CONTRACTIONS is
    replaced by its value.

    Args:
        text: The string to normalize.

    Returns:
        The lowercased, contracted text with its first character uppercased.
        An empty string is returned unchanged.
    """
    text = text.lower()

    # --- Multi-word contractions ---
    # Longest key first; the sort is stable, so keys of equal length keep
    # their order from the table.
    phrase_rules = [(key, value) for key, value in CONTRACTIONS.items() if " " in key]
    phrase_rules.sort(key=lambda rule: len(rule[0]), reverse=True)

    for phrase, contracted in phrase_rules:
        pattern = r"\b" + re.escape(phrase) + r"\b"
        # A callable replacement inserts the contracted form literally; a plain
        # string would be parsed as a regex template, so a backslash in the
        # table value would be misread or rejected.
        # IGNORECASE is kept so keys written with capitals still match the
        # already-lowercased text.
        text = re.sub(
            pattern,
            lambda _match, replacement=contracted: replacement,
            text,
            flags=re.IGNORECASE,
        )

    # --- Single-word contractions ---
    word_rules = {key: value for key, value in CONTRACTIONS.items() if " " not in key}
    # Skip the per-word scan entirely when the table has no single-word keys.
    if word_rules:
        text = re.sub(
            r"\b\w+\b",
            # Words that are not keys are returned unchanged.
            lambda match: word_rules.get(match.group(0), match.group(0)),
            text,
        )

    return text[:1].upper() + text[1:]
# --- Example usage (the table currently holds multi-word contractions only) ---
# input_text_4 = "ka a di a be i fɛ kɔgɔ ne be a fɔ."
# print(f"Original Text: {input_text_4}")
# normalized_4 = normalize_bm_output(input_text_4)
# print(f"Normalized Text: {normalized_4}\n")