import re

# Maps an uncontracted form to its contracted spelling.
# Keys containing a space are treated as multi-word phrases; keys without a
# space are treated as single-word contractions. The table is read on every
# call, so entries added at runtime take effect immediately.
CONTRACTIONS = {
    # Multi-word contractions (keys are space-separated)
    "ka a": "k'a",
    "a be a": "a b'a",
    "be a": "b'a",
    "ko o": "k'o",
    "di i": "d'i",
    "be i": "b'i",
    # Example single-word contraction (assuming this is a desired one):
    # "kaa": "k'aa",
}

# Matches every whole word; used for the single-word lookup pass.
_WORD_PATTERN = re.compile(r'\b\w+\b')


def normalize_bm_output(text: str) -> str:
    """Apply the contractions in ``CONTRACTIONS`` to *text*.

    The text is lowercased, multi-word phrases are contracted (longest key
    first), single-word contractions are applied, and finally the first
    character of the result is uppercased.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string. An empty input yields an empty string.
    """
    text = text.lower()

    # --- Part 1: multi-word contractions ---
    # Longest keys go first so that a long phrase such as "a be a" is handled
    # before a shorter phrase it contains ("be a"). sorted() is stable, so
    # keys of equal length keep their dictionary order.
    multi_word = sorted(
        ((phrase, contracted) for phrase, contracted in CONTRACTIONS.items()
         if ' ' in phrase),
        key=lambda item: len(item[0]),
        reverse=True,
    )
    for phrase, contracted in multi_word:
        # \b on both sides keeps the phrase from matching inside longer
        # words; re.escape neutralises regex metacharacters in the key.
        pattern = r'\b' + re.escape(phrase) + r'\b'
        # A callable replacement inserts the contracted form literally. A
        # plain string would be parsed as a template, so a backslash in the
        # value would be treated as an escape / group reference.
        text = re.sub(
            pattern,
            lambda _match, _literal=contracted: _literal,
            text,
            flags=re.IGNORECASE,
        )

    # --- Part 2: single-word contractions ---
    # Keys are lowercased because the text was lowercased above; this keeps
    # the lookup case-insensitive like the multi-word pass.
    single_word = {
        word.lower(): contracted
        for word, contracted in CONTRACTIONS.items()
        if ' ' not in word
    }
    if single_word:
        # Words without an entry are returned unchanged.
        text = _WORD_PATTERN.sub(
            lambda match: single_word.get(match.group(0), match.group(0)),
            text,
        )

    # Slicing (rather than indexing) keeps the empty string safe.
    return text[:1].upper() + text[1:]


# --- Example usage with both types of contractions ---
# input_text_4 = "ka a di a be i fɛ kɔgɔ ne be a fɔ."
# print(f"Original Text: {input_text_4}")
# normalized_4 = normalize_bm_output(input_text_4)
# print(f"Normalized Text: {normalized_4}\n")