import json


# Normalize the key before mapping
def normalization(text: str, normalization_rule: str) -> str:
    """Lowercase *text* and apply the replacement rules from a JSON file.

    The file at ``normalization_rule`` is read as UTF-8 JSON and is expected
    to hold an object mapping each substring to its replacement.  Both sides
    of every rule are lowercased before use, so matching is effectively
    case-insensitive against the already lowercased text.

    Rules are applied one after another in the order they appear in the
    mapping, so the output of an earlier rule can be rewritten by a later one.

    Args:
        text: The string to normalize.
        normalization_rule: Path to the JSON file holding the rules.

    Returns:
        The lowercased, rule-substituted text with leading and trailing
        whitespace removed.
    """
    # Load normalization rule
    with open(normalization_rule, "r", encoding="utf-8") as f:
        replace_dict = json.load(f)

    # Lowercase the text
    text = text.lower()

    # Replace the words
    for old, new in replace_dict.items():
        if not old:
            # str.replace("", x) inserts x between every character, which
            # would corrupt the whole text; an empty pattern matches nothing
            # meaningful, so ignore such a rule.
            continue
        text = text.replace(old.lower(), new.lower())

    return text.strip()