# Uploaded by vinhngba2704
# First commit to this repo (51db8d1)
import json
# Normalize the key before mapping
def normalization(text: str, normalization_rule) -> str:
    """Normalize *text* using a table of substring replacements.

    The text is lowercased, every key of the rule table is replaced by its
    value (both lowercased), and surrounding whitespace is stripped.
    Replacements are applied sequentially in the table's iteration order, so
    an earlier replacement can affect what later keys match. Diacritics are
    left untouched.

    Args:
        text: The string to normalize.
        normalization_rule: Either a path to a UTF-8 JSON file holding an
            object of ``{"old": "new"}`` string pairs, or an already-loaded
            ``dict`` of the same shape (lets callers avoid re-reading the
            file on every call).

    Returns:
        The normalized, stripped, lowercase string.

    Raises:
        OSError: If the rule file cannot be opened.
        json.JSONDecodeError: If the rule file is not valid JSON.
    """
    if isinstance(normalization_rule, dict):
        replace_dict = normalization_rule
    else:
        with open(normalization_rule, "r", encoding="utf-8") as f:
            replace_dict = json.load(f)

    text = text.lower()

    for old, new in replace_dict.items():
        # str.replace("", x) inserts x between every character, so an empty
        # key would corrupt the whole text; ignore such entries.
        if not old:
            continue
        text = text.replace(old.lower(), new.lower())

    return text.strip()