File size: 568 Bytes
51db8d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import json

# Normalize the key before mapping
def normalization(text: str, normalization_rule: str) -> str:
    """Lowercase ``text`` and apply substring replacements from a JSON file.

    Args:
        text: The string to normalize.
        normalization_rule: Path to a UTF-8 encoded JSON file containing an
            object that maps each substring to its replacement.

    Returns:
        The lowercased text with every rule applied, with leading and
        trailing whitespace removed.

    Rules are applied one after another in the order they appear in the
    file, so a later rule operates on the output of the earlier ones.
    Both sides of each rule are lowercased before use. Diacritics are left
    untouched.
    """
    # The rule file is read on every call.
    with open(normalization_rule, "r", encoding="utf-8") as f:
        replace_dict = json.load(f)

    text = text.lower()
    for old, new in replace_dict.items():
        # str.replace with an empty search string inserts the replacement at
        # every character boundary, which would corrupt the whole text, so
        # empty keys are ignored.
        if not old:
            continue
        text = text.replace(old.lower(), new.lower())

    return text.strip()