Spaces:
Running
Running
Update utils/text_norm.py
Browse files - utils/text_norm.py +8 -3
utils/text_norm.py
CHANGED
|
@@ -6,7 +6,12 @@ from utils.norm_config import norm_config
|
|
| 6 |
|
| 7 |
|
| 8 |
def text_normalize(
|
| 9 |
-
text,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
):
|
| 11 |
|
| 12 |
"""Given a text, normalize it by changing to lower case, removing punctuations, removing words that only contain digits and removing extra spaces
|
|
@@ -95,7 +100,7 @@ def text_normalize(
|
|
| 95 |
|
| 96 |
normalized_text = unidecode(normalized_text)
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
|
| 101 |
return normalized_text
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def text_normalize(
|
| 9 |
+
text,
|
| 10 |
+
iso_code="xxx",
|
| 11 |
+
lower_case=True,
|
| 12 |
+
remove_numbers=False,
|
| 13 |
+
remove_brackets=False,
|
| 14 |
+
rm_extra_spaces=False,
|
| 15 |
):
|
| 16 |
|
| 17 |
"""Given a text, normalize it by changing to lower case, removing punctuations, removing words that only contain digits and removing extra spaces
|
|
|
|
| 100 |
|
| 101 |
normalized_text = unidecode(normalized_text)
|
| 102 |
|
| 103 |
+
if rm_extra_spaces:
|
| 104 |
+
normalized_text = re.sub(r"\s+", " ", normalized_text).strip()
|
| 105 |
|
| 106 |
return normalized_text
|