Update eval.py
Browse files
eval.py
CHANGED
|
@@ -130,11 +130,10 @@ def normalize_text(original_text: str, dataset: str) -> str:
|
|
| 130 |
text = re.sub('[úùüû]', 'u', text)
|
| 131 |
text = re.sub('[«»]', '', text)
|
| 132 |
text = re.sub('\s+', ' ', text)
|
| 133 |
-
text = re.sub('<
|
| 134 |
-
text = re.sub('<
|
| 135 |
-
text = re.sub('<
|
| 136 |
text = re.sub('<inaudible>', 'xxx', text)
|
| 137 |
-
text = re.sub('[<>]', '', text)
|
| 138 |
|
| 139 |
# # In addition, we can normalize the target text, e.g. removing new lines characters etc...
|
| 140 |
# # note that order is important here!
|
|
@@ -143,7 +142,7 @@ def normalize_text(original_text: str, dataset: str) -> str:
|
|
| 143 |
# for t in token_sequences_to_ignore:
|
| 144 |
# text = " ".join(text.split(t))
|
| 145 |
|
| 146 |
-
return text
|
| 147 |
|
| 148 |
|
| 149 |
def main(args):
|
|
|
|
| 130 |
text = re.sub('[úùüû]', 'u', text)
|
| 131 |
text = re.sub('[«»]', '', text)
|
| 132 |
text = re.sub('\s+', ' ', text)
|
| 133 |
+
text = re.sub('<ee>', 'eee', text)
|
| 134 |
+
text = re.sub('<qq>', 'qqq', text)
|
| 135 |
+
text = re.sub('<mm>', 'mmm', text)
|
| 136 |
text = re.sub('<inaudible>', 'xxx', text)
|
|
|
|
| 137 |
|
| 138 |
# # In addition, we can normalize the target text, e.g. removing new lines characters etc...
|
| 139 |
# # note that order is important here!
|
|
|
|
| 142 |
# for t in token_sequences_to_ignore:
|
| 143 |
# text = " ".join(text.split(t))
|
| 144 |
|
| 145 |
+
return text
|
| 146 |
|
| 147 |
|
| 148 |
def main(args):
|