Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -55,10 +55,7 @@ def clean_and_standardize_text(text):
|
|
| 55 |
text = re.sub(r'\(\s*', '(', text)
|
| 56 |
text = re.sub(r'\s*\)', ')', text)
|
| 57 |
|
| 58 |
-
# 11.
|
| 59 |
-
text = re.sub(r'\b(\d+)\b', '', text)
|
| 60 |
-
|
| 61 |
-
# 12. Improve spacing around punctuations
|
| 62 |
while ' .' in text:
|
| 63 |
text = text.replace(' .', '.')
|
| 64 |
|
|
@@ -72,7 +69,7 @@ def clean_and_standardize_text(text):
|
|
| 72 |
text = text.replace('- -', '-')
|
| 73 |
text = text.replace('. -', '.')
|
| 74 |
|
| 75 |
-
#
|
| 76 |
text = re.sub(r'([.,]){2,}', r'\1', text)
|
| 77 |
text = re.sub(r'(?<=[:.])[:.]+', '', text)
|
| 78 |
|
|
|
|
| 55 |
text = re.sub(r'\(\s*', '(', text)
|
| 56 |
text = re.sub(r'\s*\)', ')', text)
|
| 57 |
|
| 58 |
+
# 11. Improve spacing around punctuation marks
|
|
|
|
|
|
|
|
|
|
| 59 |
while ' .' in text:
|
| 60 |
text = text.replace(' .', '.')
|
| 61 |
|
|
|
|
| 69 |
text = text.replace('- -', '-')
|
| 70 |
text = text.replace('. -', '.')
|
| 71 |
|
| 72 |
+
# 12. Collapse runs of consecutive punctuation marks into a single mark
|
| 73 |
text = re.sub(r'([.,]){2,}', r'\1', text)
|
| 74 |
text = re.sub(r'(?<=[:.])[:.]+', '', text)
|
| 75 |
|