Spaces:
Sleeping
Sleeping
File size: 702 Bytes
888aba6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import re
def clean_text(text) -> str:
    """Normalize a piece of free-form text.

    The following steps are applied in order:

    1. Trim surrounding whitespace and lowercase the text.
    2. Remove mentions (``@username``) and hashtags (``#tag``).
    3. Remove standalone 10-digit numbers (treated as phone numbers).
    4. Collapse runs of the same punctuation character (``!!!!`` -> ``!``).
    5. Replace an escaped apostrophe (backslash + ``'``) with a plain ``'``,
       e.g. ``can\\'t`` -> ``can't``.
    6. Collapse every whitespace run (spaces, tabs, newlines) to one space
       and trim the result.

    Args:
        text: The string to clean. ``None`` is treated as empty text.

    Returns:
        The cleaned, lowercased string; ``""`` when ``text`` is ``None``
        or empty.
    """
    if text is None:
        return ""

    text = text.strip().lower()

    # Remove mentions (@username) and hashtags (#tag).
    text = re.sub(r'[@#][\w∆]+', '', text)

    # Remove phone numbers: exactly 10 digits bounded by word boundaries,
    # so longer digit runs are left untouched.
    text = re.sub(r'\b\d{10}\b', '', text)

    # Collapse repeated punctuation (e.g. "!!!!" -> "!"). The backreference
    # only merges runs of the *same* non-word, non-space character.
    text = re.sub(r'([^\w\s])\1+', r'\1', text)

    # Fix "\'" like: can\'t, don\'t, etc.
    text = re.sub(r"\\'", "'", text)

    # Collapse whitespace last: the removals above leave gaps behind, and
    # \s+ already covers newlines and tabs, so one pass here is enough.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()