Spaces:
Running
Running
| import re | |
def clean_text(text: str) -> str:
    """Normalise a raw text snippet into a lowercase, single-spaced string.

    The following steps are applied in order:

    1. Strip surrounding whitespace and lowercase the text.
    2. Remove ``@mention`` and ``#hashtag`` tokens.
    3. Remove standalone 10-digit numbers (treated as phone numbers).
    4. Collapse runs of one repeated punctuation character (``!!!`` -> ``!``).
    5. Collapse every whitespace run (spaces, tabs, newlines) to one space.
    6. Replace backslash-escaped apostrophes (``can\\'t``) with plain ones.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string with no leading or trailing whitespace.
    """
    text = text.strip().lower()

    # Remove mentions (@username) and hashtags (#tag).
    # NOTE: the pattern is not anchored to a token start, so the "@domain"
    # part of an e-mail address is removed too ("bob@example.com" -> "bob.com").
    text = re.sub(r'[@#][\w∆]+', '', text)

    # Remove phone numbers: exactly ten digits delimited by word boundaries.
    text = re.sub(r'\b\d{10}\b', '', text)

    # Collapse repeated punctuation (e.g. "!!!!" -> "!"). This also reduces a
    # doubled backslash to a single one, which is why one apostrophe pass
    # below is sufficient.
    text = re.sub(r'([^\w\s])\1+', r'\1', text)

    # One whitespace pass is enough: neither pattern above depends on the
    # kind or amount of whitespace, and the removals can only leave extra
    # whitespace behind, which is collapsed here.
    text = re.sub(r'\s+', ' ', text)

    # Fix "\'" like: can\'t, don\'t, etc.
    text = re.sub(r"\\'", "'", text)

    return text.strip()