import re

# Patterns are compiled once at import time so repeated calls to
# ``clean_text`` only pay for the substitutions themselves.
_MENTION_OR_HASHTAG = re.compile(r'[@#][\w∆]+')
_TEN_DIGIT_NUMBER = re.compile(r'\b\d{10}\b')
_REPEATED_PUNCT = re.compile(r'([^\w\s])\1+')
_WHITESPACE_RUN = re.compile(r'\s+')
_ESCAPED_APOSTROPHE = re.compile(r"\\'")


def clean_text(text: str) -> str:
    """Return a lower-cased, de-noised copy of *text*.

    The following steps are applied in order:

    1. Strip surrounding whitespace and lower-case the text.
    2. Remove every ``@`` or ``#`` that is followed by one or more word
       characters (or ``∆``), together with those characters. This targets
       mentions and hashtags, but note that it also removes the
       ``@domain`` part of an e-mail address
       (``user@example.com`` -> ``user.com``).
    3. Remove standalone runs of exactly ten digits (phone numbers).
    4. Collapse a punctuation character repeated back-to-back into a
       single occurrence (``!!!`` -> ``!``).
    5. Collapse every run of whitespace (spaces, tabs, newlines) into a
       single space.
    6. Replace a backslash-escaped apostrophe (``\\'``) with a plain ``'``.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, with no leading or trailing whitespace.
    """
    text = text.strip().lower()
    text = _MENTION_OR_HASHTAG.sub('', text)
    text = _TEN_DIGIT_NUMBER.sub('', text)
    # Runs of backslashes are collapsed here as well, so by the time the
    # apostrophe fix below runs there is at most one backslash before a
    # quote and a single substitution pass is enough.
    text = _REPEATED_PUNCT.sub(r'\1', text)
    # One whitespace pass, after all removals, also cleans up the gaps that
    # the deleted mentions / hashtags / numbers leave behind.
    text = _WHITESPACE_RUN.sub(' ', text)
    text = _ESCAPED_APOSTROPHE.sub("'", text)
    # Removals at either end of the string can leave a dangling space.
    return text.strip()