| import re | |
| __all__ = ('remove_emojis', 'convert_uppercase_words_to_lowercase', 'convert_comma_separated_numbers',) | |
# One or more consecutive characters drawn from the listed code-point ranges.
emoji_pattern = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    u"\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "]+",
    re.UNICODE,
)

# A run of word/whitespace characters enclosed in asterisks, e.g. "*waves*".
stars_pattern = re.compile(r'\*[\w\s]+\*', re.UNICODE)

# A run of word/whitespace characters enclosed in parentheses, e.g. "(waves)".
# At least one opening parenthesis is required: with an optional "(" the
# pattern would also swallow ordinary text that merely ends in ")", such as
# the whole of "see you 8)".
bracket_pattern = re.compile(r'\(+[\w\s]+\)', re.UNICODE)
def remove_emojis(data):
    """Return *data* with decorative fragments removed and edges trimmed.

    Three substitutions are applied in order, each deleting its matches:
    text matched by ``stars_pattern``, text matched by ``bracket_pattern``,
    and characters matched by ``emoji_pattern``. Leading and trailing
    whitespace is stripped from the result.
    """
    cleaned = data
    # Order matters only in that each pass sees the output of the previous one.
    for pattern in (stars_pattern, bracket_pattern, emoji_pattern):
        cleaned = pattern.sub('', cleaned)
    return cleaned.strip()
def convert_uppercase_words_to_lowercase(text: str) -> str:
    """Lowercase every word in *text* that consists solely of ASCII capitals.

    A "word" here is a run of ``A-Z`` delimited by regex word boundaries, so
    mixed-case words such as ``"IPhone"`` are left untouched.

    Args:
        text: The string to process.

    Returns:
        A copy of *text* with each all-uppercase word lowercased.
    """
    # Substitute at the match positions only. A find-then-str.replace approach
    # would also lowercase the same letters wherever they occur inside other
    # words (e.g. the "I" in "IPhone" when "I" appears as a standalone word).
    return re.sub(r'\b[A-Z]+\b', lambda match: match.group().lower(), text)
def convert_comma_separated_numbers(text: str) -> str:
    """Remove thousands separators from comma-grouped numbers in *text*.

    A number is recognised as one to three digits followed by one or more
    ``,ddd`` groups, delimited by regex word boundaries (e.g. ``"1,234"`` or
    ``"2,500,000"``). Commas elsewhere in the text are left alone.

    Args:
        text: The string to process.

    Returns:
        A copy of *text* with the commas removed from each matched number.
    """
    # The group is non-capturing and the rewrite happens per match: with a
    # capturing group, re.findall yields only the last ",ddd" repetition, so
    # "1,234,567" would lose just its final comma.
    return re.sub(
        r'\b\d{1,3}(?:,\d{3})+\b',
        lambda match: match.group().replace(',', ''),
        text,
    )