File size: 1,186 Bytes
7b64dcd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import re
# Public API: the only names exported by a star-import of this module.
__all__ = ('remove_emojis', 'convert_uppercase_words_to_lowercase', 'convert_comma_separated_numbers',)
# One or more consecutive characters drawn from the emoji code-point ranges
# listed below (each range is labelled with the Unicode block it covers).
emoji_pattern = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    u"\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "]+", re.UNICODE
)

# An asterisk-delimited span such as "*waves*": an asterisk, one or more
# word/whitespace characters, then a closing asterisk.
stars_pattern = re.compile(r'\*[\w\s]+\*', re.UNICODE)

# A parenthesised span such as "(laughs)": one or more opening parentheses,
# one or more word/whitespace characters, then a closing parenthesis.
# At least one "(" is required (`\(+`, not `\(*`); otherwise ordinary text
# that merely precedes a lone ")" -- e.g. "step 1) go" -- would be deleted.
bracket_pattern = re.compile(r'\(+[\w\s]+\)', re.UNICODE)
def remove_emojis(data):
    """Strip star-delimited spans, bracketed spans and emojis from *data*.

    The module-level patterns are applied in a fixed order -- first
    ``stars_pattern``, then ``bracket_pattern``, then ``emoji_pattern`` --
    and every match is replaced with the empty string. Leading and trailing
    whitespace is trimmed from the result before it is returned.
    """
    cleaned = data
    # Order matters only in that it mirrors the sequence of substitutions:
    # stars first, brackets second, emojis last.
    for pattern in (stars_pattern, bracket_pattern, emoji_pattern):
        cleaned = pattern.sub('', cleaned)
    return cleaned.strip()
def convert_uppercase_words_to_lowercase(text):
    """Return *text* with every all-capitals word converted to lowercase.

    A "word" here is a run of ASCII letters ``A-Z`` bounded on both sides by
    regex word boundaries, so ``"NASA"`` and ``"I"`` are lowercased while
    mixed-case words such as ``"Apple"`` are left untouched.

    Each match is rewritten in place via a substitution callback. This avoids
    a global ``str.replace`` per match, which would also rewrite the same
    letters inside other words (e.g. the "I" in "IBM") and corrupt later
    matches.
    """
    return re.sub(r'\b[A-Z]+\b', lambda match: match.group().lower(), text)
def convert_comma_separated_numbers(text):
    """Return *text* with thousands separators removed from grouped numbers.

    A grouped number is one to three digits followed by one or more
    ``,ddd`` groups, bounded by regex word boundaries: ``"1,234,567"``
    becomes ``"1234567"``. Sequences that do not fit that shape (such as
    ``"1,23"``) are left unchanged.

    The repeated group is non-capturing and the commas are stripped inside a
    substitution callback. With a capturing group, ``re.findall`` would yield
    only the last ``,ddd`` group, so numbers with more than one separator
    would be converted only partially.
    """
    return re.sub(
        r'\b\d{1,3}(?:,\d{3})+\b',
        lambda match: match.group().replace(',', ''),
        text,
    )
|