File size: 1,186 Bytes
7b64dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import re

# Names exported by a star-import of this module.
__all__ = (
    'remove_emojis',
    'convert_uppercase_words_to_lowercase',
    'convert_comma_separated_numbers',
)

# One or more consecutive characters from the code-point ranges below.
emoji_pattern = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    u"\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "]+", re.UNICODE
)

# Text wrapped in asterisks, e.g. "*waves*". Only word characters and
# whitespace may appear between the two markers.
stars_pattern = re.compile(r'\*[\w\s]+\*', re.UNICODE)
# Text wrapped in parentheses, e.g. "(laughs)". The opening parenthesis is
# mandatory so that ordinary text followed by a stray ")" is left alone.
bracket_pattern = re.compile(r'\([\w\s]+\)', re.UNICODE)


def remove_emojis(data: str) -> str:
    """Strip emoji characters and textual "action" markers from *data*.

    Three kinds of spans are deleted, in this order:

    1. ``*...*`` spans matched by ``stars_pattern``.
    2. ``(...)`` spans matched by ``bracket_pattern``.
    3. Runs of characters matched by ``emoji_pattern``.

    Spans whose content includes anything other than word characters or
    whitespace (for example punctuation) are not matched by the first two
    patterns and are kept. Whitespace that surrounded a removed span is not
    collapsed; only leading and trailing whitespace of the final result is
    stripped.

    Args:
        data: The text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    text = stars_pattern.sub('', data)
    text = bracket_pattern.sub('', text)
    text = emoji_pattern.sub('', text)
    return text.strip()


def convert_uppercase_words_to_lowercase(text: str) -> str:
    """Lowercase every whole word made up solely of ASCII capitals.

    A "word" here is a run of ``A``-``Z`` delimited by regex word boundaries,
    so ``HELLO`` and ``I`` are converted while ``Iowa``, ``A1`` and ``MY_VAR``
    are left unchanged.

    Args:
        text: The text to process.

    Returns:
        A copy of *text* with each all-uppercase word lowercased.
    """
    # Substitute in a single regex pass so only the matched spans change.
    # A plain ``str.replace`` per word would also lowercase the same letters
    # wherever they occur inside other words (e.g. the "I" in "IN").
    return re.sub(r'\b[A-Z]+\b', lambda match: match.group().lower(), text)


def convert_comma_separated_numbers(text: str) -> str:
    """Remove thousands-separator commas from numbers in *text*.

    A number is recognised when it consists of one to three digits followed
    by one or more ``,ddd`` groups, delimited by regex word boundaries; for
    example ``1,234`` or ``12,345,678``. Anything else, such as ``12,34`` or
    ``a,234``, is left unchanged.

    Args:
        text: The text to process.

    Returns:
        A copy of *text* with the commas removed from each recognised number.
    """
    # The group is non-capturing and the rewrite is done per match: with a
    # capturing group ``re.findall`` would return only the last ",ddd" piece,
    # which breaks multi-group numbers and touches unrelated text.
    return re.sub(
        r'\b\d{1,3}(?:,\d{3})+\b',
        lambda match: match.group().replace(',', ''),
        text,
    )