liumaolin
Rename 'src/VoiceDialogue' to 'src/voice_dialogue'.
511ff0c
import re
# Names exported by a star-import of this module.
__all__ = (
    'remove_emojis',
    'convert_uppercase_words_to_lowercase',
    'convert_comma_separated_numbers',
)
# One or more consecutive characters from the listed emoji code-point ranges.
emoji_pattern = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "]+",
    re.UNICODE,
)

# Word/whitespace text wrapped in asterisks, e.g. "*laughs*".
stars_pattern = re.compile(r'\*[\w\s]+\*', re.UNICODE)

# Word/whitespace text wrapped in parentheses, e.g. "(laughs)".
# At least one opening parenthesis is required: with an optional "(" the
# pattern would also swallow ordinary text that merely ends in ")".
bracket_pattern = re.compile(r'\(+[\w\s]+\)', re.UNICODE)
def remove_emojis(data):
    """Return *data* with pattern matches removed and outer whitespace trimmed.

    Text matched by the module-level ``stars_pattern``, ``bracket_pattern``
    and ``emoji_pattern`` is deleted, in that order, and the result is
    stripped of leading and trailing whitespace.
    """
    cleaned = data
    for pattern in (stars_pattern, bracket_pattern, emoji_pattern):
        cleaned = pattern.sub('', cleaned)
    return cleaned.strip()
def convert_uppercase_words_to_lowercase(text):
    """Lower-case every word in *text* that consists solely of ``A``-``Z``.

    Only whole words (delimited by regex word boundaries) made up entirely
    of ASCII capital letters are changed; mixed-case words such as
    ``"Apple"`` are left untouched.

    Args:
        text: The input string.

    Returns:
        A new string with each all-uppercase word replaced by its lower-case
        form.
    """
    # Substitute in place at each match. A global ``str.replace`` per word
    # would also rewrite the same letters inside unrelated words
    # (e.g. the "I" in "IBM" or the "A" in "Apple").
    return re.sub(r'\b[A-Z]+\b', lambda match: match.group().lower(), text)
def convert_comma_separated_numbers(text):
    """Remove thousands separators from comma-grouped numbers in *text*.

    A number is recognised as one to three digits followed by one or more
    ``,ddd`` groups, delimited by regex word boundaries (e.g. ``"1,234"`` or
    ``"1,234,567"``). Commas elsewhere in the text are left alone.

    Args:
        text: The input string.

    Returns:
        A new string in which each recognised number has its commas removed.
    """
    # The group is non-capturing and the substitution happens at the match
    # itself. With a capturing group, ``re.findall`` yields only the last
    # ",ddd" group, so multi-group numbers were only partially converted.
    return re.sub(
        r'\b\d{1,3}(?:,\d{3})+\b',
        lambda match: match.group().replace(',', ''),
        text,
    )