Spaces:
Sleeping
Sleeping
| """ from https://github.com/keithito/tacotron """ | |
| ''' | |
| Cleaners are transformations that run over the input text at both training and eval time. | |
| ''' | |
| import re | |
| from unidecode import unidecode | |
| from phonemizer import phonemize | |
# Pre-compiled pattern matching any run of one or more whitespace characters;
# used by collapse_whitespace() to squeeze each run down to a single space.
_whitespace_re = re.compile(r'\s+')
def lowercase(text):
    '''Return a lower-cased copy of ``text``.'''
    lowered = text.lower()
    return lowered
def collapse_whitespace(text):
    '''Replace every run of whitespace in ``text`` with a single space.

    Leading and trailing whitespace runs are not stripped; each of them is
    likewise reduced to one space character.
    '''
    single_space = ' '
    return _whitespace_re.sub(single_space, text)
def replace_quote(text):
    '''Swap every typographic right single quote in ``text`` for a plain apostrophe.'''
    curly_quote = '’'
    straight_quote = "'"
    return text.replace(curly_quote, straight_quote)
def remove_special_characters(text):
    '''Delete guillemets, en dashes, square/curly brackets and pipes from ``text``.

    Every other character (including the ordinary hyphen-minus) is kept, and
    nothing is inserted in place of the removed characters.
    '''
    unwanted = ('«', '»', '–', '[', ']', '{', '}', '|')
    # Mapping a character to None in a translation table deletes it, so the
    # whole clean-up happens in a single pass over the string.
    deletion_table = str.maketrans(dict.fromkeys(unwanted))
    return text.translate(deletion_table)
def remove_hyphen_at_start(text):
    '''Drop one leading ``-`` from ``text`` together with the whitespace after it.

    Text that does not begin with a hyphen is returned unchanged. Only the
    first hyphen is removed, so ``'--a'`` becomes ``'-a'``.
    '''
    if not text.startswith('-'):
        return text
    remainder = text[1:]
    return remainder.lstrip()
def basic_cleaners(text):
    '''Run the basic cleaning pipeline over ``text`` and return the result.

    Steps, in order: lower-case the text, apply a few literal character
    substitutions, collapse whitespace runs, normalise the typographic
    apostrophe, remove special characters, and strip a leading hyphen.
    No transliteration is performed.
    '''
    cleaned = lowercase(text)

    # Literal substitutions, applied in this order after lower-casing.
    # NOTE(review): both strings in the first pair render identically here;
    # presumably the first is a decomposed form of the second - verify the
    # exact code points against the upstream file.
    substitutions = (
        ('å', 'å'),
        ('´', "'"),
        ('à', 'a'),
    )
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    # Remaining steps are single-argument cleaners applied in sequence.
    steps = (
        collapse_whitespace,
        replace_quote,
        remove_special_characters,
        remove_hyphen_at_start,
    )
    for step in steps:
        cleaned = step(cleaned)
    return cleaned