import re

import nltk
import numpy as np
import phonemizer
from nltk.tokenize import word_tokenize
from phonemizer.backend import EspeakBackend


class TextProcessor:
    """Convert text into phonemes with duration and stress hints for singing.

    Pipeline (see ``process``): clean the text, tokenize it with NLTK,
    phonemize it with the espeak backend, then derive one duration per
    whitespace-separated phoneme unit and a 0/1 stress marker per unit.

    NOTE(review): "unit" below means a whitespace-separated chunk of the
    phonemizer output. With phonemizer's default separator that chunk is
    presumably a whole word rather than a single phone -- confirm against
    the backend configuration before relying on per-phone timing.
    """

    # Vowel symbols: plain ASCII plus the IPA vowels found in espeak en-us
    # output (e.g. "ðə", "sɪt"). An ASCII-only class never matches those.
    _VOWEL_RE = re.compile(r'[aeiouæɑɒɔəɚɛɜɝɪʊʌɐᵻ]')
    # Sustainable consonants (liquids, nasals, w), ASCII and IPA spellings.
    _SONORANT_RE = re.compile(r'[lrmnwɹŋɫ]')
    # Punctuation tokens that add a pause to the duration list.
    _LONG_PAUSE_TOKENS = frozenset('.!?')
    _SHORT_PAUSE_TOKENS = frozenset(',;:')

    def __init__(self):
        """Create the espeak phonemizer backend for US English ('en-us')."""
        self.backend = EspeakBackend('en-us')

    def process(self, text):
        """
        Process text into phonemes with duration and stress markers for singing

        Args:
            text (str): Input text to be processed

        Returns:
            tuple: (phonemes, durations, stress_markers) where ``phonemes``
                is a single space-normalized string, ``durations`` is a list
                of floats and ``stress_markers`` is a numpy array of 0/1
                values with one entry per phoneme unit.
        """
        text = self._clean_text(text)
        tokens = word_tokenize(text)
        phonemes = self._text_to_phonemes(text)
        durations = self._estimate_durations(tokens, phonemes)
        stress_markers = self._mark_stress(tokens, phonemes)
        return phonemes, durations, stress_markers

    def _clean_text(self, text):
        """Clean and normalize text.

        Lower-cases the text, drops every character that is not an ASCII
        letter, digit, whitespace or one of ``. , ! ? ' " -``, and collapses
        whitespace runs to single spaces.

        Args:
            text (str): Raw input text.

        Returns:
            str: Normalized text without leading/trailing whitespace.
        """
        text = text.lower()
        # Strip disallowed characters *before* collapsing whitespace:
        # removing a symbol that sits between two spaces ("a @ b") would
        # otherwise leave a double space behind.
        text = re.sub(r'[^a-z0-9\s.,!?\'"-]', '', text)
        return re.sub(r'\s+', ' ', text).strip()

    def _text_to_phonemes(self, text):
        """Convert text to phoneme sequence.

        Args:
            text (str): Cleaned text.

        Returns:
            str: Backend output with whitespace runs collapsed to single
                spaces; an empty string when ``text`` is empty or blank.
        """
        if not text.strip():
            # Nothing to phonemize; skip the backend call entirely.
            return ''
        phonemes = self.backend.phonemize([text], strip=True)[0]
        return re.sub(r'\s+', ' ', phonemes).strip()

    def _estimate_durations(self, tokens, phonemes, base_duration=0.1):
        """Estimate phoneme durations for singing.

        Each whitespace-separated unit of ``phonemes`` gets
        ``2 * base_duration`` if it contains a vowel symbol,
        ``1.5 * base_duration`` if it contains a sonorant consonant, and
        ``base_duration`` otherwise. Then one pause is added per punctuation
        token: ``3 * base_duration`` for ``. ! ?`` and
        ``1.5 * base_duration`` for ``, ; :``.

        NOTE(review): pauses are appended after all unit durations, not
        inserted at the position of the punctuation, so the returned list
        can be longer than the number of units. Kept as-is because callers
        may rely on this layout -- confirm.

        Args:
            tokens (list[str]): Tokens of the cleaned text.
            phonemes (str): Space-separated phoneme units.
            base_duration (float): Duration assigned to a plain consonant
                unit; all other durations are multiples of it.

        Returns:
            list[float]: Unit durations followed by punctuation pauses.
        """
        durations = []
        for unit in phonemes.split():
            if self._VOWEL_RE.search(unit):
                durations.append(base_duration * 2)
            elif self._SONORANT_RE.search(unit):
                durations.append(base_duration * 1.5)
            else:
                durations.append(base_duration)

        for token in tokens:
            if token in self._LONG_PAUSE_TOKENS:
                durations.append(base_duration * 3)
            elif token in self._SHORT_PAUSE_TOKENS:
                durations.append(base_duration * 1.5)

        return durations

    def _mark_stress(self, tokens, phonemes):
        """Mark which phonemes should be stressed in singing.

        Tokens are POS-tagged with NLTK; a token counts as a content word
        when its tag starts with N, V, J or R and it is longer than two
        characters. For each content word the first vowel-bearing unit in
        its window is marked with 1.

        NOTE(review): the window of a token is approximated as
        ``len(token)`` units, and every token (punctuation included)
        advances the cursor by its character count. This is a heuristic
        alignment, not a real token-to-phoneme mapping -- verify it matches
        the backend's output granularity.

        Args:
            tokens (list[str]): Tokens of the cleaned text.
            phonemes (str): Space-separated phoneme units.

        Returns:
            numpy.ndarray: Float array with one entry per unit, 1 for
                stressed units and 0 elsewhere.
        """
        units = phonemes.split()
        stress_markers = np.zeros(len(units))

        cursor = 0
        for word, tag in nltk.pos_tag(tokens):
            span = len(word)
            if tag.startswith(('N', 'V', 'J', 'R')) and span > 2:
                # Slicing clamps at the end of ``units``, so no explicit
                # bounds check is needed.
                for offset, unit in enumerate(units[cursor:cursor + span]):
                    if self._VOWEL_RE.search(unit):
                        stress_markers[cursor + offset] = 1
                        break
            cursor += span

        return stress_markers