Vaishnavi0404 committed on
Commit
726598b
·
verified ·
1 Parent(s): 86e14e9

Create text_processor.py

Browse files
Files changed (1) hide show
  1. text_processor.py +127 -0
text_processor.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import nltk
3
+ from nltk.tokenize import word_tokenize
4
+ import phonemizer
5
+ from phonemizer.backend import EspeakBackend
6
+ import numpy as np
7
+
8
class TextProcessor:
    """Convert lyric text into phoneme units, durations and stress flags.

    The text is cleaned, tokenized with NLTK and phonemized with espeak (via
    ``phonemizer``).  ``_text_to_phonemes`` calls the backend without an
    explicit separator, so the "phoneme units" obtained from
    ``phonemes.split()`` are whitespace-separated chunks of the backend
    output.  NOTE(review): with phonemizer's default separator each chunk is
    the IPA transcription of one word -- confirm against the installed
    phonemizer version.

    ``word_tokenize`` and ``nltk.pos_tag`` need their NLTK data packages to
    be installed; this class does not download them.
    """

    # Duration, in seconds, of a unit that has neither a vowel nor a sonorant.
    _BASE_DURATION = 0.1

    # The backend emits IPA, so vowel detection has to cover the IPA vowel
    # letters (schwa, lax vowels, ...) and not only ASCII a/e/i/o/u.
    _VOWEL_RE = re.compile(r'[aeiouəɚɝɜɪʊɛæɑɒɔʌɐᵻ]')

    # Sustainable consonants.  'ɹ' is the IPA letter for the English "r"
    # sound and 'ŋ' the "ng" nasal; plain ASCII letters alone miss both.
    _SONORANT_RE = re.compile(r'[lrmnwɹŋ]')

    # Punctuation tokens that produce a pause, split by pause length.
    _SENTENCE_END = frozenset('.!?')
    _PHRASE_BREAK = frozenset(',;:')

    # Typographic quotes are mapped to ASCII so contractions such as
    # "don’t" survive the character filter in _clean_text.
    _QUOTE_MAP = str.maketrans({
        '\u2018': "'",
        '\u2019': "'",
        '\u201c': '"',
        '\u201d': '"',
    })

    def __init__(self):
        """Create the espeak phonemizer backend for US English."""
        self.backend = EspeakBackend('en-us')

    def process(self, text):
        """
        Process text into phonemes with duration and stress markers for singing

        Args:
            text (str): Input text to be processed

        Returns:
            tuple: (phonemes, durations, stress_markers) where ``phonemes``
            is a single space-separated string, ``durations`` is a list of
            floats in seconds (per-unit durations followed by any pause
            durations, see ``_estimate_durations``) and ``stress_markers``
            is a NumPy array with one 0/1 entry per phoneme unit.
        """
        text = self._clean_text(text)
        tokens = word_tokenize(text)
        phonemes = self._text_to_phonemes(text)
        durations = self._estimate_durations(tokens, phonemes)
        stress_markers = self._mark_stress(tokens, phonemes)
        return phonemes, durations, stress_markers

    def _clean_text(self, text):
        """Lowercase ``text``, drop unsupported characters, collapse spaces.

        Kept characters are a-z, 0-9, whitespace and the phrasing punctuation
        ``. , ; : ! ? ' " -``.  ``;`` and ``:`` are kept because
        ``_estimate_durations`` turns them into pauses.
        """
        text = text.lower().translate(self._QUOTE_MAP)

        # Filter first, then collapse whitespace: removing a character that
        # sat between two spaces would otherwise leave a double space behind.
        text = re.sub(r'[^a-z0-9\s.,;:!?\'"-]', '', text)
        return re.sub(r'\s+', ' ', text).strip()

    def _text_to_phonemes(self, text):
        """Return the phonemized form of ``text`` as one normalized string.

        Empty or whitespace-only input yields ``''`` without calling the
        backend, and an empty backend result is treated the same way instead
        of raising ``IndexError``.
        """
        if not text or not text.strip():
            return ''

        result = self.backend.phonemize([text], strip=True)
        raw = result[0] if result else ''
        return re.sub(r'\s+', ' ', raw).strip()

    def _estimate_durations(self, tokens, phonemes):
        """Estimate a duration in seconds for every phoneme unit.

        A unit containing a vowel lasts twice the base duration, a unit with
        a sonorant consonant (and no vowel) 1.5 times, anything else the base
        duration.

        Pauses for punctuation tokens are appended AFTER the per-unit
        durations (3x base for ``. ! ?``, 1.5x base for ``, ; :``), so the
        returned list can be longer than ``phonemes.split()``; only its first
        ``len(phonemes.split())`` entries line up with the phoneme units.

        Args:
            tokens (list[str]): Word/punctuation tokens of the cleaned text.
            phonemes (str): Space-separated phoneme units.

        Returns:
            list[float]: Unit durations followed by pause durations.
        """
        base = self._BASE_DURATION

        durations = []
        for unit in phonemes.split():
            if self._VOWEL_RE.search(unit):
                durations.append(base * 2)
            elif self._SONORANT_RE.search(unit):
                durations.append(base * 1.5)
            else:
                durations.append(base)

        for token in tokens:
            if token in self._SENTENCE_END:
                durations.append(base * 3)
            elif token in self._PHRASE_BREAK:
                durations.append(base * 1.5)

        return durations

    @staticmethod
    def _align_tokens_to_units(tokens, unit_count):
        """Map each token to the index of its phoneme unit, or ``None``.

        Tokens without any letter or digit (punctuation) own no unit.
        Treebank-style clitics (``n't``, ``'s``, ``'re`` ...) are assumed to
        be pronounced as part of the preceding word and own no unit either.
        Tokens beyond ``unit_count`` also map to ``None``.

        This is a heuristic: tokens that expand to several spoken words
        (e.g. numbers) will shift the alignment of what follows.
        """
        alignment = []
        next_unit = 0
        for token in tokens:
            spoken = any(ch.isalnum() for ch in token)
            clitic = token == "n't" or token.startswith("'")
            if not spoken or clitic or next_unit >= unit_count:
                alignment.append(None)
                continue
            alignment.append(next_unit)
            next_unit += 1
        return alignment

    def _mark_stress(self, tokens, phonemes):
        """Mark which phoneme units should be stressed in singing.

        Content words (POS tag starting with N, V, J or R and longer than two
        characters) are stressed when their phoneme unit contains a vowel.
        Tokens are matched to units one-to-one via
        ``_align_tokens_to_units`` rather than by character count, so the
        index cannot run past the unit list after the first word.

        Args:
            tokens (list[str]): Word/punctuation tokens of the cleaned text.
            phonemes (str): Space-separated phoneme units.

        Returns:
            numpy.ndarray: Float array with one entry per phoneme unit,
            1 for stressed units and 0 otherwise.
        """
        units = phonemes.split()
        stress_markers = np.zeros(len(units))

        # Nothing to align: skip the POS tagger entirely.
        if not tokens or not units:
            return stress_markers

        alignment = self._align_tokens_to_units(tokens, len(units))
        tagged = nltk.pos_tag(tokens)

        for (word, tag), unit_idx in zip(tagged, alignment):
            if unit_idx is None:
                continue
            is_content = tag.startswith(('N', 'V', 'J', 'R')) and len(word) > 2
            if is_content and self._VOWEL_RE.search(units[unit_idx]):
                stress_markers[unit_idx] = 1

        return stress_markers