sreepathi-ravikumar committed on
Commit
531192c
·
verified ·
1 Parent(s): e0b72ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -80
app.py CHANGED
@@ -36,7 +36,7 @@ API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
  VOICE_EN = "en-IN-NeerjaNeural"
38
 
39
- # Pre-compiled regex patterns for speed
40
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
41
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
42
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -45,40 +45,40 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
45
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
46
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
47
 
48
-
49
- @lru_cache(maxsize=1024)
50
  def clean_text_for_tts(text):
51
  """Cleans text before TTS with optimized regex and caching."""
52
  if not text:
53
  return ""
54
  text = str(text).strip()
55
  text = html.unescape(text)
56
-
 
57
  text = URL_PATTERN.sub('', text)
58
  text = TAG_PATTERN.sub('', text)
59
  text = BRACKET_PATTERN.sub('', text)
60
  text = SPECIAL_CHAR_PATTERN.sub('', text)
61
  text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
62
-
 
63
  for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
64
  text = text.replace(keyword, '').replace(keyword.upper(), '')
65
-
66
  text = unicodedata.normalize('NFKD', text)
67
  text = WHITESPACE_PATTERN.sub(' ', text)
68
  return text.strip()
69
 
70
-
71
  async def generate_safe_audio(text, voice, semaphore):
72
  """Generate clean audio with rate limiting."""
73
- async with semaphore:
74
  cleaned_text = clean_text_for_tts(text)
75
  if not cleaned_text:
76
  return None
77
-
78
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
79
  fname = temp_file.name
80
  temp_file.close()
81
-
82
  try:
83
  comm = edge_tts.Communicate(cleaned_text, voice=voice)
84
  await comm.save(fname)
@@ -86,28 +86,24 @@ async def generate_safe_audio(text, voice, semaphore):
86
  except Exception as e:
87
  print(f"Error generating audio: {e}")
88
  if os.path.exists(fname):
89
- try:
90
- os.unlink(fname)
91
- except:
92
- pass
93
  return None
94
 
95
-
96
  @lru_cache(maxsize=256)
97
  def smart_text_chunking(text, max_chars=80):
98
  """Cached text chunking for speed."""
99
  text = clean_text_for_tts(text)
100
  if not text:
101
- return tuple()
102
-
103
  sentences = SENTENCE_PATTERN.split(text)
104
  chunks = []
105
-
106
  for sentence in sentences:
107
  sentence = sentence.strip()
108
  if not sentence:
109
  continue
110
-
111
  if len(sentence) <= max_chars:
112
  chunks.append(sentence)
113
  else:
@@ -116,7 +112,7 @@ def smart_text_chunking(text, max_chars=80):
116
  part = part.strip()
117
  if not part:
118
  continue
119
-
120
  if len(part) <= max_chars:
121
  chunks.append(part)
122
  else:
@@ -132,105 +128,109 @@ def smart_text_chunking(text, max_chars=80):
132
  current_chunk = word
133
  if current_chunk:
134
  chunks.append(current_chunk.strip())
135
-
136
  return tuple(chunk for chunk in chunks if chunk.strip())
137
 
138
-
139
  def process_audio_segment_fast(audio_file):
140
  """Fast audio processing in separate thread."""
141
  try:
142
- if not os.path.exists(audio_file):
143
- return None
144
-
145
  segment = AudioSegment.from_file(audio_file)
146
  segment = normalize(segment)
147
-
 
148
  if len(segment) > 200:
149
  try:
150
  segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
151
  except:
152
- pass
153
-
154
  return segment
155
  except Exception as e:
156
  print(f"Warning: Error processing audio segment: {e}")
157
  return None
158
  finally:
 
159
  try:
160
  if os.path.exists(audio_file):
161
  os.unlink(audio_file)
162
  except:
163
  pass
164
 
165
-
166
  async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
167
  """Ultra-optimized bilingual TTS with parallel processing."""
168
  print("Starting optimized bilingual TTS processing...")
169
-
170
  try:
171
  chunks = smart_text_chunking(text)
172
  if not chunks:
173
  print("Error: No valid text chunks after cleaning")
174
  return None
175
-
176
  print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
177
-
178
  is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
179
-
 
180
  semaphore = asyncio.Semaphore(max_concurrent)
181
-
 
182
  tasks = []
183
  for i, chunk in enumerate(chunks):
184
  is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
185
  voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
186
  tasks.append(generate_safe_audio(chunk, voice, semaphore))
187
-
 
188
  audio_files = await asyncio.gather(*tasks, return_exceptions=True)
189
-
190
- processed_audio_files = [f for f in audio_files if isinstance(f, str) and f and os.path.exists(f)]
191
-
 
192
  if not processed_audio_files:
193
  print("Error: No audio was successfully generated")
194
  return None
195
-
196
  print(f"Successfully generated {len(processed_audio_files)} audio segments")
197
-
 
198
  with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
199
  audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
200
-
 
201
  audio_segments = [seg for seg in audio_segments if seg is not None]
202
-
203
  if not audio_segments:
204
  print("Error: No audio segments were successfully processed")
205
  return None
206
-
 
207
  print("Merging audio segments...")
208
  merged_audio = audio_segments[0]
209
  pause = AudioSegment.silent(duration=200)
210
-
211
  for segment in audio_segments[1:]:
212
  merged_audio += pause + segment
213
-
 
214
  print("Applying final audio processing...")
215
  merged_audio = merged_audio.compress_dynamic_range(
216
- threshold=-20.0,
217
- ratio=4.0,
218
- attack=5.0,
219
  release=50.0
220
  )
221
  merged_audio = normalize(merged_audio)
222
-
 
223
  merged_audio.export(output_file, format="mp3", bitrate="192k")
224
- print(f"✅ Audio successfully generated: {output_file}")
225
-
226
  return output_file
227
-
228
  except Exception as main_error:
229
  print(f"Main error in bilingual TTS: {main_error}")
230
- traceback.print_exc()
231
  return None
232
 
233
-
234
  async def generate_tts_optimized(id, lines, lang):
235
  """Optimized TTS generation function."""
236
  voice = {
@@ -267,46 +267,33 @@ async def generate_tts_optimized(id, lines, lang):
267
  "Czech": "cs-CZ-VlastaNeural",
268
  "Hungarian": "hu-HU-NoemiNeural"
269
  }
270
-
271
  audio_name = f"audio{id}.mp3"
272
  audio_path = os.path.join(AUDIO_DIR, audio_name)
273
-
274
  if "&&&" in lang:
275
  listf = lang.split("&&&")
276
  text = listf[0].strip()
277
- lang_name = listf[1].strip() if len(listf) > 1 else "English"
278
  voice_to_use = voice.get(lang_name, VOICE_EN)
279
  else:
280
- text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
281
  voice_to_use = voice.get(lang, VOICE_EN)
282
-
 
283
  output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
284
-
285
  if output and os.path.exists(audio_path):
286
- try:
287
- audio = MP3(audio_path)
288
- duration = audio.info.length
289
- return duration, audio_path
290
- except Exception as e:
291
- print(f"Error reading audio file: {e}")
292
- return None, None
293
-
294
  return None, None
295
 
296
-
297
  def audio_func(id, lines, lang):
298
  """Synchronous wrapper for audio generation."""
299
- try:
300
- loop = asyncio.new_event_loop()
301
- asyncio.set_event_loop(loop)
302
- try:
303
- return loop.run_until_complete(generate_tts_optimized(id, lines, lang))
304
- finally:
305
- loop.close()
306
- except Exception as e:
307
- print(f"Error in audio_func: {e}")
308
- traceback.print_exc()
309
- return None, None
310
 
311
 
312
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
 
36
 
37
  VOICE_EN = "en-IN-NeerjaNeural"
38
 
39
+ # Pre-compiled regex patterns for speed (compiled once, reused many times)
40
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
41
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
42
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 
45
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
46
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
47
 
@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
def clean_text_for_tts(text):
    """Return ``text`` sanitized for the TTS engine.

    Steps, in order: strip and HTML-unescape the input; delete every match
    of the pre-compiled URL, tag, bracket and special-character patterns;
    turn literal backslash escapes (``\\n``, ``\\t``, ``\\r``) into spaces;
    delete SSML-related keywords in lower and upper case; apply NFKD
    Unicode normalization; collapse whitespace runs to a single space.

    The result is memoized, so ``text`` must be hashable.

    Args:
        text: Raw text to clean; non-string values are passed through
            ``str()``.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    cleaned = html.unescape(str(text).strip())

    # Pre-compiled module-level patterns, applied one after another.
    # SPECIAL_CHAR_PATTERN is defined elsewhere in the file.
    for pattern in (URL_PATTERN, TAG_PATTERN, BRACKET_PATTERN, SPECIAL_CHAR_PATTERN):
        cleaned = pattern.sub('', cleaned)

    # These are literal two-character sequences (backslash + letter),
    # not real control characters.
    for escaped in ('\\n', '\\t', '\\r'):
        cleaned = cleaned.replace(escaped, ' ')

    # Plain substring removal: lower-case form first, then upper-case form.
    for keyword in ('voice', 'speak', 'prosody', 'ssml', 'xmlns'):
        for variant in (keyword, keyword.upper()):
            cleaned = cleaned.replace(variant, '')

    cleaned = unicodedata.normalize('NFKD', cleaned)
    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
70
 
 
71
  async def generate_safe_audio(text, voice, semaphore):
72
  """Generate clean audio with rate limiting."""
73
+ async with semaphore: # Limit concurrent TTS requests
74
  cleaned_text = clean_text_for_tts(text)
75
  if not cleaned_text:
76
  return None
77
+
78
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
79
  fname = temp_file.name
80
  temp_file.close()
81
+
82
  try:
83
  comm = edge_tts.Communicate(cleaned_text, voice=voice)
84
  await comm.save(fname)
 
86
  except Exception as e:
87
  print(f"Error generating audio: {e}")
88
  if os.path.exists(fname):
89
+ os.unlink(fname)
 
 
 
90
  return None
91
 
 
92
  @lru_cache(maxsize=256)
93
  def smart_text_chunking(text, max_chars=80):
94
  """Cached text chunking for speed."""
95
  text = clean_text_for_tts(text)
96
  if not text:
97
+ return tuple() # Return tuple for hashability (required by lru_cache)
98
+
99
  sentences = SENTENCE_PATTERN.split(text)
100
  chunks = []
101
+
102
  for sentence in sentences:
103
  sentence = sentence.strip()
104
  if not sentence:
105
  continue
106
+
107
  if len(sentence) <= max_chars:
108
  chunks.append(sentence)
109
  else:
 
112
  part = part.strip()
113
  if not part:
114
  continue
115
+
116
  if len(part) <= max_chars:
117
  chunks.append(part)
118
  else:
 
128
  current_chunk = word
129
  if current_chunk:
130
  chunks.append(current_chunk.strip())
131
+
132
  return tuple(chunk for chunk in chunks if chunk.strip())
133
 
 
def process_audio_segment_fast(audio_file):
    """Load, normalize and trim one temporary audio file, then delete it.

    This function is mapped over a ``ThreadPoolExecutor`` by
    ``bilingual_tts_optimized``, so it runs in a worker thread.

    Args:
        audio_file: Path of the audio file to load. The file is removed
            before this function returns, whether or not processing
            succeeded.

    Returns:
        The processed segment, or ``None`` if loading or normalizing
        failed (the error is printed, not raised).
    """
    try:
        segment = normalize(AudioSegment.from_file(audio_file))

        # Only strip silence for longer segments.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as e:
                # Trimming is best-effort: keep the untrimmed segment, but
                # do not swallow KeyboardInterrupt/SystemExit as a bare
                # ``except:`` would.
                print(f"Warning: Could not strip silence: {e}")

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately. Attempting the unlink directly
        # avoids the check-then-delete race of exists() + unlink().
        try:
            os.unlink(audio_file)
        except FileNotFoundError:
            pass  # Already gone; nothing to clean up.
        except Exception as e:
            print(f"Warning: Could not remove temp file {audio_file}: {e}")
158
 
 
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Synthesize ``text`` chunk by chunk and merge the result into one MP3.

    The text is split with ``smart_text_chunking``; every chunk is
    synthesized concurrently through ``generate_safe_audio``; the resulting
    files are post-processed in a thread pool, joined with a short silent
    pause between them, compressed, normalized and exported.

    Args:
        text: Text to synthesize.
        output_file: Path the merged MP3 is written to.
        VOICE_TA: Voice name to synthesize with. If it contains ``"ta-IN"``
            the call is bilingual: chunks containing a character in the
            Tamil Unicode block (U+0B80-U+0BFF) use this voice and all
            other chunks use ``VOICE_EN``. Any other non-empty value is
            used for every chunk. When falsy, ``VOICE_EN`` is used.
        max_concurrent: Maximum number of simultaneous TTS requests.

    Returns:
        ``output_file`` on success, ``None`` on any failure (errors are
        printed, not raised).
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
        default_voice = VOICE_TA or VOICE_EN

        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)

        # Prepare all tasks
        tasks = []
        for chunk in chunks:
            if is_bilingual_tamil:
                # Previously the expression always evaluated to VOICE_TA
                # here, so the script detection had no effect. Non-Tamil
                # chunks now fall back to the English voice.
                is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
                voice = VOICE_TA if is_tamil else VOICE_EN
            else:
                voice = default_voice
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Generate all audio files concurrently
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # return_exceptions=True hands failures back as values; report them
        # instead of dropping them silently in the filter below.
        for result in audio_files:
            if isinstance(result, BaseException):
                print(f"Error generating audio chunk: {result}")

        # Filter successful files
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Process audio segments in parallel using ThreadPoolExecutor.
        # NOTE(review): this call is synchronous, so the event loop is
        # blocked until every segment has been processed.
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        # Filter out None segments
        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        # Merge audio segments, inserting a pause between consecutive ones
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        # Export with high quality
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f" Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
233
 
 
234
  async def generate_tts_optimized(id, lines, lang):
235
  """Optimized TTS generation function."""
236
  voice = {
 
267
  "Czech": "cs-CZ-VlastaNeural",
268
  "Hungarian": "hu-HU-NoemiNeural"
269
  }
270
+
271
  audio_name = f"audio{id}.mp3"
272
  audio_path = os.path.join(AUDIO_DIR, audio_name)
273
+
274
  if "&&&" in lang:
275
  listf = lang.split("&&&")
276
  text = listf[0].strip()
277
+ lang_name = listf[1].strip()
278
  voice_to_use = voice.get(lang_name, VOICE_EN)
279
  else:
280
+ text = lines[id]
281
  voice_to_use = voice.get(lang, VOICE_EN)
282
+
283
+ # Increase max_concurrent for more speed (adjust based on your system)
284
  output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
285
+
286
  if output and os.path.exists(audio_path):
287
+ audio = MP3(audio_path)
288
+ duration = audio.info.length
289
+ return duration, audio_path
290
+
 
 
 
 
291
  return None, None
292
 
 
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Runs ``generate_tts_optimized`` to completion with ``asyncio.run``.

    Args:
        id: Forwarded unchanged to ``generate_tts_optimized``.
        lines: Forwarded unchanged to ``generate_tts_optimized``.
        lang: Forwarded unchanged to ``generate_tts_optimized``.

    Returns:
        Whatever ``generate_tts_optimized`` returns, or ``(None, None)``
        if running it raised (the error is printed, not propagated), which
        matches the failure value that coroutine itself returns.
    """
    try:
        return asyncio.run(generate_tts_optimized(id, lines, lang))
    except Exception as e:
        # Covers both failures inside the coroutine and asyncio.run itself
        # refusing to start (it raises RuntimeError when an event loop is
        # already running in this thread).
        print(f"Error in audio_func: {e}")
        return None, None
296
+
 
 
 
 
 
 
 
 
 
297
 
298
 
299
  def create_manim_script(problem_data, script_path, audio_path, scale=1):