sreepathi-ravikumar commited on
Commit
6e0cf4b
·
verified ·
1 Parent(s): 0bb2b49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -154
app.py CHANGED
@@ -47,7 +47,7 @@ import hashlib
47
  import json
48
  from concurrent.futures import ThreadPoolExecutor
49
  from functools import lru_cache
50
- from typing import List, Tuple, Optional
51
 
52
  import edge_tts
53
  from pydub import AudioSegment
@@ -56,8 +56,6 @@ from mutagen.mp3 import MP3
56
 
57
  # Voice configuration
58
  VOICE_EN = "en-IN-NeerjaNeural"
59
-
60
- # Directory paths - ensure they exist
61
  AUDIO_DIR = os.path.join(os.getcwd(), "audio")
62
  os.makedirs(AUDIO_DIR, exist_ok=True)
63
 
@@ -67,16 +65,16 @@ TAG_PATTERN = re.compile(r'<[^>]*>')
67
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
68
  SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
69
  WHITESPACE_PATTERN = re.compile(r'\s+')
70
- # Improved sentence splitting - more conservative
71
- SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-ZА-ЯА-Я\u0B80-\u0BFF\u0900-\u097F])')
72
- # Avoid splitting on commas in numbers
73
  SUB_PATTERN = re.compile(r'(?<!\d),(?!\d)\s*')
74
 
75
  # Cache for chunking results
76
- _chunking_cache = {}
77
 
78
  def clean_text_for_tts(text: str) -> str:
79
- """Cleans text before TTS with proper Unicode handling."""
80
  if not text:
81
  return ""
82
 
@@ -106,129 +104,146 @@ def clean_text_for_tts(text: str) -> str:
106
 
107
  return text.strip()
108
 
109
- def _protect_special_patterns(text: str) -> str:
110
- """Protect numbers with commas and abbreviations from being split."""
111
- # Protect numbers with commas: 1,234 -> 1<<COMMA>>234
112
- text = re.sub(r'(\d),(\d)', r'\1<<COMMA>>\2', text)
 
 
 
113
 
114
- # Protect common abbreviations
115
- abbreviations = ['Dr', 'Mr', 'Mrs', 'Ms', 'Prof', 'Sr', 'Jr', 'St', 'etc', 'vs', 'approx', 'no']
116
- for abbr in abbreviations:
117
- text = re.sub(rf'\b{abbr}\.(\s|$)', rf'{abbr}<<DOT>>\1', text, flags=re.IGNORECASE)
118
 
119
- # Protect currency symbols with numbers: $1,234.50 -> <<CURR>>1<<COMMA>>234<<DOT>>50
120
- text = re.sub(r'([$€£¥])(\d[\d,.]*\d)', r'<<CURR>>\2', text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- return text
123
-
124
- def _restore_special_patterns(text: str) -> str:
125
- """Restore protected patterns."""
126
- text = text.replace('<<COMMA>>', ',')
127
- text = text.replace('<<DOT>>', '.')
128
- text = text.replace('<<CURR>>', '$')
129
- return text
130
 
131
- def smart_text_chunking(text: str, max_chars: int = 250) -> Tuple[str, ...]:
132
  """
133
- Deterministic text chunking with overlap and pattern protection.
134
- Returns the same chunks for the same input always.
135
  """
136
- if not text:
137
- return tuple()
138
-
139
- # Create cache key
140
- cache_key = hashlib.md5(f"{text}_{max_chars}".encode()).hexdigest()
141
- if cache_key in _chunking_cache:
142
- return _chunking_cache[cache_key]
143
-
144
  cleaned = clean_text_for_tts(text)
145
  if not cleaned:
146
- return tuple()
147
 
148
- # Protect special patterns before splitting
149
- protected = _protect_special_patterns(cleaned)
150
-
151
- # Initial sentence splitting
152
- sentences = []
153
- for sentence in SENTENCE_PATTERN.split(protected):
154
- sentence = sentence.strip()
155
- if sentence:
156
- sentences.append(sentence)
157
 
 
158
  chunks = []
159
  current_chunk = ""
160
- overlap_words = []
161
 
162
- for sentence in sentences:
163
- sentence = sentence.strip()
164
- if not sentence:
165
- continue
166
-
167
- # Try adding sentence to current chunk
168
- test_chunk = f"{current_chunk} {sentence}" if current_chunk else sentence
169
- test_chunk = test_chunk.strip()
170
 
171
- if len(test_chunk) <= max_chars:
172
  current_chunk = test_chunk
 
173
  else:
174
- # Need to split current sentence
175
  if current_chunk:
176
- # Add overlap from previous chunk
177
- if overlap_words:
178
- overlap_text = " ".join(overlap_words)
179
- current_chunk = f"{overlap_text} {current_chunk}"
180
- overlap_words = []
181
-
182
  chunks.append(current_chunk)
183
 
184
- # If sentence itself is too long, split by words
185
- if len(sentence) > max_chars:
186
- words = sentence.split()
 
187
  temp_chunk = ""
 
188
 
189
  for word in words:
190
- test = f"{temp_chunk} {word}" if temp_chunk else word
191
  if len(test) <= max_chars:
192
  temp_chunk = test
 
193
  else:
194
  if temp_chunk:
195
- # Save last 5 words for overlap
196
- last_words = temp_chunk.split()[-5:]
197
- overlap_words = last_words.copy()
198
  chunks.append(temp_chunk)
199
  temp_chunk = word
 
200
 
201
  if temp_chunk:
202
  current_chunk = temp_chunk
 
203
  else:
204
- current_chunk = sentence
 
205
 
206
  # Add final chunk
207
  if current_chunk:
208
- if overlap_words:
209
- overlap_text = " ".join(overlap_words)
210
- current_chunk = f"{overlap_text} {current_chunk}"
211
  chunks.append(current_chunk)
212
 
213
- # Restore protected patterns and filter empty chunks
214
- result_chunks = []
215
- for chunk in chunks:
216
- restored = _restore_special_patterns(chunk)
217
- if restored.strip():
218
- result_chunks.append(restored)
 
 
 
 
 
 
 
 
 
 
 
219
 
220
- result = tuple(result_chunks)
221
- _chunking_cache[cache_key] = result
222
- return result
223
 
224
- async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
225
  chunk_index: int) -> Tuple[Optional[str], int]:
226
- """Generate audio with rate limiting, caching, retry logic, and order preservation."""
227
  if not text or len(text) < 2:
228
  return None, chunk_index
229
 
230
  # Create deterministic cache key
231
- text_hash = hashlib.md5(f"{text}_{voice}".encode()).hexdigest()
 
232
  cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
233
 
234
  # Check disk cache
@@ -241,7 +256,7 @@ async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphor
241
 
242
  for attempt in range(max_retries):
243
  try:
244
- # Create temp file for generation
245
  with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
246
  temp_filename = tmp.name
247
 
@@ -253,14 +268,7 @@ async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphor
253
  # Move to cache location
254
  os.replace(temp_filename, cache_filename)
255
  return cache_filename, chunk_index
256
- else:
257
- # Clean up temp file
258
- try:
259
- if os.path.exists(temp_filename):
260
- os.unlink(temp_filename)
261
- except:
262
- pass
263
-
264
  except Exception as e:
265
  # Clean up temp file on error
266
  try:
@@ -275,13 +283,12 @@ async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphor
275
 
276
  # Exponential backoff with jitter
277
  sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
278
- print(f"Rate limit hit on chunk {chunk_index}. Retrying in {sleep_time:.2f}s...")
279
  await asyncio.sleep(sleep_time)
280
 
281
  return None, chunk_index
282
 
283
  def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[AudioSegment], int]:
284
- """Process audio segment with proper cleanup and order preservation."""
285
  audio_file, chunk_index = audio_data
286
 
287
  try:
@@ -289,60 +296,54 @@ def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[Au
289
  return None, chunk_index
290
 
291
  segment = AudioSegment.from_file(audio_file)
292
- segment = normalize(segment)
293
 
294
- # Only strip silence for longer segments
295
- if len(segment) > 200:
296
- try:
297
- segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
298
- except:
299
- pass
300
 
301
  return segment, chunk_index
302
 
303
  except Exception as e:
304
  print(f"Warning: Error processing audio segment {chunk_index}: {e}")
305
  return None, chunk_index
306
- finally:
307
- # Note: We don't delete cache files as they're reused
308
- pass
309
 
310
- async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
311
  VOICE_TA: Optional[str] = None, max_concurrent: int = 5) -> Optional[str]:
312
- """Optimized bilingual TTS with parallel processing and order preservation."""
313
- print("Starting optimized bilingual TTS processing...")
314
 
315
  try:
316
- # Get chunks deterministically
317
- chunks = smart_text_chunking(text, max_chars=250)
318
- if not chunks:
319
- print("Error: No valid text chunks after cleaning")
320
  return None
321
 
322
- print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
323
 
324
- # Detect language once for entire text
325
- is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
326
- has_tamil_chars = any('\u0B80' <= char <= '\u0BFF' for char in text)
327
-
328
- # Choose default voice
329
- default_voice = VOICE_TA if (is_bilingual_tamil and has_tamil_chars) else (VOICE_TA or VOICE_EN)
 
 
 
 
 
330
 
331
  # Semaphore for rate limiting
332
  semaphore = asyncio.Semaphore(max_concurrent)
333
 
334
- # Prepare tasks with indices
335
  tasks = []
336
- for i, chunk in enumerate(chunks):
337
- # Use Tamil voice only if chunk contains Tamil characters AND we have Tamil voice
338
- if is_bilingual_tamil and any('\u0B80' <= char <= '\u0BFF' for char in chunk):
339
- voice = VOICE_TA
340
- else:
341
- voice = default_voice
342
-
343
- tasks.append(generate_safe_audio(chunk, voice, semaphore, i))
344
 
345
- # Generate all audio files concurrently
346
  results = await asyncio.gather(*tasks, return_exceptions=False)
347
 
348
  # Filter successful results and maintain order
@@ -350,23 +351,21 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
350
  for result in results:
351
  if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
352
  audio_data.append(result)
353
- elif result is not None:
354
- print(f"Warning: Got unexpected result type: {type(result)}")
355
 
356
  if not audio_data:
357
  print("Error: No audio was successfully generated")
358
  return None
359
 
360
- # Sort by chunk index to guarantee correct order
361
  audio_data.sort(key=lambda x: x[1])
362
 
363
- print(f"Successfully generated {len(audio_data)}/{len(chunks)} audio segments")
364
 
365
  # Process audio segments in parallel
366
  with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
367
  processed = list(executor.map(process_audio_segment_fast, audio_data))
368
 
369
- # Filter out None segments and sort by index
370
  processed = [(seg, idx) for seg, idx in processed if seg is not None]
371
  processed.sort(key=lambda x: x[1])
372
 
@@ -376,23 +375,21 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
376
  print("Error: No audio segments were successfully processed")
377
  return None
378
 
379
- print(f"Processed {len(audio_segments)} segments in correct order")
380
 
381
- # Merge audio segments with smooth transitions
382
- print("Merging audio segments...")
383
  merged_audio = audio_segments[0]
384
- pause = AudioSegment.silent(duration=150) # Shorter pause for smoother flow
385
 
386
  for segment in audio_segments[1:]:
387
- merged_audio += pause + segment
 
388
 
389
- # Apply final processing
390
- print("Applying final audio processing...")
391
  try:
392
  merged_audio = merged_audio.compress_dynamic_range(
393
- threshold=-20.0,
394
- ratio=4.0,
395
- attack=5.0,
396
  release=50.0
397
  )
398
  except:
@@ -400,15 +397,14 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
400
 
401
  merged_audio = normalize(merged_audio)
402
 
403
- # Export with high quality
404
  merged_audio.export(output_file, format="mp3", bitrate="192k")
405
 
406
- # Verify output
407
  if os.path.exists(output_file) and os.path.getsize(output_file) > 1024:
408
  print(f"✅ Audio successfully generated: {output_file}")
409
  return output_file
410
  else:
411
- print(f"Error: Generated file is empty or missing: {output_file}")
412
  return None
413
 
414
  except Exception as main_error:
@@ -418,7 +414,7 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
418
 
419
  async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
420
  """Optimized TTS generation function."""
421
- voice = {
422
  "English": "en-US-JennyNeural",
423
  "Tamil": "ta-IN-PallaviNeural",
424
  "Hindi": "hi-IN-SwaraNeural",
@@ -460,10 +456,10 @@ async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[fl
460
  listf = lang.split("&&&")
461
  text = listf[0].strip()
462
  lang_name = listf[1].strip() if len(listf) > 1 else "English"
463
- voice_to_use = voice.get(lang_name, VOICE_EN)
464
  else:
465
  text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
466
- voice_to_use = voice.get(lang, VOICE_EN)
467
 
468
  # Use max_concurrent=5 for better rate limit handling
469
  output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
@@ -493,7 +489,6 @@ def audio_func(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str
493
  traceback.print_exc()
494
  return None, None
495
 
496
-
497
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
498
  """Generate Manim script from problem data with robust wrapping."""
499
 
 
47
  import json
48
  from concurrent.futures import ThreadPoolExecutor
49
  from functools import lru_cache
50
+ from typing import List, Tuple, Optional, Dict
51
 
52
  import edge_tts
53
  from pydub import AudioSegment
 
56
 
57
  # Voice configuration
58
  VOICE_EN = "en-IN-NeerjaNeural"
 
 
59
  AUDIO_DIR = os.path.join(os.getcwd(), "audio")
60
  os.makedirs(AUDIO_DIR, exist_ok=True)
61
 
 
65
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
66
  SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
67
  WHITESPACE_PATTERN = re.compile(r'\s+')
68
+ # Conservative sentence splitting that doesn't break on abbreviations
69
+ SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+(?=[A-Z])')
70
+ # Avoid splitting on commas inside numbers
71
  SUB_PATTERN = re.compile(r'(?<!\d),(?!\d)\s*')
72
 
73
  # Cache for chunking results
74
+ _chunking_cache: Dict[str, Tuple[str, ...]] = {}
75
 
76
  def clean_text_for_tts(text: str) -> str:
77
+ """Cleans text while preserving Tamil/Indic characters and code-switched punctuation."""
78
  if not text:
79
  return ""
80
 
 
104
 
105
  return text.strip()
106
 
107
+ def split_by_word_boundary(text: str) -> List[str]:
108
+ """
109
+ Intelligently splits text by language boundaries while preserving code-switched words.
110
+ Example: "Voltage னு" → ["Voltage", " னு"]
111
+ """
112
+ if not text:
113
+ return []
114
 
115
+ segments = []
116
+ current_segment = ""
117
+ current_lang = None # 'en', 'ta', or None
 
118
 
119
+ i = 0
120
+ while i < len(text):
121
+ char = text[i]
122
+
123
+ # Detect language of current character
124
+ if '\u0B80' <= char <= '\u0BFF': # Tamil range
125
+ char_lang = 'ta'
126
+ elif char.isalpha() or char in '-':
127
+ char_lang = 'en'
128
+ else:
129
+ char_lang = current_lang # Punctuation/space keeps current language
130
+
131
+ # Start new segment on language boundary
132
+ if current_lang and char_lang and current_lang != char_lang:
133
+ # Don't split on hyphens in code-switched words like "simple-ஆ"
134
+ if char == '-' and i > 0 and i < len(text) - 1:
135
+ # Check if it's a code-switched hyphen (English-Tamil)
136
+ prev_char = text[i-1]
137
+ next_char = text[i+1]
138
+ if prev_char.isalpha() and ('\u0B80' <= next_char <= '\u0BFF'):
139
+ # Keep hyphen with current segment
140
+ current_segment += char
141
+ i += 1
142
+ continue
143
+
144
+ if current_segment.strip():
145
+ segments.append(current_segment)
146
+ current_segment = char
147
+ current_lang = char_lang
148
+ else:
149
+ current_segment += char
150
+ current_lang = char_lang or current_lang
151
+
152
+ i += 1
153
 
154
+ if current_segment.strip():
155
+ segments.append(current_segment)
156
+
157
+ return segments
 
 
 
 
158
 
159
+ def chunk_text_with_overlap(text: str, max_chars: int = 250) -> List[Tuple[str, int]]:
160
  """
161
+ Creates chunks with overlap for smooth transitions.
162
+ Returns list of (chunk_text, chunk_index)
163
  """
164
+ # Clean first
 
 
 
 
 
 
 
165
  cleaned = clean_text_for_tts(text)
166
  if not cleaned:
167
+ return []
168
 
169
+ # Split into segments by language boundary
170
+ segments = split_by_word_boundary(cleaned)
 
 
 
 
 
 
 
171
 
172
+ # Group segments into chunks
173
  chunks = []
174
  current_chunk = ""
175
+ current_words = []
176
 
177
+ for segment in segments:
178
+ test_chunk = current_chunk + segment if current_chunk else segment
179
+ test_words = test_chunk.split()
 
 
 
 
 
180
 
181
+ if len(test_chunk) <= max_chars and len(test_words) <= 20:
182
  current_chunk = test_chunk
183
+ current_words = test_words
184
  else:
185
+ # Need to start new chunk
186
  if current_chunk:
 
 
 
 
 
 
187
  chunks.append(current_chunk)
188
 
189
+ # Handle long segments
190
+ if len(segment) > max_chars:
191
+ # Split long segment by words
192
+ words = segment.split()
193
  temp_chunk = ""
194
+ temp_words = []
195
 
196
  for word in words:
197
+ test = temp_chunk + " " + word if temp_chunk else word
198
  if len(test) <= max_chars:
199
  temp_chunk = test
200
+ temp_words.append(word)
201
  else:
202
  if temp_chunk:
 
 
 
203
  chunks.append(temp_chunk)
204
  temp_chunk = word
205
+ temp_words = [word]
206
 
207
  if temp_chunk:
208
  current_chunk = temp_chunk
209
+ current_words = temp_words
210
  else:
211
+ current_chunk = segment
212
+ current_words = segment.split()
213
 
214
  # Add final chunk
215
  if current_chunk:
 
 
 
216
  chunks.append(current_chunk)
217
 
218
+ # Add overlap between chunks (last 3 words of chunk N become first 3 words of chunk N+1)
219
+ overlapped_chunks = []
220
+ for i, chunk in enumerate(chunks):
221
+ if i > 0:
222
+ # Get last 3 words from previous chunk
223
+ prev_chunk = chunks[i-1]
224
+ prev_words = prev_chunk.split()
225
+ overlap_words = prev_words[-3:] if len(prev_words) >= 3 else prev_words
226
+
227
+ if overlap_words:
228
+ overlap_text = " ".join(overlap_words)
229
+ # Add overlap if it won't make the chunk too long
230
+ test_chunk = overlap_text + " " + chunk
231
+ if len(test_chunk) <= max_chars:
232
+ chunk = test_chunk
233
+
234
+ overlapped_chunks.append((chunk, i))
235
 
236
+ return overlapped_chunks
 
 
237
 
238
+ async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
239
  chunk_index: int) -> Tuple[Optional[str], int]:
240
+ """Generate audio with rate limiting, caching, and retry logic."""
241
  if not text or len(text) < 2:
242
  return None, chunk_index
243
 
244
  # Create deterministic cache key
245
+ cache_key = f"{text}_{voice}"
246
+ text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
247
  cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
248
 
249
  # Check disk cache
 
256
 
257
  for attempt in range(max_retries):
258
  try:
259
+ # Create temp file
260
  with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
261
  temp_filename = tmp.name
262
 
 
268
  # Move to cache location
269
  os.replace(temp_filename, cache_filename)
270
  return cache_filename, chunk_index
271
+
 
 
 
 
 
 
 
272
  except Exception as e:
273
  # Clean up temp file on error
274
  try:
 
283
 
284
  # Exponential backoff with jitter
285
  sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
 
286
  await asyncio.sleep(sleep_time)
287
 
288
  return None, chunk_index
289
 
290
  def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[AudioSegment], int]:
291
+ """Process audio segment with proper cleanup."""
292
  audio_file, chunk_index = audio_data
293
 
294
  try:
 
296
  return None, chunk_index
297
 
298
  segment = AudioSegment.from_file(audio_file)
 
299
 
300
+ # Add micro-padding to prevent clipping
301
+ if len(segment) > 0:
302
+ segment = AudioSegment.silent(duration=50) + segment + AudioSegment.silent(duration=50)
303
+
304
+ segment = normalize(segment)
 
305
 
306
  return segment, chunk_index
307
 
308
  except Exception as e:
309
  print(f"Warning: Error processing audio segment {chunk_index}: {e}")
310
  return None, chunk_index
 
 
 
311
 
312
+ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
313
  VOICE_TA: Optional[str] = None, max_concurrent: int = 5) -> Optional[str]:
314
+ """Optimized bilingual TTS with proper ordering and smooth transitions."""
315
+ print("Starting bilingual TTS processing...")
316
 
317
  try:
318
+ # Split text into chunks with overlap
319
+ chunks_with_indices = chunk_text_with_overlap(text, max_chars=250)
320
+ if not chunks_with_indices:
321
+ print("Error: No valid text chunks after processing")
322
  return None
323
 
324
+ print(f"Processing {len(chunks_with_indices)} text chunks...")
325
 
326
+ # Determine which chunks need Tamil voice
327
+ chunks_to_generate = []
328
+ for chunk_text, chunk_index in chunks_with_indices:
329
+ has_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk_text)
330
+
331
+ if VOICE_TA and has_tamil:
332
+ voice = VOICE_TA
333
+ else:
334
+ voice = VOICE_TA or VOICE_EN
335
+
336
+ chunks_to_generate.append((chunk_text, voice, chunk_index))
337
 
338
  # Semaphore for rate limiting
339
  semaphore = asyncio.Semaphore(max_concurrent)
340
 
341
+ # Prepare tasks
342
  tasks = []
343
+ for chunk_text, voice, chunk_index in chunks_to_generate:
344
+ tasks.append(generate_safe_audio(chunk_text, voice, semaphore, chunk_index))
 
 
 
 
 
 
345
 
346
+ # Generate all audio files
347
  results = await asyncio.gather(*tasks, return_exceptions=False)
348
 
349
  # Filter successful results and maintain order
 
351
  for result in results:
352
  if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
353
  audio_data.append(result)
 
 
354
 
355
  if not audio_data:
356
  print("Error: No audio was successfully generated")
357
  return None
358
 
359
+ # Sort by chunk index
360
  audio_data.sort(key=lambda x: x[1])
361
 
362
+ print(f"Successfully generated {len(audio_data)} audio segments")
363
 
364
  # Process audio segments in parallel
365
  with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
366
  processed = list(executor.map(process_audio_segment_fast, audio_data))
367
 
368
+ # Filter and sort
369
  processed = [(seg, idx) for seg, idx in processed if seg is not None]
370
  processed.sort(key=lambda x: x[1])
371
 
 
375
  print("Error: No audio segments were successfully processed")
376
  return None
377
 
378
+ print(f"Merging {len(audio_segments)} audio segments with crossfade...")
379
 
380
+ # Merge with crossfade for smooth transitions
 
381
  merged_audio = audio_segments[0]
 
382
 
383
  for segment in audio_segments[1:]:
384
+ # Crossfade 30ms for smooth transition
385
+ merged_audio = merged_audio.append(segment, crossfade=30)
386
 
387
+ # Apply compression for consistent volume
 
388
  try:
389
  merged_audio = merged_audio.compress_dynamic_range(
390
+ threshold=-20.0,
391
+ ratio=2.5, # Gentler compression for more natural sound
392
+ attack=5.0,
393
  release=50.0
394
  )
395
  except:
 
397
 
398
  merged_audio = normalize(merged_audio)
399
 
400
+ # Export
401
  merged_audio.export(output_file, format="mp3", bitrate="192k")
402
 
 
403
  if os.path.exists(output_file) and os.path.getsize(output_file) > 1024:
404
  print(f"✅ Audio successfully generated: {output_file}")
405
  return output_file
406
  else:
407
+ print(f"Error: Generated file is empty or missing")
408
  return None
409
 
410
  except Exception as main_error:
 
414
 
415
  async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
416
  """Optimized TTS generation function."""
417
+ voice_map = {
418
  "English": "en-US-JennyNeural",
419
  "Tamil": "ta-IN-PallaviNeural",
420
  "Hindi": "hi-IN-SwaraNeural",
 
456
  listf = lang.split("&&&")
457
  text = listf[0].strip()
458
  lang_name = listf[1].strip() if len(listf) > 1 else "English"
459
+ voice_to_use = voice_map.get(lang_name, VOICE_EN)
460
  else:
461
  text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
462
+ voice_to_use = voice_map.get(lang, VOICE_EN)
463
 
464
  # Use max_concurrent=5 for better rate limit handling
465
  output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
 
489
  traceback.print_exc()
490
  return None, None
491
 
 
492
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
493
  """Generate Manim script from problem data with robust wrapping."""
494