sreepathi-ravikumar committed on
Commit
13b333e
·
verified ·
1 Parent(s): 5e080af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -281
app.py CHANGED
@@ -37,6 +37,7 @@ API_KEY = "rkmentormindzofficaltokenkey12345"
37
 
38
 
39
 
 
40
  import os
41
  import re
42
  import html
@@ -61,396 +62,283 @@ VOICE_EN = "en-IN-NeerjaNeural"
61
  AUDIO_DIR = os.path.join(os.getcwd(), "audio")
62
  os.makedirs(AUDIO_DIR, exist_ok=True)
63
 
64
- # Pre-compiled regex patterns for speed
65
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
66
- TAG_PATTERN = re.compile(r'<[^>]*>')
67
- # Preserve sentence-ending abbreviations
68
- ABBREVIATION_PATTERN = re.compile(r'\b(?:Dr|Mr|Mrs|Ms|Prof|Sr|Jr|Ph\.D|M\.D|B\.A|M\.A)\.')
69
- # Sentence split avoiding abbreviations and numbers
70
- SENTENCE_SPLIT_PATTERN = re.compile(r'(?<!\d)(?<![A-Z])(?<=[.!?।॥])\s+(?=[A-Z\u0B80-\u0BFF])')
71
  WHITESPACE_PATTERN = re.compile(r'\s+')
 
 
 
 
72
 
73
 
74
- def clean_text_for_tts(text, preserve_structure=True):
75
- """
76
- Cleans text for TTS with language-aware preservation.
77
- No caching to avoid cross-contamination between different contexts.
78
- """
79
  if not text:
80
  return ""
81
-
82
  text = str(text).strip()
83
  text = html.unescape(text)
84
 
85
- # Remove URLs
86
  text = URL_PATTERN.sub('', text)
87
-
88
- # Remove HTML tags only (not angle brackets in general)
89
  text = TAG_PATTERN.sub('', text)
 
 
 
90
 
91
- # Only remove truly problematic characters, preserve hyphens, apostrophes
92
- # Preserve: hyphens, apostrophes, numbers with commas, currency symbols
93
- if preserve_structure:
94
- # Only remove control characters and extreme special chars
95
- text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]', '', text)
96
- text = re.sub(r'[{}[\]\\`~]', '', text)
97
- else:
98
- # More aggressive cleaning
99
- text = re.sub(r'[#@$%^&*_+=|\\`~{}[\]]', '', text)
100
-
101
- # Normalize line breaks to spaces
102
- text = text.replace('\n', ' ').replace('\t', ' ').replace('\r', ' ')
103
 
104
- # SSML keyword removal - only remove if they appear as XML-like tags or attributes
105
- # Don't remove legitimate usage in normal text
106
- text = re.sub(r'</?(?:voice|speak|prosody|ssml)[^>]*>', '', text, flags=re.IGNORECASE)
107
- text = re.sub(r'\bxmlns\s*=\s*["\'][^"\']*["\']', '', text, flags=re.IGNORECASE)
108
-
109
- # Use NFC (Canonical Composition) instead of NFKD for better Unicode preservation
110
- # NFC preserves grapheme clusters in Tamil and other Indic scripts
111
  text = unicodedata.normalize('NFC', text)
112
-
113
- # Collapse multiple spaces
114
  text = WHITESPACE_PATTERN.sub(' ', text)
115
-
116
  return text.strip()
117
 
118
 
119
- def detect_language_segments(text):
120
- """
121
- Detects language at the text level (not chunk level) to avoid mid-sentence voice switching.
122
- Returns a single dominant language code.
123
- """
124
- if not text:
125
- return 'en'
126
-
127
- # Count Unicode ranges
128
- tamil_chars = sum(1 for c in text if '\u0B80' <= c <= '\u0BFF')
129
- devanagari_chars = sum(1 for c in text if '\u0900' <= c <= '\u097F')
130
- malayalam_chars = sum(1 for c in text if '\u0D00' <= c <= '\u0D7F')
131
- kannada_chars = sum(1 for c in text if '\u0C80' <= c <= '\u0CFF')
132
- telugu_chars = sum(1 for c in text if '\u0C00' <= c <= '\u0C7F')
133
-
134
- # Return dominant script
135
- max_chars = max(tamil_chars, devanagari_chars, malayalam_chars, kannada_chars, telugu_chars)
136
 
137
- if tamil_chars == max_chars and tamil_chars > 5:
138
- return 'ta'
139
- elif devanagari_chars == max_chars and devanagari_chars > 5:
140
- return 'hi'
141
- elif malayalam_chars == max_chars and malayalam_chars > 5:
142
- return 'ml'
143
- elif kannada_chars == max_chars and kannada_chars > 5:
144
- return 'kn'
145
- elif telugu_chars == max_chars and telugu_chars > 5:
146
- return 'te'
147
 
148
- return 'en'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
 
151
- def smart_text_chunking(text, max_chars=350):
 
152
  """
153
- Improved chunking that preserves word order, handles abbreviations, and maintains context.
154
- Deterministic splitting for cache consistency.
155
  """
156
- text = clean_text_for_tts(text, preserve_structure=True)
157
  if not text:
158
- return []
159
-
160
- # Protect abbreviations by replacing periods temporarily
161
- protected_text = ABBREVIATION_PATTERN.sub(lambda m: m.group(0).replace('.', '<<<DOT>>>'), text)
162
 
163
- # Split on sentence boundaries
164
- sentences = SENTENCE_SPLIT_PATTERN.split(protected_text)
165
-
166
- # Restore abbreviations
167
- sentences = [s.replace('<<<DOT>>>', '.') for s in sentences]
168
 
 
169
  chunks = []
170
- current_chunk = ""
171
 
172
  for sentence in sentences:
173
  sentence = sentence.strip()
174
  if not sentence:
175
  continue
176
 
177
- # If adding this sentence keeps us under limit, add it
178
- test_chunk = f"{current_chunk} {sentence}".strip() if current_chunk else sentence
179
 
180
- if len(test_chunk) <= max_chars:
181
- current_chunk = test_chunk
182
  else:
183
- # Save current chunk if it exists
184
- if current_chunk:
185
- chunks.append(current_chunk)
186
 
187
- # If single sentence is too long, split carefully
188
- if len(sentence) > max_chars:
189
- # Split on natural boundaries: semicolons, colons, dashes
190
- # But NOT on commas inside numbers or hyphens in compound words
191
-
192
- # First protect numbers with commas
193
- protected_sentence = re.sub(r'(\d+),(\d+)', r'\1<<<COMMA>>>\2', sentence)
194
 
195
- # Split on safe punctuation
196
- sub_parts = re.split(r'(?<=[;:—])\s+', protected_sentence)
197
 
198
- # Restore commas in numbers
199
- sub_parts = [p.replace('<<<COMMA>>>', ',') for p in sub_parts]
200
-
201
- for part in sub_parts:
202
- part = part.strip()
203
- if not part:
204
- continue
205
 
206
- if len(part) <= max_chars:
207
- if current_chunk and len(current_chunk) + len(part) + 1 <= max_chars:
208
- current_chunk = f"{current_chunk} {part}"
209
- else:
210
- if current_chunk:
211
- chunks.append(current_chunk)
212
- current_chunk = part
213
- else:
214
- # Last resort: split on word boundaries with overlap for continuity
215
  words = part.split()
216
  word_chunk = ""
217
 
218
- for i, word in enumerate(words):
219
- test_word_chunk = f"{word_chunk} {word}".strip() if word_chunk else word
220
-
221
  if len(test_word_chunk) <= max_chars:
222
  word_chunk = test_word_chunk
223
  else:
224
  if word_chunk:
225
- # Add overlap: include first word of next chunk in previous
226
- if i + 1 < len(words):
227
- overlap_chunk = f"{word_chunk} {words[i]}"
228
- if len(overlap_chunk) <= max_chars:
229
- chunks.append(overlap_chunk)
230
- else:
231
- chunks.append(word_chunk)
232
- else:
233
- chunks.append(word_chunk)
234
  word_chunk = word
235
 
236
  if word_chunk:
237
  current_chunk = word_chunk
238
- else:
239
- current_chunk = sentence
240
-
241
- # Don't forget the last chunk
242
- if current_chunk:
243
- chunks.append(current_chunk)
244
-
245
- return [c.strip() for c in chunks if c.strip()]
246
-
247
-
248
- async def generate_safe_audio(text, voice, semaphore, chunk_index=0):
249
- """
250
- Generate audio with robust retries, caching, and exponential backoff.
251
- Includes chunk_index for debugging and ordering verification.
252
- """
253
- # Create cache key with voice to avoid cross-language contamination
254
- cache_key = f"{text}_{voice}_{chunk_index}"
255
- text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
256
- cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
257
-
258
- # Check cache
259
- if os.path.exists(cache_filename):
260
- try:
261
- if os.path.getsize(cache_filename) > 1024: # At least 1KB
262
- print(f"✓ Using cached audio for chunk {chunk_index}")
263
- return cache_filename, chunk_index
264
- except Exception:
265
- pass
266
 
267
- async with semaphore:
268
- cleaned_text = clean_text_for_tts(text, preserve_structure=True)
269
- if not cleaned_text or len(cleaned_text) < 2:
270
- print(f"✗ Chunk {chunk_index} has no valid content after cleaning")
271
- return None, chunk_index
272
-
273
- # Retry configuration
274
- max_retries = 3
275
- base_delay = 2.0
276
-
277
- for attempt in range(max_retries):
278
- try:
279
- print(f"→ Generating chunk {chunk_index} (attempt {attempt + 1}): {cleaned_text[:50]}...")
280
- comm = edge_tts.Communicate(cleaned_text, voice=voice)
281
- await comm.save(cache_filename)
282
-
283
- # Validate file
284
- if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
285
- print(f"✓ Generated chunk {chunk_index}")
286
- return cache_filename, chunk_index
287
- else:
288
- print(f"✗ Chunk {chunk_index} file too small or missing")
289
-
290
- except Exception as e:
291
- if attempt == max_retries - 1:
292
- print(f"✗ Failed chunk {chunk_index} after {max_retries} attempts: {e}")
293
- return None, chunk_index
294
-
295
- # Exponential backoff with jitter
296
- sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
297
- print(f"⚠ Chunk {chunk_index} rate limit/error. Retrying in {sleep_time:.2f}s...")
298
- await asyncio.sleep(sleep_time)
299
-
300
- return None, chunk_index
301
 
302
 
303
  def process_audio_segment_fast(audio_data):
304
  """
305
- Fast audio processing with ordering preservation.
306
  Input: (audio_file, chunk_index)
307
  Output: (segment, chunk_index)
308
  """
309
  audio_file, chunk_index = audio_data
310
- segment = None
311
 
312
  try:
313
  if not audio_file or not os.path.exists(audio_file):
314
  return None, chunk_index
315
-
316
- segment = AudioSegment.from_file(audio_file)
317
 
318
- # Gentle normalization
319
- if segment.dBFS < -30:
320
- segment = segment.apply_gain(-segment.dBFS - 20)
321
 
322
- # Light silence trimming (preserve natural pauses)
323
- if len(segment) > 500:
324
  try:
325
- segment = segment.strip_silence(
326
- silence_len=100,
327
- silence_thresh=-45,
328
- padding=100
329
- )
330
- except Exception:
331
- pass
332
 
333
  return segment, chunk_index
334
 
335
  except Exception as e:
336
- print(f" Error processing audio segment {chunk_index}: {e}")
337
  return None, chunk_index
338
 
339
 
340
- async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=4):
341
  """
342
- Optimized bilingual TTS with proper ordering, overlap handling, and language detection.
 
343
  """
344
- print(f"\n{'='*60}")
345
- print(f"Starting TTS processing: {len(text)} chars")
346
- print(f"{'='*60}")
347
 
348
  try:
349
- # Detect primary language ONCE for entire text
350
- primary_lang = detect_language_segments(text)
351
- print(f"Detected primary language: {primary_lang}")
352
-
353
- # Chunk text deterministically
354
- chunks = smart_text_chunking(text, max_chars=350)
355
-
356
  if not chunks:
357
- print(" No valid text chunks after cleaning")
358
  return None
359
 
360
- print(f"Split into {len(chunks)} chunks")
361
- for i, chunk in enumerate(chunks[:3]):
362
- print(f" Chunk {i}: {chunk[:60]}...")
363
 
364
- # Determine voice
365
- if VOICE_TA and ("ta-IN" in VOICE_TA and primary_lang == 'ta'):
366
- voice = VOICE_TA
367
- else:
368
- voice = VOICE_TA or VOICE_EN
369
-
370
- print(f"Using voice: {voice}")
371
 
372
- # Create semaphore for rate limiting
373
  semaphore = asyncio.Semaphore(max_concurrent)
374
 
375
- # Generate all audio with index tracking
376
- tasks = [
377
- generate_safe_audio(chunk, voice, semaphore, chunk_index=i)
378
- for i, chunk in enumerate(chunks)
379
- ]
 
380
 
 
381
  results = await asyncio.gather(*tasks, return_exceptions=True)
382
 
383
- # Filter and sort by index to preserve order
384
- valid_results = [
385
- (audio_file, idx)
386
- for audio_file, idx in results
387
- if not isinstance(audio_file, Exception) and audio_file and os.path.exists(audio_file)
388
- ]
389
 
390
- if not valid_results:
391
- print(" No audio was successfully generated")
392
  return None
393
 
394
  # Sort by chunk index to guarantee correct order
395
- valid_results.sort(key=lambda x: x[1])
396
 
397
- print(f" Generated {len(valid_results)}/{len(chunks)} audio segments")
398
 
399
- # Process audio with ordering
400
- with ThreadPoolExecutor(max_workers=min(len(valid_results), 8)) as executor:
401
- processed = list(executor.map(process_audio_segment_fast, valid_results))
402
 
403
- # Sort again after processing and filter None
404
  processed = [(seg, idx) for seg, idx in processed if seg is not None]
405
  processed.sort(key=lambda x: x[1])
406
 
407
  audio_segments = [seg for seg, idx in processed]
408
 
409
  if not audio_segments:
410
- print(" No audio segments were successfully processed")
411
  return None
412
 
413
- print(f"Processed {len(audio_segments)} segments in correct order")
414
 
415
- # Merge with natural pauses
416
  print("Merging audio segments...")
417
  merged_audio = audio_segments[0]
418
- pause = AudioSegment.silent(duration=180)
419
 
420
- for i, segment in enumerate(audio_segments[1:], 1):
421
  merged_audio += pause + segment
422
 
423
- # Final processing
424
  print("Applying final audio processing...")
425
-
426
- # Gentle compression
427
  merged_audio = merged_audio.compress_dynamic_range(
428
- threshold=-20.0,
429
- ratio=3.0,
430
- attack=5.0,
431
  release=50.0
432
  )
 
433
 
434
- # Final normalization
435
- merged_audio = normalize(merged_audio, headroom=0.1)
436
-
437
- # Export
438
  merged_audio.export(output_file, format="mp3", bitrate="192k")
439
  print(f"✅ Audio successfully generated: {output_file}")
440
- print(f" Duration: {len(merged_audio)/1000:.2f}s")
441
- print(f"{'='*60}\n")
442
 
443
  return output_file
444
 
445
  except Exception as main_error:
446
- print(f"Main error in bilingual TTS: {main_error}")
447
  traceback.print_exc()
448
  return None
449
 
450
 
451
  async def generate_tts_optimized(id, lines, lang):
452
- """Optimized TTS generation function with proper error handling."""
453
- voice_map = {
454
  "English": "en-US-JennyNeural",
455
  "Tamil": "ta-IN-PallaviNeural",
456
  "Hindi": "hi-IN-SwaraNeural",
@@ -488,21 +376,17 @@ async def generate_tts_optimized(id, lines, lang):
488
  audio_name = f"audio{id}.mp3"
489
  audio_path = os.path.join(AUDIO_DIR, audio_name)
490
 
491
- # Parse input
492
  if "&&&" in lang:
493
- parts = lang.split("&&&")
494
- text = parts[0].strip()
495
- lang_name = parts[1].strip() if len(parts) > 1 else "English"
496
- voice_to_use = voice_map.get(lang_name, VOICE_EN)
497
  else:
498
- if isinstance(lines, (list, tuple)) and 0 <= id < len(lines):
499
- text = str(lines[id])
500
- else:
501
- text = str(lines)
502
- voice_to_use = voice_map.get(lang, VOICE_EN)
503
 
504
- # Generate audio
505
- output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=4)
506
 
507
  if output and os.path.exists(audio_path):
508
  try:
@@ -510,7 +394,7 @@ async def generate_tts_optimized(id, lines, lang):
510
  duration = audio.info.length
511
  return duration, audio_path
512
  except Exception as e:
513
- print(f"Error reading audio file metadata: {e}")
514
  return None, None
515
 
516
  return None, None
@@ -526,14 +410,12 @@ def audio_func(id, lines, lang):
526
  finally:
527
  loop.close()
528
  except Exception as e:
529
- print(f"Error in audio_func: {e}")
530
  traceback.print_exc()
531
  return None, None
532
 
533
 
534
 
535
-
536
-
537
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
538
  """Generate Manim script from problem data with robust wrapping."""
539
 
 
37
 
38
 
39
 
40
+
41
  import os
42
  import re
43
  import html
 
62
  AUDIO_DIR = os.path.join(os.getcwd(), "audio")
63
  os.makedirs(AUDIO_DIR, exist_ok=True)
64
 
65
# Pre-compiled regex patterns: built once at import time and reused on every call.

# http(s):// links and bare "www." links, up to whitespace, an angle bracket or a quote.
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
# Complete <...> tags, plus any stray '<' or '>' left behind.
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
# Curly and square brackets.
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
# Symbols removed before synthesis; note that this set includes the backslash.
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
# Any run of whitespace (spaces, tabs, real newlines).
WHITESPACE_PATTERN = re.compile(r'\s+')
# Sentence boundary: whitespace that follows . ! ? or a danda (। ॥) and precedes
# an uppercase Latin letter, an uppercase Cyrillic letter (U+0410-U+042F), or a
# Tamil / Devanagari character. Requiring such a start character keeps the split
# conservative, e.g. "v2. beta" is not broken apart.
SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-Z\u0410-\u042F\u0B80-\u0BFF\u0900-\u097F])')
# Clause boundary: whitespace after a comma or semicolon. Colons are left out on
# purpose so values such as the time "5:30" are never split. The lookbehind keeps
# the punctuation attached to the preceding piece.
SUB_PATTERN = re.compile(r'(?<=[,;])\s+')
75
 
76
 
77
@lru_cache(maxsize=1024)
def clean_text_for_tts(text):
    """Return *text* cleaned up for speech synthesis.

    Steps, in order: HTML-unescape, drop URLs, drop tags and stray angle
    brackets, turn literal escape sequences into spaces, drop brackets and
    special symbols, drop SSML keywords, NFC-normalize, collapse whitespace.

    Results are memoized with ``lru_cache``, so *text* must be hashable.
    Falsy input (``None``, ``""``) yields ``""``.
    """
    if not text:
        return ""

    text = html.unescape(str(text).strip())

    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)

    # Literal two-character escapes ("\" followed by n/t/r) must be handled
    # BEFORE the special-character pass: that pass deletes every backslash,
    # which would otherwise glue the leftover letter onto the neighbouring
    # words ("line1\nline2" -> "line1nline2").
    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')

    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)

    # Remove SSML keywords that appear as standalone words.
    # NOTE(review): this also removes these words from ordinary prose
    # ("please speak up" -> "please up"); kept as-is to preserve behavior.
    text = re.sub(r'\b(?:voice|speak|prosody|ssml|xmlns)\b', '', text, flags=re.IGNORECASE)

    # NFC keeps composed Tamil/Indic characters intact (NFKD would decompose them).
    text = unicodedata.normalize('NFC', text)
    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()
102
 
103
 
104
async def generate_safe_audio(text, voice, semaphore, chunk_index):
    """Synthesize one text chunk to an MP3 file, using an on-disk cache.

    Args:
        text: Chunk text; it is cleaned with ``clean_text_for_tts`` before synthesis.
        voice: edge-tts voice name.
        semaphore: ``asyncio.Semaphore`` bounding concurrent TTS requests.
        chunk_index: Position of the chunk; returned unchanged so callers can
            restore ordering.

    Returns:
        ``(cache_filename, chunk_index)`` on success, ``(None, chunk_index)``
        when the chunk has no speakable content or every attempt failed.
    """
    # Deterministic cache key: same text + voice always maps to the same file.
    cache_key = f"{text}_{voice}"
    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")

    def _is_usable(path):
        # A file of 1 KB or less is treated as a failed/empty download.
        try:
            return os.path.getsize(path) > 1024
        except OSError:
            return False

    # Check disk cache first
    if _is_usable(cache_filename):
        return cache_filename, chunk_index

    async with semaphore:  # Limit concurrent TTS requests
        cleaned_text = clean_text_for_tts(text)
        if not cleaned_text or len(cleaned_text) < 2:
            return None, chunk_index

        # An identical chunk may have filled the cache while this task was
        # waiting on the semaphore.
        if _is_usable(cache_filename):
            return cache_filename, chunk_index

        # Retry configuration
        max_retries = 3
        base_delay = 2.0

        # Download into a private temp file and publish it with an atomic
        # rename. This keeps identical chunks from writing the same file at
        # once and keeps a partially written download out of the cache.
        temp_filename = f"{cache_filename}.{os.getpid()}.{chunk_index}.part"

        try:
            for attempt in range(max_retries):
                try:
                    comm = edge_tts.Communicate(cleaned_text, voice=voice)
                    await comm.save(temp_filename)

                    # Verify the download before exposing it as a cache entry
                    if _is_usable(temp_filename):
                        os.replace(temp_filename, cache_filename)
                        return cache_filename, chunk_index

                except Exception as e:
                    if attempt == max_retries - 1:
                        print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {e}")
                        return None, chunk_index

                    # Exponential backoff with jitter to avoid thundering herd
                    sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
                    print(f"Rate limit hit on chunk {chunk_index}. Retrying in {sleep_time:.2f}s...")
                    await asyncio.sleep(sleep_time)
        finally:
            # Remove whatever is left of a failed attempt.
            try:
                os.remove(temp_filename)
            except OSError:
                pass

        return None, chunk_index
144
 
145
 
146
def _split_by_words(part, max_chars):
    """Greedily pack the words of *part* into pieces of at most *max_chars*.

    Returns ``(pieces, remainder)``: the completed pieces and the trailing,
    still-open piece. A single word longer than *max_chars* is kept whole.
    """
    pieces = []
    buffer = ""
    for word in part.split():
        candidate = f"{buffer} {word}" if buffer else word
        if len(candidate) <= max_chars:
            buffer = candidate
        else:
            if buffer:
                pieces.append(buffer)
            buffer = word
    return pieces, buffer


@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=250):
    """
    Split *text* into ordered chunks of at most *max_chars* characters.

    The text is cleaned with ``clean_text_for_tts`` and split into sentences.
    A sentence that fits becomes one chunk. A longer sentence is split after
    commas/semicolons, and any clause that is still too long is split on word
    boundaries.

    Results are memoized with ``lru_cache``, so a tuple (not a list) is
    returned; it is empty when nothing speakable remains.
    """
    text = clean_text_for_tts(text)
    if not text:
        return tuple()

    # Protect common abbreviations so "Dr. Smith" is not treated as a
    # sentence boundary; the marker is turned back into "." further down.
    text = re.sub(r'\b(Dr|Mr|Mrs|Ms|Prof|Sr|Jr)\.\s', r'\1<<DOT>> ', text)

    chunks = []

    for sentence in SENTENCE_PATTERN.split(text):
        # Restore protected periods
        sentence = sentence.strip().replace('<<DOT>>', '.')
        if not sentence:
            continue

        if len(sentence) <= max_chars:
            chunks.append(sentence)
            continue

        # Sentence too long: try splitting on commas/semicolons first
        current_chunk = ""

        for part in SUB_PATTERN.split(sentence):
            part = part.strip()
            if not part:
                continue

            # SUB_PATTERN leaves the ',' / ';' attached to the end of each
            # part, so parts are re-joined with a plain space. Joining with
            # ", " would duplicate the punctuation ("a,, b").
            candidate = f"{current_chunk} {part}" if current_chunk else part

            if len(candidate) <= max_chars:
                current_chunk = candidate
                continue

            # Save current chunk if exists
            if current_chunk:
                chunks.append(current_chunk)

            if len(part) > max_chars:
                # If part itself is too long, split by words
                pieces, current_chunk = _split_by_words(part, max_chars)
                chunks.extend(pieces)
            else:
                current_chunk = part

        # Don't forget last chunk
        if current_chunk:
            chunks.append(current_chunk)

    # Filter empty chunks
    return tuple(chunk for chunk in chunks if chunk.strip())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
 
219
def process_audio_segment_fast(audio_data):
    """
    Load one generated audio file, normalize it and trim silence.

    Input: (audio_file, chunk_index)
    Output: (segment, chunk_index)

    ``segment`` is None when the file is missing or cannot be processed.
    ``chunk_index`` is passed through unchanged so the caller can restore
    ordering.
    """
    audio_file, chunk_index = audio_data

    try:
        if not audio_file or not os.path.exists(audio_file):
            return None, chunk_index

        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Only strip silence for longer segments
        if len(segment) > 200:
            try:
                # pydub rejects padding > silence_len; with the default
                # padding of 100 ms and silence_len=50 the call always raised
                # and no silence was ever stripped, so padding is explicit.
                stripped = segment.strip_silence(
                    silence_len=50,
                    silence_thresh=-40,
                    padding=50,
                )
                # Keep the untrimmed audio if stripping removed everything.
                if len(stripped) > 0:
                    segment = stripped
            except Exception as strip_error:
                # Best effort: fall back to the untrimmed segment.
                print(f"Warning: Silence stripping skipped for segment {chunk_index}: {strip_error}")

        return segment, chunk_index

    except Exception as e:
        print(f"Warning: Error processing audio segment {chunk_index}: {e}")
        return None, chunk_index
246
 
247
 
248
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=5):
    """
    Convert *text* to a single MP3 file by synthesizing chunks concurrently.

    Args:
        text: Text to speak.
        output_file: Path of the MP3 file to write.
        VOICE_TA: Voice name to use; falls back to ``VOICE_EN`` when not given.
        max_concurrent: Upper bound on simultaneous TTS requests.

    Returns:
        ``output_file`` on success, ``None`` when no audio could be produced
        or an error occurred (the error is printed, not raised).
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text, max_chars=250)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        # Every chunk is spoken with the same voice. The previous per-chunk
        # Tamil check selected VOICE_TA in both of its branches whenever
        # VOICE_TA was set, so it reduces to this single expression.
        voice = VOICE_TA or VOICE_EN

        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)

        # One task per chunk; the index travels with the result
        tasks = [
            generate_safe_audio(chunk, voice, semaphore, i)
            for i, chunk in enumerate(chunks)
        ]

        # Generate all audio files concurrently
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep successful results only (exceptions are returned as values, not tuples)
        audio_data = [
            result
            for result in results
            if isinstance(result, tuple) and result[0] and os.path.exists(result[0])
        ]

        if not audio_data:
            print("Error: No audio was successfully generated")
            return None

        # Sort by chunk index to guarantee correct order
        audio_data.sort(key=lambda x: x[1])

        print(f"Successfully generated {len(audio_data)}/{len(chunks)} audio segments")

        # Process audio segments in parallel using ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
            processed = list(executor.map(process_audio_segment_fast, audio_data))

        # Filter out None segments and sort by index
        processed = [(seg, idx) for seg, idx in processed if seg is not None]
        processed.sort(key=lambda x: x[1])

        audio_segments = [seg for seg, idx in processed]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        print(f"Processed {len(audio_segments)} segments in correct order")

        # Merge audio segments with a short pause between chunks
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=180)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        # Export with high quality
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        traceback.print_exc()
        return None
337
 
338
 
339
  async def generate_tts_optimized(id, lines, lang):
340
+ """Optimized TTS generation function."""
341
+ voice = {
342
  "English": "en-US-JennyNeural",
343
  "Tamil": "ta-IN-PallaviNeural",
344
  "Hindi": "hi-IN-SwaraNeural",
 
376
  audio_name = f"audio{id}.mp3"
377
  audio_path = os.path.join(AUDIO_DIR, audio_name)
378
 
 
379
  if "&&&" in lang:
380
+ listf = lang.split("&&&")
381
+ text = listf[0].strip()
382
+ lang_name = listf[1].strip() if len(listf) > 1 else "English"
383
+ voice_to_use = voice.get(lang_name, VOICE_EN)
384
  else:
385
+ text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
386
+ voice_to_use = voice.get(lang, VOICE_EN)
 
 
 
387
 
388
+ # Use max_concurrent=5 for better rate limit handling
389
+ output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
390
 
391
  if output and os.path.exists(audio_path):
392
  try:
 
394
  duration = audio.info.length
395
  return duration, audio_path
396
  except Exception as e:
397
+ print(f"Error reading audio file: {e}")
398
  return None, None
399
 
400
  return None, None
 
410
  finally:
411
  loop.close()
412
  except Exception as e:
413
+ print(f"Error in audio_func: {e}")
414
  traceback.print_exc()
415
  return None, None
416
 
417
 
418
 
 
 
419
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
420
  """Generate Manim script from problem data with robust wrapping."""
421