sreepathi-ravikumar committed on
Commit
0bb2b49
·
verified ·
1 Parent(s): 13b333e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -145
app.py CHANGED
@@ -35,9 +35,6 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
 
38
-
39
-
40
-
41
  import os
42
  import re
43
  import html
@@ -47,8 +44,10 @@ import tempfile
47
  import traceback
48
  import random
49
  import hashlib
 
50
  from concurrent.futures import ThreadPoolExecutor
51
  from functools import lru_cache
 
52
 
53
  import edge_tts
54
  from pydub import AudioSegment
@@ -62,166 +61,227 @@ VOICE_EN = "en-IN-NeerjaNeural"
62
  AUDIO_DIR = os.path.join(os.getcwd(), "audio")
63
  os.makedirs(AUDIO_DIR, exist_ok=True)
64
 
65
- # Pre-compiled regex patterns for speed (compiled once, reused many times)
66
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
67
- TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
68
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
69
  SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
70
  WHITESPACE_PATTERN = re.compile(r'\s+')
71
- # More conservative sentence splitting to avoid breaking mid-word
72
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-ZА-ЯА-Я\u0B80-\u0BFF\u0900-\u097F])')
73
- # Avoid splitting on colons that are part of numbers (like time 5:30)
74
- SUB_PATTERN = re.compile(r'(?<=[,;])\s+')
75
 
 
 
76
 
77
- @lru_cache(maxsize=1024)
78
- def clean_text_for_tts(text):
79
- """Cleans text before TTS with optimized regex and caching."""
80
  if not text:
81
  return ""
 
82
  text = str(text).strip()
83
  text = html.unescape(text)
84
 
85
- # Use pre-compiled patterns (much faster)
86
  text = URL_PATTERN.sub('', text)
 
 
87
  text = TAG_PATTERN.sub('', text)
 
 
88
  text = BRACKET_PATTERN.sub('', text)
 
 
89
  text = SPECIAL_CHAR_PATTERN.sub('', text)
90
- text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
91
 
92
- # Batch remove keywords (faster than multiple re.sub calls)
93
- # But only if they appear as standalone words or in SSML context
94
- for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
95
- # Remove only if surrounded by whitespace or special chars (not part of words)
96
- text = re.sub(rf'\b{keyword}\b', '', text, flags=re.IGNORECASE)
97
 
98
- # Use NFC normalization instead of NFKD to preserve Tamil/Indic characters better
99
  text = unicodedata.normalize('NFC', text)
 
 
100
  text = WHITESPACE_PATTERN.sub(' ', text)
 
101
  return text.strip()
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- async def generate_safe_audio(text, voice, semaphore, chunk_index):
105
- """Generate clean audio with rate limiting, caching, and retry logic."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  # Create deterministic cache key
107
- cache_key = f"{text}_{voice}"
108
- text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
109
  cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
110
 
111
- # Check disk cache first
112
  if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
113
  return cache_filename, chunk_index
114
 
115
- async with semaphore: # Limit concurrent TTS requests
116
- cleaned_text = clean_text_for_tts(text)
117
- if not cleaned_text or len(cleaned_text) < 2:
118
- return None, chunk_index
119
-
120
- # Retry configuration
121
  max_retries = 3
122
  base_delay = 2.0
123
 
124
  for attempt in range(max_retries):
125
  try:
126
- comm = edge_tts.Communicate(cleaned_text, voice=voice)
127
- await comm.save(cache_filename)
 
128
 
129
- # Verify file was created successfully
130
- if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
 
 
 
 
 
131
  return cache_filename, chunk_index
 
 
 
 
 
 
 
132
 
133
  except Exception as e:
 
 
 
 
 
 
 
134
  if attempt == max_retries - 1:
135
  print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {e}")
136
  return None, chunk_index
137
 
138
- # Exponential backoff with jitter to avoid thundering herd
139
  sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
140
  print(f"Rate limit hit on chunk {chunk_index}. Retrying in {sleep_time:.2f}s...")
141
  await asyncio.sleep(sleep_time)
142
 
143
  return None, chunk_index
144
 
145
-
146
- @lru_cache(maxsize=256)
147
- def smart_text_chunking(text, max_chars=250):
148
- """
149
- Cached text chunking with improved algorithm to preserve word order and context.
150
- Increased max_chars to reduce total number of API calls.
151
- """
152
- text = clean_text_for_tts(text)
153
- if not text:
154
- return tuple() # Return tuple for hashability (required by lru_cache)
155
-
156
- # Protect common abbreviations
157
- text = re.sub(r'\b(Dr|Mr|Mrs|Ms|Prof|Sr|Jr)\.\s', r'\1<<DOT>> ', text)
158
-
159
- sentences = SENTENCE_PATTERN.split(text)
160
- chunks = []
161
-
162
- for sentence in sentences:
163
- sentence = sentence.strip()
164
- if not sentence:
165
- continue
166
-
167
- # Restore protected periods
168
- sentence = sentence.replace('<<DOT>>', '.')
169
-
170
- if len(sentence) <= max_chars:
171
- chunks.append(sentence)
172
- else:
173
- # Try splitting on commas/semicolons first
174
- sub_parts = SUB_PATTERN.split(sentence)
175
- current_chunk = ""
176
-
177
- for part in sub_parts:
178
- part = part.strip()
179
- if not part:
180
- continue
181
-
182
- # Try to add to current chunk
183
- test_chunk = f"{current_chunk}, {part}" if current_chunk else part
184
-
185
- if len(test_chunk) <= max_chars:
186
- current_chunk = test_chunk
187
- else:
188
- # Save current chunk if exists
189
- if current_chunk:
190
- chunks.append(current_chunk.strip())
191
-
192
- # If part itself is too long, split by words
193
- if len(part) > max_chars:
194
- words = part.split()
195
- word_chunk = ""
196
-
197
- for word in words:
198
- test_word_chunk = f"{word_chunk} {word}" if word_chunk else word
199
- if len(test_word_chunk) <= max_chars:
200
- word_chunk = test_word_chunk
201
- else:
202
- if word_chunk:
203
- chunks.append(word_chunk.strip())
204
- word_chunk = word
205
-
206
- if word_chunk:
207
- current_chunk = word_chunk
208
- else:
209
- current_chunk = part
210
-
211
- # Don't forget last chunk
212
- if current_chunk:
213
- chunks.append(current_chunk.strip())
214
-
215
- # Filter empty chunks
216
- return tuple(chunk for chunk in chunks if chunk.strip())
217
-
218
-
219
- def process_audio_segment_fast(audio_data):
220
- """
221
- Fast audio processing in separate thread with ordering preserved.
222
- Input: (audio_file, chunk_index)
223
- Output: (segment, chunk_index)
224
- """
225
  audio_file, chunk_index = audio_data
226
 
227
  try:
@@ -236,23 +296,24 @@ def process_audio_segment_fast(audio_data):
236
  try:
237
  segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
238
  except:
239
- pass # Skip if fails
240
 
241
  return segment, chunk_index
242
 
243
  except Exception as e:
244
  print(f"Warning: Error processing audio segment {chunk_index}: {e}")
245
  return None, chunk_index
 
 
 
246
 
247
-
248
- async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=5):
249
- """
250
- Ultra-optimized bilingual TTS with parallel processing.
251
- Reduced max_concurrent to 5 for better rate limit compliance.
252
- """
253
  print("Starting optimized bilingual TTS processing...")
254
 
255
  try:
 
256
  chunks = smart_text_chunking(text, max_chars=250)
257
  if not chunks:
258
  print("Error: No valid text chunks after cleaning")
@@ -260,26 +321,37 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
260
 
261
  print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
262
 
 
263
  is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
 
264
 
265
- # Semaphore to limit concurrent TTS requests (prevents rate limiting)
 
 
 
266
  semaphore = asyncio.Semaphore(max_concurrent)
267
 
268
- # Prepare all tasks with index tracking
269
  tasks = []
270
  for i, chunk in enumerate(chunks):
271
- is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
272
- voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
 
 
 
 
273
  tasks.append(generate_safe_audio(chunk, voice, semaphore, i))
274
 
275
  # Generate all audio files concurrently
276
- results = await asyncio.gather(*tasks, return_exceptions=True)
277
 
278
- # Filter successful files and maintain order
279
  audio_data = []
280
  for result in results:
281
  if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
282
  audio_data.append(result)
 
 
283
 
284
  if not audio_data:
285
  print("Error: No audio was successfully generated")
@@ -290,7 +362,7 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
290
 
291
  print(f"Successfully generated {len(audio_data)}/{len(chunks)} audio segments")
292
 
293
- # Process audio segments in parallel using ThreadPoolExecutor
294
  with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
295
  processed = list(executor.map(process_audio_segment_fast, audio_data))
296
 
@@ -306,37 +378,45 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
306
 
307
  print(f"Processed {len(audio_segments)} segments in correct order")
308
 
309
- # Merge audio segments (fast concatenation)
310
  print("Merging audio segments...")
311
  merged_audio = audio_segments[0]
312
- pause = AudioSegment.silent(duration=180) # Slightly shorter pause for smoother flow
313
 
314
  for segment in audio_segments[1:]:
315
  merged_audio += pause + segment
316
 
317
- # Apply final processing (compression and normalization)
318
  print("Applying final audio processing...")
319
- merged_audio = merged_audio.compress_dynamic_range(
320
- threshold=-20.0,
321
- ratio=4.0,
322
- attack=5.0,
323
- release=50.0
324
- )
 
 
 
 
325
  merged_audio = normalize(merged_audio)
326
 
327
  # Export with high quality
328
  merged_audio.export(output_file, format="mp3", bitrate="192k")
329
- print(f"✅ Audio successfully generated: {output_file}")
330
 
331
- return output_file
 
 
 
 
 
 
332
 
333
  except Exception as main_error:
334
  print(f"Main error in bilingual TTS: {main_error}")
335
  traceback.print_exc()
336
  return None
337
 
338
-
339
- async def generate_tts_optimized(id, lines, lang):
340
  """Optimized TTS generation function."""
341
  voice = {
342
  "English": "en-US-JennyNeural",
@@ -399,8 +479,7 @@ async def generate_tts_optimized(id, lines, lang):
399
 
400
  return None, None
401
 
402
-
403
- def audio_func(id, lines, lang):
404
  """Synchronous wrapper for audio generation."""
405
  try:
406
  loop = asyncio.new_event_loop()
@@ -415,7 +494,6 @@ def audio_func(id, lines, lang):
415
  return None, None
416
 
417
 
418
-
419
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
420
  """Generate Manim script from problem data with robust wrapping."""
421
 
 
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
 
 
 
 
38
  import os
39
  import re
40
  import html
 
44
  import traceback
45
  import random
46
  import hashlib
47
+ import json
48
  from concurrent.futures import ThreadPoolExecutor
49
  from functools import lru_cache
50
+ from typing import List, Tuple, Optional
51
 
52
  import edge_tts
53
  from pydub import AudioSegment
 
61
  AUDIO_DIR = os.path.join(os.getcwd(), "audio")
62
  os.makedirs(AUDIO_DIR, exist_ok=True)
63
 
64
# Pre-compiled regex patterns (compiled once at import and shared by the
# text-cleaning and chunking helpers in this module).

# http(s):// links and bare "www." links, up to the next whitespace, angle
# bracket or quote character.
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
# Complete <...> tags only; a lone "<" or ">" is left in the text.
TAG_PATTERN = re.compile(r'<[^>]*>')
# Curly and square brackets (the bracket characters themselves, not their contents).
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
# Symbols that are dropped outright. Note that the class includes the
# backslash and "$".
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
# Any run of whitespace (spaces, tabs, real newlines).
WHITESPACE_PATTERN = re.compile(r'\s+')
# Improved sentence splitting - more conservative.
# Splits on whitespace that follows ".", "!", "?", "।" or "॥" and precedes an
# uppercase Latin letter, an uppercase Cyrillic letter, a Tamil character
# (U+0B80-U+0BFF) or a Devanagari character (U+0900-U+097F).
# NOTE(review): the Cyrillic range "А-Я" is listed twice; the repeat is redundant.
SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-ZА-ЯА-Я\u0B80-\u0BFF\u0900-\u097F])')
# Avoid splitting on commas in numbers.
# Matches a comma (plus trailing whitespace) only when there is no digit
# immediately before it AND no digit immediately after it.
# NOTE(review): this also refuses to split at "in 2020, we ..." because the
# comma is preceded by a digit - confirm that is intended.
SUB_PATTERN = re.compile(r'(?<!\d),(?!\d)\s*')

# Cache for chunking results: maps an MD5 hex digest of "<text>_<max_chars>"
# to the tuple of chunks produced by smart_text_chunking.
_chunking_cache = {}
77
 
78
def clean_text_for_tts(text: str) -> str:
    """Return *text* stripped of markup and symbols before it is sent to TTS.

    Processing order:

    1. HTML entities are decoded.
    2. Literal two-character escape sequences (backslash followed by
       ``n``, ``t`` or ``r``) become spaces.
    3. URLs, ``<...>`` tags, brackets and special symbols are removed.
    4. The text is NFC-normalised and whitespace runs collapse to one space.

    Args:
        text: Raw input; any falsy value yields ``""``. Non-string values are
            converted with ``str()``.

    Returns:
        The cleaned, stripped string (possibly empty).
    """
    if not text:
        return ""

    text = html.unescape(str(text).strip())

    # Must run BEFORE SPECIAL_CHAR_PATTERN: that pattern deletes backslashes,
    # so doing this afterwards never matches and leaves a stray "n"/"t"/"r"
    # glued to the neighbouring words ("line1\nline2" -> "line1nline2").
    # Real newline/tab characters are handled by WHITESPACE_PATTERN below.
    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')

    # Remove URLs, then tags (tag content is kept), brackets and symbols.
    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)

    # NFC (composed form) rather than a compatibility form, to preserve
    # Tamil/Indic characters.
    text = unicodedata.normalize('NFC', text)

    # Collapse multiple whitespace characters into a single space.
    text = WHITESPACE_PATTERN.sub(' ', text)

    return text.strip()
108
 
109
+ def _protect_special_patterns(text: str) -> str:
110
+ """Protect numbers with commas and abbreviations from being split."""
111
+ # Protect numbers with commas: 1,234 -> 1<<COMMA>>234
112
+ text = re.sub(r'(\d),(\d)', r'\1<<COMMA>>\2', text)
113
+
114
+ # Protect common abbreviations
115
+ abbreviations = ['Dr', 'Mr', 'Mrs', 'Ms', 'Prof', 'Sr', 'Jr', 'St', 'etc', 'vs', 'approx', 'no']
116
+ for abbr in abbreviations:
117
+ text = re.sub(rf'\b{abbr}\.(\s|$)', rf'{abbr}<<DOT>>\1', text, flags=re.IGNORECASE)
118
+
119
+ # Protect currency symbols with numbers: $1,234.50 -> <<CURR>>1<<COMMA>>234<<DOT>>50
120
+ text = re.sub(r'([$€£¥])(\d[\d,.]*\d)', r'<<CURR>>\2', text)
121
+
122
+ return text
123
+
124
+ def _restore_special_patterns(text: str) -> str:
125
+ """Restore protected patterns."""
126
+ text = text.replace('<<COMMA>>', ',')
127
+ text = text.replace('<<DOT>>', '.')
128
+ text = text.replace('<<CURR>>', '$')
129
+ return text
130
 
131
# Upper bound on memoised inputs so a long-running process does not grow
# _chunking_cache without limit.
_CHUNK_CACHE_MAX_ENTRIES = 256


def _split_long_sentence(sentence: str, max_chars: int) -> List[str]:
    """Greedily pack the words of *sentence* into pieces of at most *max_chars*.

    A single word longer than *max_chars* is kept whole as its own piece.
    """
    pieces = []
    current = ""
    for word in sentence.split():
        candidate = f"{current} {word}" if current else word
        if not current or len(candidate) <= max_chars:
            current = candidate
        else:
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def smart_text_chunking(text: str, max_chars: int = 250) -> Tuple[str, ...]:
    """Split *text* into ordered chunks suitable for one TTS request each.

    The text is cleaned, split into sentences, and consecutive sentences are
    packed together while the chunk stays within *max_chars*. A sentence that
    is itself too long is split on word boundaries. The same input always
    yields the same chunks, and results are memoised in ``_chunking_cache``.

    Chunks never overlap: downstream code concatenates the audio of every
    chunk, so repeating words at a chunk boundary would make them be spoken
    twice (and push the chunk past *max_chars*).

    Args:
        text: Raw text; a falsy value yields an empty tuple.
        max_chars: Target maximum chunk length, measured on the text while
            abbreviation/number placeholders are still in place.

    Returns:
        A tuple of non-empty chunk strings in reading order.
    """
    if not text:
        return tuple()

    cache_key = hashlib.md5(f"{text}_{max_chars}".encode()).hexdigest()
    cached = _chunking_cache.get(cache_key)
    if cached is not None:
        return cached

    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return tuple()

    # Hide abbreviation periods / numeric commas so they are not split points.
    protected = _protect_special_patterns(cleaned)

    chunks = []
    current_chunk = ""
    for sentence in SENTENCE_PATTERN.split(protected):
        sentence = sentence.strip()
        if not sentence:
            continue

        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if len(candidate) <= max_chars:
            current_chunk = candidate
            continue

        # The sentence does not fit: flush what has been collected so far.
        if current_chunk:
            chunks.append(current_chunk)

        if len(sentence) > max_chars:
            # Emit all full pieces; the tail stays open so that following
            # short sentences can still be packed onto it.
            *full_pieces, current_chunk = _split_long_sentence(sentence, max_chars)
            chunks.extend(full_pieces)
        else:
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    # Restore protected patterns and drop anything that ended up blank.
    result = tuple(
        restored
        for restored in map(_restore_special_patterns, chunks)
        if restored.strip()
    )

    # Evict the oldest entry (dicts keep insertion order) once the cap is hit.
    if len(_chunking_cache) >= _CHUNK_CACHE_MAX_ENTRIES:
        _chunking_cache.pop(next(iter(_chunking_cache)))
    _chunking_cache[cache_key] = result
    return result
223
+
224
async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
                              chunk_index: int) -> Tuple[Optional[str], int]:
    """Synthesise one text chunk to an MP3 file, using an on-disk cache.

    The result is stored in ``AUDIO_DIR`` under a name derived from the MD5 of
    ``"<text>_<voice>"``, so the same chunk/voice pair is only synthesised
    once. Generation is written to a temporary file first and moved into place
    only after it passes the size check, so a half-written file is never
    served from the cache.

    Args:
        text: Chunk to speak; fewer than two characters is treated as empty.
        voice: edge-tts voice name.
        semaphore: Limits how many requests run concurrently.
        chunk_index: Position of the chunk, returned unchanged so the caller
            can restore ordering.

    Returns:
        ``(path, chunk_index)`` on success, ``(None, chunk_index)`` when the
        chunk is empty or every attempt failed.
    """
    if not text or len(text) < 2:
        return None, chunk_index

    # Create deterministic cache key
    text_hash = hashlib.md5(f"{text}_{voice}".encode()).hexdigest()
    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")

    # Check disk cache (files of 1 KiB or less are treated as failed output)
    if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
        return cache_filename, chunk_index

    max_retries = 3
    base_delay = 2.0

    async with semaphore:
        for attempt in range(max_retries):
            temp_filename = None
            try:
                try:
                    # Create the temp file next to the cache file: os.replace
                    # can fail when source and destination are on different
                    # filesystems, which the system temp dir may be.
                    with tempfile.NamedTemporaryFile(
                        suffix='.mp3', dir=AUDIO_DIR, delete=False
                    ) as tmp:
                        temp_filename = tmp.name

                    comm = edge_tts.Communicate(text, voice=voice)
                    await comm.save(temp_filename)

                    # Verify successful generation before publishing it
                    if os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 1024:
                        os.replace(temp_filename, cache_filename)
                        return cache_filename, chunk_index
                    # Too small / missing: fall through and retry immediately.
                finally:
                    # Runs on success, failure and cancellation alike; after a
                    # successful os.replace the file is already gone.
                    if temp_filename is not None:
                        try:
                            os.unlink(temp_filename)
                        except OSError:
                            pass

            except Exception as e:
                if attempt == max_retries - 1:
                    print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {e}")
                    return None, chunk_index

                # Exponential backoff with jitter
                sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
                print(f"Rate limit hit on chunk {chunk_index}. Retrying in {sleep_time:.2f}s...")
                await asyncio.sleep(sleep_time)

    return None, chunk_index
282
 
283
+ def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[AudioSegment], int]:
284
+ """Process audio segment with proper cleanup and order preservation."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  audio_file, chunk_index = audio_data
286
 
287
  try:
 
296
  try:
297
  segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
298
  except:
299
+ pass
300
 
301
  return segment, chunk_index
302
 
303
  except Exception as e:
304
  print(f"Warning: Error processing audio segment {chunk_index}: {e}")
305
  return None, chunk_index
306
+ finally:
307
+ # Note: We don't delete cache files as they're reused
308
+ pass
309
 
310
+ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
311
+ VOICE_TA: Optional[str] = None, max_concurrent: int = 5) -> Optional[str]:
312
+ """Optimized bilingual TTS with parallel processing and order preservation."""
 
 
 
313
  print("Starting optimized bilingual TTS processing...")
314
 
315
  try:
316
+ # Get chunks deterministically
317
  chunks = smart_text_chunking(text, max_chars=250)
318
  if not chunks:
319
  print("Error: No valid text chunks after cleaning")
 
321
 
322
  print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
323
 
324
+ # Detect language once for entire text
325
  is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
326
+ has_tamil_chars = any('\u0B80' <= char <= '\u0BFF' for char in text)
327
 
328
+ # Choose default voice
329
+ default_voice = VOICE_TA if (is_bilingual_tamil and has_tamil_chars) else (VOICE_TA or VOICE_EN)
330
+
331
+ # Semaphore for rate limiting
332
  semaphore = asyncio.Semaphore(max_concurrent)
333
 
334
+ # Prepare tasks with indices
335
  tasks = []
336
  for i, chunk in enumerate(chunks):
337
+ # Use Tamil voice only if chunk contains Tamil characters AND we have Tamil voice
338
+ if is_bilingual_tamil and any('\u0B80' <= char <= '\u0BFF' for char in chunk):
339
+ voice = VOICE_TA
340
+ else:
341
+ voice = default_voice
342
+
343
  tasks.append(generate_safe_audio(chunk, voice, semaphore, i))
344
 
345
  # Generate all audio files concurrently
346
+ results = await asyncio.gather(*tasks, return_exceptions=False)
347
 
348
+ # Filter successful results and maintain order
349
  audio_data = []
350
  for result in results:
351
  if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
352
  audio_data.append(result)
353
+ elif result is not None:
354
+ print(f"Warning: Got unexpected result type: {type(result)}")
355
 
356
  if not audio_data:
357
  print("Error: No audio was successfully generated")
 
362
 
363
  print(f"Successfully generated {len(audio_data)}/{len(chunks)} audio segments")
364
 
365
+ # Process audio segments in parallel
366
  with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
367
  processed = list(executor.map(process_audio_segment_fast, audio_data))
368
 
 
378
 
379
  print(f"Processed {len(audio_segments)} segments in correct order")
380
 
381
+ # Merge audio segments with smooth transitions
382
  print("Merging audio segments...")
383
  merged_audio = audio_segments[0]
384
+ pause = AudioSegment.silent(duration=150) # Shorter pause for smoother flow
385
 
386
  for segment in audio_segments[1:]:
387
  merged_audio += pause + segment
388
 
389
+ # Apply final processing
390
  print("Applying final audio processing...")
391
+ try:
392
+ merged_audio = merged_audio.compress_dynamic_range(
393
+ threshold=-20.0,
394
+ ratio=4.0,
395
+ attack=5.0,
396
+ release=50.0
397
+ )
398
+ except:
399
+ pass # Skip if compression fails
400
+
401
  merged_audio = normalize(merged_audio)
402
 
403
  # Export with high quality
404
  merged_audio.export(output_file, format="mp3", bitrate="192k")
 
405
 
406
+ # Verify output
407
+ if os.path.exists(output_file) and os.path.getsize(output_file) > 1024:
408
+ print(f"✅ Audio successfully generated: {output_file}")
409
+ return output_file
410
+ else:
411
+ print(f"Error: Generated file is empty or missing: {output_file}")
412
+ return None
413
 
414
  except Exception as main_error:
415
  print(f"Main error in bilingual TTS: {main_error}")
416
  traceback.print_exc()
417
  return None
418
 
419
+ async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
 
420
  """Optimized TTS generation function."""
421
  voice = {
422
  "English": "en-US-JennyNeural",
 
479
 
480
  return None, None
481
 
482
+ def audio_func(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
 
483
  """Synchronous wrapper for audio generation."""
484
  try:
485
  loop = asyncio.new_event_loop()
 
494
  return None, None
495
 
496
 
 
497
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
498
  """Generate Manim script from problem data with robust wrapping."""
499