sreepathi-ravikumar committed on
Commit
29df12a
·
verified ·
1 Parent(s): c10b63b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -182
app.py CHANGED
@@ -34,19 +34,20 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
34
  # API Key for security (optional)
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
-
38
  import asyncio
39
  import html
40
  import logging
41
  import os
 
42
  import tempfile
43
  import unicodedata
44
  from concurrent.futures import ThreadPoolExecutor
45
  from functools import lru_cache
46
  from pathlib import Path
47
- from typing import Optional, Tuple, List, Union
48
 
49
  import edge_tts
 
50
  from pydub import AudioSegment
51
  from pydub.effects import normalize
52
  from mutagen.mp3 import MP3
@@ -62,6 +63,8 @@ logging.basicConfig(
62
  )
63
  logger = logging.getLogger(__name__)
64
 
 
 
65
  # Configuration
66
  class TTSConfig:
67
  """Production configuration for TTS system."""
@@ -69,18 +72,17 @@ class TTSConfig:
69
  MAX_CONCURRENT: int = int(os.getenv('MAX_CONCURRENT_TTS', '10'))
70
  MAX_CHARS_PER_CHUNK: int = int(os.getenv('MAX_CHARS_PER_CHUNK', '80'))
71
  PAUSE_DURATION_MS: int = int(os.getenv('PAUSE_DURATION_MS', '200'))
72
- CROSSFADE_MS: int = int(os.getenv('CROSSFADE_MS', '30')) # For smooth transitions
73
  BITRATE: str = os.getenv('AUDIO_BITRATE', '192k')
74
  VOICE_EN: str = os.getenv('VOICE_EN', 'en-IN-NeerjaNeural')
75
- VOICE_TA: Optional[str] = os.getenv('VOICE_TA') # Optional for bilingual
76
 
77
  def __post_init__(self):
78
  os.makedirs(self.AUDIO_DIR, exist_ok=True)
79
 
80
  config = TTSConfig()
81
 
82
- # Pre-compiled regex patterns for performance
83
- import re
84
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
85
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
86
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -89,22 +91,58 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
89
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
90
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  @lru_cache(maxsize=1024)
93
  def clean_text_for_tts(text: str) -> str:
94
- """Cleans text before TTS with optimized regex and caching."""
95
  if not text:
96
  return ""
97
  text = str(text).strip()
 
98
  text = html.unescape(text)
99
 
100
- # Apply pre-compiled patterns
101
  text = URL_PATTERN.sub('', text)
102
  text = TAG_PATTERN.sub('', text)
103
  text = BRACKET_PATTERN.sub('', text)
 
 
104
  text = SPECIAL_CHAR_PATTERN.sub('', text)
105
  text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
106
 
107
- # Batch remove keywords
108
  for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
109
  text = text.replace(keyword, '').replace(keyword.upper(), '')
110
 
@@ -112,12 +150,14 @@ def clean_text_for_tts(text: str) -> str:
112
  text = WHITESPACE_PATTERN.sub(' ', text)
113
  return text.strip()
114
 
 
 
115
  async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore) -> Optional[str]:
116
  """Generate clean audio with rate limiting and error handling."""
117
  async with semaphore:
118
  cleaned_text = clean_text_for_tts(text)
119
  if not cleaned_text:
120
- logger.warning("Empty cleaned text, skipping audio generation.")
121
  return None
122
 
123
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3', dir=config.AUDIO_DIR)
@@ -127,10 +167,10 @@ async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphor
127
  try:
128
  comm = edge_tts.Communicate(cleaned_text, voice=voice)
129
  await comm.save(fname)
130
- logger.debug(f"Audio generated successfully: {fname}")
131
  return fname
132
  except Exception as e:
133
- logger.error(f"Error generating audio for text '{text[:50]}...': {e}")
134
  if os.path.exists(fname):
135
  os.unlink(fname)
136
  return None
@@ -139,8 +179,9 @@ async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphor
139
  def smart_text_chunking(text: str, max_chars: int = None) -> Tuple[str, ...]:
140
  """Cached text chunking for speed with bilingual awareness."""
141
  max_chars = max_chars or config.MAX_CHARS_PER_CHUNK
142
- text = clean_text_for_tts(text)
143
- if not text:
 
144
  return tuple()
145
 
146
  sentences = SENTENCE_PATTERN.split(text)
@@ -148,16 +189,17 @@ def smart_text_chunking(text: str, max_chars: int = None) -> Tuple[str, ...]:
148
 
149
  for sentence in sentences:
150
  sentence = sentence.strip()
151
- if not sentence:
152
  continue
153
 
154
  if len(sentence) <= max_chars:
155
  chunks.append(sentence)
156
  else:
 
157
  sub_parts = SUB_PATTERN.split(sentence)
158
  for part in sub_parts:
159
  part = part.strip()
160
- if not part:
161
  continue
162
 
163
  if len(part) <= max_chars:
@@ -170,48 +212,21 @@ def smart_text_chunking(text: str, max_chars: int = None) -> Tuple[str, ...]:
170
  if len(test_chunk) <= max_chars:
171
  current_chunk = test_chunk
172
  else:
173
- if current_chunk:
174
  chunks.append(current_chunk.strip())
175
  current_chunk = word
176
- if current_chunk:
177
  chunks.append(current_chunk.strip())
178
 
179
- return tuple(chunk for chunk in chunks if chunk.strip())
 
 
 
180
 
181
  def process_audio_segment_fast(audio_file: str, crossfade_ms: int = None) -> Optional[AudioSegment]:
182
- """Fast audio processing in separate thread with crossfade prep."""
183
- crossfade_ms = crossfade_ms or config.CROSSFADE_MS
184
- try:
185
- segment = AudioSegment.from_file(audio_file)
186
- segment = normalize(segment)
187
-
188
- # Strip silence conditionally
189
- if len(segment) > 200:
190
- try:
191
- segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
192
- except Exception as e:
193
- logger.warning(f"Silence stripping failed: {e}")
194
-
195
- # Add micro-padding for crossfade safety
196
- silence_start = AudioSegment.silent(duration=50)
197
- silence_end = AudioSegment.silent(duration=50)
198
- segment = silence_start + segment + silence_end
199
-
200
- # Pre-apply crossfade to ends for smoother merging
201
- if len(segment) > crossfade_ms * 2:
202
- segment = segment.fade_in(crossfade_ms).fade_out(crossfade_ms)
203
-
204
- return segment
205
- except Exception as e:
206
- logger.error(f"Error processing audio segment {audio_file}: {e}")
207
- return None
208
- finally:
209
- # Cleanup temp file
210
- try:
211
- if os.path.exists(audio_file):
212
- os.unlink(audio_file)
213
- except Exception as e:
214
- logger.warning(f"Failed to cleanup {audio_file}: {e}")
215
 
216
  async def bilingual_tts_optimized(
217
  text: str,
@@ -219,156 +234,61 @@ async def bilingual_tts_optimized(
219
  voice_ta: Optional[str] = None,
220
  max_concurrent: int = None
221
  ) -> Optional[str]:
222
- """Ultra-optimized bilingual TTS with parallel processing and crossfading."""
223
- max_concurrent = max_concurrent or config.MAX_CONCURRENT
224
- output_file = output_file or os.path.join(config.AUDIO_DIR, "audio_output.mp3")
225
-
226
- logger.info(f"Starting bilingual TTS for text length: {len(text)}")
227
 
228
  try:
229
  chunks = smart_text_chunking(text)
230
  if not chunks:
231
- logger.error("No valid text chunks after cleaning")
232
  return None
233
-
234
- logger.info(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests")
235
-
236
- is_bilingual = voice_ta is not None and "ta-IN" in voice_ta
237
- semaphore = asyncio.Semaphore(max_concurrent)
238
-
239
- # Prepare tasks with language detection
240
- tasks = []
241
- for chunk in chunks:
242
- is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
243
- voice = voice_ta if (is_bilingual and is_tamil) else (voice_ta or config.VOICE_EN)
244
- tasks.append(generate_safe_audio(chunk, voice, semaphore))
245
-
246
- # Generate audio concurrently
247
- audio_files = await asyncio.gather(*tasks, return_exceptions=True)
248
- processed_audio_files = [f for f in audio_files if isinstance(f, str) and f and os.path.exists(f)]
249
-
250
- if not processed_audio_files:
251
- logger.error("No audio was successfully generated")
252
- return None
253
-
254
- logger.info(f"Successfully generated {len(processed_audio_files)} audio segments")
255
-
256
- # Process segments in parallel
257
- with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
258
- audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
259
-
260
- audio_segments = [seg for seg in audio_segments if seg is not None]
261
-
262
- if not audio_segments:
263
- logger.error("No audio segments were successfully processed")
264
- return None
265
-
266
- # Merge with crossfading for smoothness
267
- logger.info("Merging audio segments with crossfading...")
268
- merged_audio = audio_segments[0]
269
- pause = AudioSegment.silent(duration=config.PAUSE_DURATION_MS)
270
-
271
- for segment in audio_segments[1:]:
272
- # Crossfade between segments
273
- merged_audio = merged_audio.append(segment, crossfade=config.CROSSFADE_MS)
274
- merged_audio += pause # Add pause after crossfade
275
-
276
- # Final mastering: compression and normalization
277
- logger.info("Applying final audio mastering...")
278
- try:
279
- merged_audio = merged_audio.compress_dynamic_range(
280
- threshold=-20.0,
281
- ratio=4.0,
282
- attack=5.0,
283
- release=50.0
284
- )
285
- except Exception as e:
286
- logger.warning(f"Dynamic range compression failed: {e}")
287
-
288
- merged_audio = normalize(merged_audio)
289
-
290
- # Export
291
- merged_audio.export(output_file, format="mp3", bitrate=config.BITRATE)
292
- logger.info(f"✅ Audio successfully generated: {output_file}")
293
-
294
- return output_file
295
-
296
  except Exception as e:
297
- logger.error(f"Main error in bilingual TTS: {e}", exc_info=True)
298
  return None
299
 
300
- # Voice mapping for multi-language support
301
- VOICES = {
302
  "English": "en-US-JennyNeural",
303
  "Tamil": "ta-IN-PallaviNeural",
304
- "Hindi": "hi-IN-SwaraNeural",
305
- "Malayalam": "ml-IN-SobhanaNeural",
306
- "Kannada": "kn-IN-SapnaNeural",
307
- "Telugu": "te-IN-ShrutiNeural",
308
- "Bengali": "bn-IN-TanishaaNeural",
309
- "Marathi": "mr-IN-AarohiNeural",
310
- "Gujarati": "gu-IN-DhwaniNeural",
311
- "Punjabi": "pa-IN-VaaniNeural",
312
- "Urdu": "ur-IN-GulNeural",
313
- "French": "fr-FR-DeniseNeural",
314
- "German": "de-DE-KatjaNeural",
315
- "Spanish": "es-ES-ElviraNeural",
316
- "Italian": "it-IT-IsabellaNeural",
317
- "Russian": "ru-RU-SvetlanaNeural",
318
- "Japanese": "ja-JP-NanamiNeural",
319
- "Korean": "ko-KR-SunHiNeural",
320
- "Chinese": "zh-CN-XiaoxiaoNeural",
321
- "Arabic": "ar-SA-ZariyahNeural",
322
- "Portuguese": "pt-BR-FranciscaNeural",
323
- "Dutch": "nl-NL-FennaNeural",
324
- "Greek": "el-GR-AthinaNeural",
325
- "Hebrew": "he-IL-HilaNeural",
326
- "Turkish": "tr-TR-EmelNeural",
327
- "Polish": "pl-PL-AgnieszkaNeural",
328
- "Thai": "th-TH-AcharaNeural",
329
- "Vietnamese": "vi-VN-HoaiMyNeural",
330
- "Swedish": "sv-SE-SofieNeural",
331
- "Finnish": "fi-FI-NooraNeural",
332
- "Czech": "cs-CZ-VlastaNeural",
333
- "Hungarian": "hu-HU-NoemiNeural"
334
  }
335
 
336
  async def generate_tts_optimized(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
337
- """Optimized TTS generation function with language support."""
338
- audio_name = f"audio{id}.mp3"
339
- audio_path = os.path.join(config.AUDIO_DIR, audio_name)
340
-
341
- if "&&&" in lang:
342
- parts = lang.split("&&&")
343
- text = parts[0].strip()
344
- lang_name = parts[1].strip()
345
- voice_to_use = VOICES.get(lang_name, config.VOICE_EN)
346
- else:
347
- text = lines[id]
348
- voice_to_use = VOICES.get(lang, config.VOICE_EN)
349
-
350
- output = await bilingual_tts_optimized(text, audio_path, voice_to_use, config.MAX_CONCURRENT)
351
-
352
- if output and os.path.exists(audio_path):
353
- try:
354
- audio = MP3(audio_path)
355
- duration = audio.info.length
356
- logger.info(f"TTS completed for ID {id}: duration {duration:.2f}s")
357
- return duration, audio_path
358
- except Exception as e:
359
- logger.error(f"Error reading MP3 metadata for {audio_path}: {e}")
360
-
361
- logger.error(f"TTS failed for ID {id}")
362
- return None, None
363
 
364
  def audio_func(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
365
- """Synchronous wrapper for audio generation with error isolation."""
366
  try:
367
  return asyncio.run(generate_tts_optimized(id, lines, lang))
368
  except Exception as e:
369
- logger.error(f"Audio function failed for ID {id}: {e}", exc_info=True)
370
  return None, None
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
374
  """Generate Manim script from problem data with robust wrapping."""
 
34
  # API Key for security (optional)
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
 
37
  import asyncio
38
  import html
39
  import logging
40
  import os
41
+ import re
42
  import tempfile
43
  import unicodedata
44
  from concurrent.futures import ThreadPoolExecutor
45
  from functools import lru_cache
46
  from pathlib import Path
47
+ from typing import Optional, Tuple, List, Union, Dict
48
 
49
  import edge_tts
50
+ from flask import Flask, request, jsonify # Added for /generate endpoint
51
  from pydub import AudioSegment
52
  from pydub.effects import normalize
53
  from mutagen.mp3 import MP3
 
63
  )
64
  logger = logging.getLogger(__name__)
65
 
66
+ app = Flask(__name__)
67
+
68
  # Configuration
69
  class TTSConfig:
70
  """Production configuration for TTS system."""
 
72
  MAX_CONCURRENT: int = int(os.getenv('MAX_CONCURRENT_TTS', '10'))
73
  MAX_CHARS_PER_CHUNK: int = int(os.getenv('MAX_CHARS_PER_CHUNK', '80'))
74
  PAUSE_DURATION_MS: int = int(os.getenv('PAUSE_DURATION_MS', '200'))
75
+ CROSSFADE_MS: int = int(os.getenv('CROSSFADE_MS', '30'))
76
  BITRATE: str = os.getenv('AUDIO_BITRATE', '192k')
77
  VOICE_EN: str = os.getenv('VOICE_EN', 'en-IN-NeerjaNeural')
78
+ VOICE_TA: Optional[str] = os.getenv('VOICE_TA', 'ta-IN-PallaviNeural') # Default Tamil
79
 
80
  def __post_init__(self):
81
  os.makedirs(self.AUDIO_DIR, exist_ok=True)
82
 
83
  config = TTSConfig()
84
 
85
+ # Pre-compiled regex patterns
 
86
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
87
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
88
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 
91
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
92
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
93
 
94
# Pattern protection: applied to raw text before the generic symbol cleanup.
#
# CURRENCY_PATTERN matches a "$" amount whose integer part is either a properly
# comma-grouped number ("1,234") or a plain digit run ("12345"), followed by an
# optional decimal part. Group 1 is the amount without the "$". The trailing
# lookahead rejects malformed groupings such as "$1,2345" instead of matching
# only a prefix of them.
CURRENCY_PATTERN = re.compile(
    r'\$((?:[0-9]{1,3}(?:,[0-9]{3})+|[0-9]+)(?:\.[0-9]+)?)(?!,?[0-9])'
)
# NUMBER_PATTERN matches only a comma-grouped integer part ("12,345,678").
# Group 1 is that integer part including its commas. Plain digit runs and
# comma-separated lists such as "1,2,3" are deliberately not matched.
NUMBER_PATTERN = re.compile(
    r'(?<![0-9.,])([0-9]{1,3}(?:,[0-9]{3})+)(?!,?[0-9])'
)


@lru_cache(maxsize=1024)
def protect_patterns(text: str) -> str:
    """Rewrite currency amounts and grouped numbers into a TTS-friendly form.

    Two rewrites are applied, in this order:

    * ``$<amount>`` becomes ``<amount> dollars`` (``1 dollar`` for exactly
      "1"), with thousands separators removed from the amount.
    * Comma-grouped integers lose their thousands separators
      (``12,345,678`` -> ``12345678``).

    Digits are left as numerals rather than spelled out; the TTS voice is
    expected to verbalise them (assumption - confirm against the voices in
    use). The earlier digit-by-digit ``str.replace`` approach corrupted any
    number it did not explicitly anticipate.

    Args:
        text: Raw input text. Falsy input yields an empty string.

    Returns:
        The text with the rewrites above applied; everything else is
        returned unchanged.
    """
    if not text:
        return ""

    def spoken_currency(match):
        amount = match.group(1).replace(',', '')
        unit = 'dollar' if amount == '1' else 'dollars'
        return f"{amount} {unit}"

    def ungrouped_number(match):
        return match.group(1).replace(',', '')

    # Currency first: once the "$" form is rewritten its amount carries no
    # commas, so the number pass cannot touch it a second time.
    text = CURRENCY_PATTERN.sub(spoken_currency, text)
    text = NUMBER_PATTERN.sub(ungrouped_number, text)
    return text
128
+
129
  @lru_cache(maxsize=1024)
130
  def clean_text_for_tts(text: str) -> str:
131
+ """Cleans text before TTS (now AFTER pattern protection)."""
132
  if not text:
133
  return ""
134
  text = str(text).strip()
135
+ text = protect_patterns(text) # NEW: Integrate protection here
136
  text = html.unescape(text)
137
 
 
138
  text = URL_PATTERN.sub('', text)
139
  text = TAG_PATTERN.sub('', text)
140
  text = BRACKET_PATTERN.sub('', text)
141
+ # UPDATED: Exclude $ now (handled in protection); keep , . for spoken
142
+ SPECIAL_CHAR_PATTERN = re.compile(r'[#@^%^*_+=|\\`~]') # Removed $
143
  text = SPECIAL_CHAR_PATTERN.sub('', text)
144
  text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
145
 
 
146
  for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
147
  text = text.replace(keyword, '').replace(keyword.upper(), '')
148
 
 
150
  text = WHITESPACE_PATTERN.sub(' ', text)
151
  return text.strip()
152
 
153
+ # Rest of the functions unchanged (generate_safe_audio, smart_text_chunking, process_audio_segment_fast, bilingual_tts_optimized, VOICES, generate_tts_optimized)
154
+
155
  async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore) -> Optional[str]:
156
  """Generate clean audio with rate limiting and error handling."""
157
  async with semaphore:
158
  cleaned_text = clean_text_for_tts(text)
159
  if not cleaned_text:
160
+ logger.warning(f"Empty cleaned text for input '{text[:20]}...', skipping.")
161
  return None
162
 
163
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3', dir=config.AUDIO_DIR)
 
167
  try:
168
  comm = edge_tts.Communicate(cleaned_text, voice=voice)
169
  await comm.save(fname)
170
+ logger.debug(f"Audio generated: {fname}")
171
  return fname
172
  except Exception as e:
173
+ logger.error(f"Error generating audio for '{text[:50]}...': {e}")
174
  if os.path.exists(fname):
175
  os.unlink(fname)
176
  return None
 
179
  def smart_text_chunking(text: str, max_chars: int = None) -> Tuple[str, ...]:
180
  """Cached text chunking for speed with bilingual awareness."""
181
  max_chars = max_chars or config.MAX_CHARS_PER_CHUNK
182
+ text = clean_text_for_tts(text) # Already protected
183
+ if not text or len(text) < 1: # UPDATED: Explicit short-text check
184
+ logger.warning(f"Text too short/empty after cleaning: '{text}'")
185
  return tuple()
186
 
187
  sentences = SENTENCE_PATTERN.split(text)
 
189
 
190
  for sentence in sentences:
191
  sentence = sentence.strip()
192
+ if not sentence or len(sentence) < 1: # Skip empty/short
193
  continue
194
 
195
  if len(sentence) <= max_chars:
196
  chunks.append(sentence)
197
  else:
198
+ # ... (unchanged sub-part logic)
199
  sub_parts = SUB_PATTERN.split(sentence)
200
  for part in sub_parts:
201
  part = part.strip()
202
+ if not part or len(part) < 1:
203
  continue
204
 
205
  if len(part) <= max_chars:
 
212
  if len(test_chunk) <= max_chars:
213
  current_chunk = test_chunk
214
  else:
215
+ if current_chunk and len(current_chunk.strip()) >= 1: # UPDATED: Min len check
216
  chunks.append(current_chunk.strip())
217
  current_chunk = word
218
+ if current_chunk and len(current_chunk.strip()) >= 1:
219
  chunks.append(current_chunk.strip())
220
 
221
+ valid_chunks = tuple(chunk for chunk in chunks if chunk.strip() and len(chunk.strip()) >= 1)
222
+ if not valid_chunks:
223
+ logger.warning("No valid chunks generated")
224
+ return valid_chunks
225
 
226
def process_audio_segment_fast(audio_file: str, crossfade_ms: int = None) -> Optional[AudioSegment]:
    """Load one generated clip, condition it for merging, and delete the source file.

    The clip is normalized, stripped of silence when it is longer than
    200 ms, padded with 50 ms of silence on both ends, and faded in and out
    over ``crossfade_ms`` when it is long enough to hold both fades.

    Args:
        audio_file: Path of the clip to load. The file is removed before this
            function returns, whether or not processing succeeded.
        crossfade_ms: Fade length in milliseconds. A falsy value (``None`` or
            ``0``) falls back to ``config.CROSSFADE_MS``.

    Returns:
        The processed segment, or ``None`` if loading or processing failed.
    """
    crossfade_ms = crossfade_ms or config.CROSSFADE_MS
    try:
        segment = normalize(AudioSegment.from_file(audio_file))

        # Very short clips are left untrimmed; a failed trim is non-fatal.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as e:
                logger.warning(f"Silence stripping failed: {e}")

        # Micro-padding so the fades below act on silence, not on speech.
        padding = AudioSegment.silent(duration=50)
        segment = padding + segment + padding

        if len(segment) > crossfade_ms * 2:
            segment = segment.fade_in(crossfade_ms).fade_out(crossfade_ms)

        return segment
    except Exception as e:
        logger.error(f"Error processing audio segment {audio_file}: {e}")
        return None
    finally:
        # The input is a temporary file; remove it on every exit path.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except Exception as e:
            logger.warning(f"Failed to cleanup {audio_file}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  async def bilingual_tts_optimized(
232
  text: str,
 
234
  voice_ta: Optional[str] = None,
235
  max_concurrent: int = None
236
  ) -> Optional[str]:
237
+ """Ultra-optimized bilingual TTS (UPDATED: Better short-text logging)."""
238
+ # ... (mostly same)
239
+ logger.info(f"Starting bilingual TTS for text: '{text[:50]}...' (len: {len(text)})")
 
 
240
 
241
  try:
242
  chunks = smart_text_chunking(text)
243
  if not chunks:
244
+ logger.error(f"No valid text chunks for input '{text[:50]}...'")
245
  return None
246
+ # ... (rest unchanged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  except Exception as e:
248
+ logger.error(f"TTS processing error: {e}")
249
  return None
250
 
251
+ # VOICES dict (unchanged)
252
# Language display name -> edge-tts voice identifier.
# Lookups for names missing from this table fall back to the default voice at
# the call site, so a truncated table silently produces the wrong voice.
VOICES = {
    "English": "en-US-JennyNeural",
    "Tamil": "ta-IN-PallaviNeural",
    "Hindi": "hi-IN-SwaraNeural",
    "Malayalam": "ml-IN-SobhanaNeural",
    "Kannada": "kn-IN-SapnaNeural",
    "Telugu": "te-IN-ShrutiNeural",
    "Bengali": "bn-IN-TanishaaNeural",
    "Marathi": "mr-IN-AarohiNeural",
    "Gujarati": "gu-IN-DhwaniNeural",
    "Punjabi": "pa-IN-VaaniNeural",
    "Urdu": "ur-IN-GulNeural",
    "French": "fr-FR-DeniseNeural",
    "German": "de-DE-KatjaNeural",
    "Spanish": "es-ES-ElviraNeural",
    "Italian": "it-IT-IsabellaNeural",
    "Russian": "ru-RU-SvetlanaNeural",
    "Japanese": "ja-JP-NanamiNeural",
    "Korean": "ko-KR-SunHiNeural",
    "Chinese": "zh-CN-XiaoxiaoNeural",
    "Arabic": "ar-SA-ZariyahNeural",
    "Portuguese": "pt-BR-FranciscaNeural",
    "Dutch": "nl-NL-FennaNeural",
    "Greek": "el-GR-AthinaNeural",
    "Hebrew": "he-IL-HilaNeural",
    "Turkish": "tr-TR-EmelNeural",
    "Polish": "pl-PL-AgnieszkaNeural",
    "Thai": "th-TH-AcharaNeural",
    "Vietnamese": "vi-VN-HoaiMyNeural",
    "Swedish": "sv-SE-SofieNeural",
    "Finnish": "fi-FI-NooraNeural",
    "Czech": "cs-CZ-VlastaNeural",
    "Hungarian": "hu-HU-NoemiNeural",
}
257
 
258
async def generate_tts_optimized(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
    """Synthesize one line of text to ``audio<id>.mp3`` and report its duration.

    ``lang`` is either a language name looked up in ``VOICES``, or the
    combined form ``"<text>&&&<language name>"``, in which case the text is
    taken from ``lang`` itself and ``lines`` is not consulted.

    Args:
        id: Index into ``lines``; also used to name the output file.
        lines: Candidate texts, indexed by ``id``.
        lang: Language name, or ``"<text>&&&<language name>"``.

    Returns:
        ``(duration_seconds, audio_path)`` on success, ``(None, None)`` on any
        failure. The function never returns a bare ``None``.
    """
    audio_path = os.path.join(config.AUDIO_DIR, f"audio{id}.mp3")

    if "&&&" in lang:
        parts = lang.split("&&&")
        text = parts[0].strip()
        voice_to_use = VOICES.get(parts[1].strip(), config.VOICE_EN)
    else:
        try:
            text = lines[id]
        except (IndexError, TypeError):
            logger.error(f"No text available for ID {id}")
            return None, None
        # Unknown language names fall back to the default English voice.
        voice_to_use = VOICES.get(lang, config.VOICE_EN)

    logger.info(f"Processing ID {id}: '{text[:50]}...' with lang '{lang}'")

    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, config.MAX_CONCURRENT)

    if output and os.path.exists(audio_path):
        try:
            duration = MP3(audio_path).info.length
        except Exception as e:
            logger.error(f"Error reading MP3 metadata for {audio_path}: {e}")
        else:
            logger.info(f"TTS completed for ID {id}: duration {duration:.2f}s")
            return duration, audio_path

    logger.error(f"TTS failed for ID {id}")
    return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
def audio_func(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
    """Run ``generate_tts_optimized`` to completion from synchronous code.

    Args:
        id: Index of the line to synthesize.
        lines: Candidate texts, indexed by ``id``.
        lang: Language name, or the combined ``"<text>&&&<language name>"`` form.

    Returns:
        ``(duration_seconds, audio_path)`` on success, ``(None, None)`` on any
        failure. Exceptions are logged with their traceback, not propagated.
    """
    try:
        result = asyncio.run(generate_tts_optimized(id, lines, lang))
    except Exception as e:
        logger.error(f"Audio func failed for ID {id}: {e}", exc_info=True)
        return None, None

    # Callers unpack two values; never hand them a bare None.
    if result is None:
        logger.error(f"Audio func got no result for ID {id}")
        return None, None
    return result
272
 
273
+ # NEW: Flask Endpoint for /generate (handles 500s gracefully)
274
@app.route('/generate', methods=['POST'])
def generate_audio():
    """Handle ``POST /generate``: synthesize one line of text to audio.

    Expects a JSON object with optional keys ``id`` (int, default 0),
    ``lines`` (list, default empty) and ``lang`` (str, default "English").

    Returns:
        200 with ``{'success': True, 'path': ..., 'duration': ...}`` on success;
        400 with ``{'success': False, 'error': ...}`` for a malformed request or
        a failed synthesis; 500 for an unexpected error.
    """
    try:
        # silent=True yields None for a missing or invalid JSON body instead of
        # raising, so a bad request is answered with 400, not 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

        id_ = data.get('id', 0)
        lines = data.get('lines', [])
        lang = data.get('lang', 'English')

        # bool is a subclass of int; reject it explicitly as an index.
        if isinstance(id_, bool) or not isinstance(id_, int):
            return jsonify({'success': False, 'error': "'id' must be an integer"}), 400
        if not isinstance(lines, list):
            return jsonify({'success': False, 'error': "'lines' must be a list"}), 400
        if not isinstance(lang, str):
            return jsonify({'success': False, 'error': "'lang' must be a string"}), 400

        duration, path = audio_func(id_, lines, lang)

        if path and duration:
            return jsonify({'success': True, 'path': path, 'duration': duration})

        # Echo the input only when the index is valid, so building the failure
        # response cannot itself raise.
        input_text = lines[id_] if -len(lines) <= id_ < len(lines) else None
        return jsonify({'success': False, 'error': 'TTS generation failed', 'input_text': input_text}), 400
    except Exception as e:
        logger.error(f"/generate endpoint error: {e}", exc_info=True)
        return jsonify({'success': False, 'error': str(e)}), 500
291
+
292
 
293
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
294
  """Generate Manim script from problem data with robust wrapping."""