Spaces:

sreepathi-ravikumar
/

backendprocessmath

Sleeping

App Files Files Community

sreepathi-ravikumar committed 26 days ago

Commit

f362ff5

verified ·

1 Parent(s): 29df12a

Update app.py

Browse files

Files changed (1) hide show

app.py +400 -202

app.py CHANGED Viewed

@@ -34,261 +34,459 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
 # API Key for security (optional)
 API_KEY = "rkmentormindzofficaltokenkey12345"
-import asyncio
-import html
-import logging
 import os
 import re
-import tempfile
 import unicodedata
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
-from pathlib import Path
-from typing import Optional, Tuple, List, Union, Dict
 import edge_tts
-from flask import Flask, request, jsonify  # Added for /generate endpoint
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
-# Configure logging for production
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler('tts_production.log'),
-        logging.StreamHandler()
-    ]
-)
-logger = logging.getLogger(__name__)
-app = Flask(__name__)
-# Configuration
-class TTSConfig:
-    """Production configuration for TTS system."""
-    AUDIO_DIR: str = os.getenv('AUDIO_OUTPUT_DIR', './audio_output')
-    MAX_CONCURRENT: int = int(os.getenv('MAX_CONCURRENT_TTS', '10'))
-    MAX_CHARS_PER_CHUNK: int = int(os.getenv('MAX_CHARS_PER_CHUNK', '80'))
-    PAUSE_DURATION_MS: int = int(os.getenv('PAUSE_DURATION_MS', '200'))
-    CROSSFADE_MS: int = int(os.getenv('CROSSFADE_MS', '30'))
-    BITRATE: str = os.getenv('AUDIO_BITRATE', '192k')
-    VOICE_EN: str = os.getenv('VOICE_EN', 'en-IN-NeerjaNeural')
-    VOICE_TA: Optional[str] = os.getenv('VOICE_TA', 'ta-IN-PallaviNeural')  # Default Tamil
-    def __post_init__(self):
-        os.makedirs(self.AUDIO_DIR, exist_ok=True)
-config = TTSConfig()
 # Pre-compiled regex patterns
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
-TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
 WHITESPACE_PATTERN = re.compile(r'\s+')
-SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
-SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
-# NEW: Pattern Protection Regex (Step 1 from your spec)
-CURRENCY_PATTERN = re.compile(r'\$([0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})?)')
-NUMBER_PATTERN = re.compile(r'([0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]+)?)')
-@lru_cache(maxsize=1024)
-def protect_patterns(text: str) -> str:
-    """Step 1: Pattern Protection - Replace symbols with spoken/placeholders before TTS."""
-    if not text:
-        return ""
-    # Option 1: Spoken form (natural for TTS) - e.g., "$1,234.50" → "dollar one thousand two hundred thirty four dollars and fifty cents"
-    # Uncomment Option 2 if you want placeholders like "<<CURR>>1<<COMMA>>234<<DOT>>50"
-    def spoken_currency(match):
-        amount = match.group(1).replace(',', '').replace('.', ' point ')
-        # Simple number-to-words (expand as needed; use num2words lib for full)
-        words = amount.replace('1', 'one').replace('234', 'two three four').replace('50', 'fifty')  # Placeholder logic
-        return f"dollar {words} dollars"  # Customize for full num-to-words
-    def spoken_number(match):
-        num = match.group(1).replace(',', '').replace('.', ' point ')
-        words = num.replace('1', 'one').replace('234', 'two three four')  # Expand
-        return words
-    text = CURRENCY_PATTERN.sub(spoken_currency, text)
-    text = NUMBER_PATTERN.sub(spoken_number, text)
-    # Option 2: Placeholder mode (uncomment to use)
-    # def placeholder_currency(match):
-    #     clean = match.group(1).replace(',', '<<COMMA>>').replace('.', '<<DOT>>')
-    #     return f"<<CURR>>{clean}"
-    # text = CURRENCY_PATTERN.sub(placeholder_currency, text)
-    return text
-@lru_cache(maxsize=1024)
 def clean_text_for_tts(text: str) -> str:
-    """Cleans text before TTS (now AFTER pattern protection)."""
     if not text:
         return ""
     text = str(text).strip()
-    text = protect_patterns(text)  # NEW: Integrate protection here
     text = html.unescape(text)
     text = URL_PATTERN.sub('', text)
     text = TAG_PATTERN.sub('', text)
     text = BRACKET_PATTERN.sub('', text)
-    # UPDATED: Exclude $ now (handled in protection); keep , . for spoken
-    SPECIAL_CHAR_PATTERN = re.compile(r'[#@^%^*_+=|\\`~]')  # Removed $
     text = SPECIAL_CHAR_PATTERN.sub('', text)
     text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
-    for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
-        text = text.replace(keyword, '').replace(keyword.upper(), '')
-    text = unicodedata.normalize('NFKD', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
     return text.strip()
-# Rest of the functions unchanged (generate_safe_audio, smart_text_chunking, process_audio_segment_fast, bilingual_tts_optimized, VOICES, generate_tts_optimized)
-async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore) -> Optional[str]:
-    """Generate clean audio with rate limiting and error handling."""
-    async with semaphore:
-        cleaned_text = clean_text_for_tts(text)
-        if not cleaned_text:
-            logger.warning(f"Empty cleaned text for input '{text[:20]}...', skipping.")
-            return None
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3', dir=config.AUDIO_DIR)
-        fname = temp_file.name
-        temp_file.close()
-        try:
-            comm = edge_tts.Communicate(cleaned_text, voice=voice)
-            await comm.save(fname)
-            logger.debug(f"Audio generated: {fname}")
-            return fname
-        except Exception as e:
-            logger.error(f"Error generating audio for '{text[:50]}...': {e}")
-            if os.path.exists(fname):
-                os.unlink(fname)
-            return None
-@lru_cache(maxsize=256)
-def smart_text_chunking(text: str, max_chars: int = None) -> Tuple[str, ...]:
-    """Cached text chunking for speed with bilingual awareness."""
-    max_chars = max_chars or config.MAX_CHARS_PER_CHUNK
-    text = clean_text_for_tts(text)  # Already protected
-    if not text or len(text) < 1:  # UPDATED: Explicit short-text check
-        logger.warning(f"Text too short/empty after cleaning: '{text}'")
-        return tuple()
-    sentences = SENTENCE_PATTERN.split(text)
     chunks = []
-    for sentence in sentences:
-        sentence = sentence.strip()
-        if not sentence or len(sentence) < 1:  # Skip empty/short
-            continue
-        if len(sentence) <= max_chars:
-            chunks.append(sentence)
         else:
-            # ... (unchanged sub-part logic)
-            sub_parts = SUB_PATTERN.split(sentence)
-            for part in sub_parts:
-                part = part.strip()
-                if not part or len(part) < 1:
-                    continue
-                if len(part) <= max_chars:
-                    chunks.append(part)
-                else:
-                    words = part.split()
-                    current_chunk = ""
-                    for word in words:
-                        test_chunk = f"{current_chunk} {word}" if current_chunk else word
-                        if len(test_chunk) <= max_chars:
-                            current_chunk = test_chunk
-                        else:
-                            if current_chunk and len(current_chunk.strip()) >= 1:  # UPDATED: Min len check
-                                chunks.append(current_chunk.strip())
-                            current_chunk = word
-                    if current_chunk and len(current_chunk.strip()) >= 1:
-                        chunks.append(current_chunk.strip())
-    valid_chunks = tuple(chunk for chunk in chunks if chunk.strip() and len(chunk.strip()) >= 1)
-    if not valid_chunks:
-        logger.warning("No valid chunks generated")
-    return valid_chunks
-def process_audio_segment_fast(audio_file: str, crossfade_ms: int = None) -> Optional[AudioSegment]:
-    """Fast audio processing (unchanged)."""
-    # ... (same as before)
-    pass  # Placeholder; use previous version
-async def bilingual_tts_optimized(
-    text: str,
-    output_file: str = None,
-    voice_ta: Optional[str] = None,
-    max_concurrent: int = None
-) -> Optional[str]:
-    """Ultra-optimized bilingual TTS (UPDATED: Better short-text logging)."""
-    # ... (mostly same)
-    logger.info(f"Starting bilingual TTS for text: '{text[:50]}...' (len: {len(text)})")
-    try:
-        chunks = smart_text_chunking(text)
-        if not chunks:
-            logger.error(f"No valid text chunks for input '{text[:50]}...'")
-            return None
-        # ... (rest unchanged)
-    except Exception as e:
-        logger.error(f"TTS processing error: {e}")
-        return None
-# VOICES dict (unchanged)
-VOICES = {  # ... same as before
-    "English": "en-US-JennyNeural",
-    "Tamil": "ta-IN-PallaviNeural",
-    # ... etc.
-}
-async def generate_tts_optimized(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
-    """Optimized TTS (UPDATED: Safe for short texts)."""
-    # ... (same, but with better logging)
-    text = lines[id] if not "&&&" in lang else lang.split("&&&")[0].strip()
-    logger.info(f"Processing ID {id}: '{text[:50]}...' with lang '{lang}'")
-    # ... rest unchanged
-def audio_func(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
-    """Synchronous wrapper."""
     try:
-        return asyncio.run(generate_tts_optimized(id, lines, lang))
     except Exception as e:
-        logger.error(f"Audio func failed for ID {id}: {e}")
-        return None, None
-# NEW: Flask Endpoint for /generate (handles 500s gracefully)
-@app.route('/generate', methods=['POST'])
-def generate_audio():
     try:
-        data = request.json
-        id_ = data.get('id', 0)
-        lines = data.get('lines', [])
-        lang = data.get('lang', 'English')
-        duration, path = audio_func(id_, lines, lang)
-        if path and duration:
-            return jsonify({'success': True, 'path': path, 'duration': duration})
         else:
-            return jsonify({'success': False, 'error': 'TTS generation failed', 'input_text': lines[id_] if lines else None}), 400
-    except Exception as e:
-        logger.error(f"/generate endpoint error: {e}")
-        return jsonify({'success': False, 'error': str(e)}), 500
 def create_manim_script(problem_data, script_path, audio_path, scale=1):
     """Generate Manim script from problem data with robust wrapping."""
@@ -381,7 +579,7 @@ class GeneratedMathScene(Scene):
             if slide_type == "title":
                 title_text = content
                 if title_text:
-                    lines_group = make_wrapped_paragraph(title_text, highlight_color, default_font, title_size, line_spacing=0.2)
                     obj = lines_group if len(lines_group) > 0 else Text(title_text, color=highlight_color, font=default_font, font_size=title_size)
                 else:
                     obj = Text("", color=highlight_color, font=default_font, font_size=title_size)

 # API Key for security (optional)
+# NOTE(review): this credential is hard-coded in source, so anyone who can read
+# the repository can read it. Consider loading it from an environment variable
+# or secret store and rotating the current value.
 API_KEY = "rkmentormindzofficaltokenkey12345"
 import os
 import re
+import html
 import unicodedata
+import asyncio
+import tempfile
+import traceback
+import random
+import hashlib
+import json
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
+from typing import List, Tuple, Optional, Dict
 import edge_tts
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
+# Voice configuration: fallback voice used when no language-specific voice is given
+VOICE_EN = "en-IN-NeerjaNeural"
+# Generated and cached audio files live in ./audio under the current working directory
+AUDIO_DIR = os.path.join(os.getcwd(), "audio")
+os.makedirs(AUDIO_DIR, exist_ok=True)
 # Pre-compiled regex patterns
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
+# Match only things that look like markup: comments, or "<" immediately followed
+# by an optional "/", "!" or "?" and a letter. A bare "<[^>]*>" also swallows
+# ordinary text between inequality signs (e.g. "x < 3 and y > 2").
+TAG_PATTERN = re.compile(r'<!--.*?-->|<[/!?]?[A-Za-z][^<>]*>', re.DOTALL)
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
 WHITESPACE_PATTERN = re.compile(r'\s+')
+# Split after ., ! or ? only when the next token starts with an ASCII capital.
+# NOTE: abbreviations followed by a capitalized word ("Dr. Smith") still split.
+SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+(?=[A-Z])')
+# Split on commas that have no digit on either side, so "1,234" stays intact
+SUB_PATTERN = re.compile(r'(?<!\d),(?!\d)\s*')
+# Cache for chunking results
+_chunking_cache: Dict[str, Tuple[str, ...]] = {}
 def clean_text_for_tts(text: str) -> str:
+    """Strip markup and noise from ``text`` so it can be sent to the TTS engine.
+
+    Steps, in order: HTML-entity decoding, URL removal, tag removal, bracket
+    removal, replacement of literal escape sequences, special-character
+    removal, NFC normalization and whitespace collapsing.
+
+    Args:
+        text: Raw input; falsy values yield an empty string.
+
+    Returns:
+        The cleaned, stripped text (possibly empty).
+    """
     if not text:
         return ""
     text = str(text).strip()
     text = html.unescape(text)
+    # Remove URLs
     text = URL_PATTERN.sub('', text)
+    # Remove HTML/XML tags but preserve content
     text = TAG_PATTERN.sub('', text)
+    # Remove brackets
     text = BRACKET_PATTERN.sub('', text)
+    # Turn literal "\n", "\t" and "\r" sequences (backslash + letter) into
+    # spaces. This must run BEFORE the special-character pass: that pass deletes
+    # backslashes, which would otherwise leave a stray "n"/"t"/"r" glued to the
+    # neighbouring words and make this replacement a no-op.
+    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
+    # Remove special characters but preserve punctuation needed for TTS
     text = SPECIAL_CHAR_PATTERN.sub('', text)
+    # Use NFC normalization to preserve Tamil/Indic characters
+    text = unicodedata.normalize('NFC', text)
+    # Collapse runs of whitespace (real newlines and tabs included) to one space
     text = WHITESPACE_PATTERN.sub(' ', text)
     return text.strip()
+def split_by_word_boundary(text: str) -> List[str]:
+    """
+    Split text into runs of consecutive characters of the same script class.
+
+    Characters in the Tamil block (U+0B80-U+0BFF) are classed as 'ta'; any
+    other alphabetic character and '-' are classed as 'en'; everything else
+    (spaces, digits, punctuation) inherits the class of the running segment.
+    A new segment starts whenever the class changes, so trailing spaces stay
+    with the segment that precedes the change.
+    Example: "Voltage னு" → ["Voltage ", "னு"]
+
+    Returns an empty list for empty input. Whitespace-only segments are not
+    emitted.
+    """
+    if not text:
+        return []
+    segments = []
+    current_segment = ""
+    current_lang = None  # 'en', 'ta', or None
+    i = 0
+    while i < len(text):
+        char = text[i]
+        # Detect language of current character
+        if '\u0B80' <= char <= '\u0BFF':  # Tamil range
+            char_lang = 'ta'
+        elif char.isalpha() or char in '-':
+            # Any non-Tamil letter (not only Latin) and the hyphen land here
+            char_lang = 'en'
+        else:
+            char_lang = current_lang  # Punctuation/space keeps current language
+        # Start new segment on language boundary
+        if current_lang and char_lang and current_lang != char_lang:
+            # Hyphen special case. Because '-' is classed as 'en', this branch
+            # is only reached while the running segment is Tamil.
+            # NOTE(review): the stated goal is to keep words like "simple-ஆ"
+            # together, but that input still splits as "simple-" / "ஆ" because
+            # the boundary is detected at the Tamil character, not the hyphen.
+            if char == '-' and i > 0 and i < len(text) - 1:
+                # Hyphen sits between an alphabetic character and a Tamil one
+                prev_char = text[i-1]
+                next_char = text[i+1]
+                if prev_char.isalpha() and ('\u0B80' <= next_char <= '\u0BFF'):
+                    # Keep hyphen with current segment
+                    current_segment += char
+                    i += 1
+                    continue
+            if current_segment.strip():
+                segments.append(current_segment)
+            current_segment = char
+            current_lang = char_lang
+        else:
+            current_segment += char
+            # Unclassified leading characters leave current_lang as None
+            current_lang = char_lang or current_lang
+        i += 1
+    # Flush the trailing segment
+    if current_segment.strip():
+        segments.append(current_segment)
+    return segments
+def chunk_text_with_overlap(text: str, max_chars: int = 250, max_words: int = 20,
+                            overlap_words: int = 0) -> List[Tuple[str, int]]:
+    """
+    Clean ``text`` and group it into chunks sized for one TTS request each.
+
+    The cleaned text is split into script-homogeneous segments, which are then
+    packed greedily into chunks of at most ``max_chars`` characters and
+    ``max_words`` whitespace-separated words. A segment longer than
+    ``max_chars`` is packed word by word; a single word longer than
+    ``max_chars`` becomes its own chunk.
+
+    Args:
+        text: Raw input text.
+        max_chars: Upper bound on chunk length in characters.
+        max_words: Upper bound on words per chunk.
+        overlap_words: Number of trailing words of chunk N to prepend to chunk
+            N+1. Defaults to 0: every chunk is synthesized separately and the
+            audio is concatenated, so any textual overlap is spoken twice.
+
+    Returns:
+        A list of ``(chunk_text, chunk_index)`` tuples in reading order; empty
+        when nothing remains after cleaning.
+    """
+    # Clean first
+    cleaned = clean_text_for_tts(text)
+    if not cleaned:
+        return []
     chunks = []
+    current_chunk = ""
+    for segment in split_by_word_boundary(cleaned):
+        candidate = current_chunk + segment
+        if len(candidate) <= max_chars and len(candidate.split()) <= max_words:
+            current_chunk = candidate
+            continue
+        # The segment does not fit: close the running chunk and start a new one
+        if current_chunk:
+            chunks.append(current_chunk)
+        if len(segment) <= max_chars:
+            current_chunk = segment
+            continue
+        # Oversized segment: pack its words greedily. Reset the running chunk
+        # first so an already-emitted chunk can never be appended twice.
+        current_chunk = ""
+        for word in segment.split():
+            candidate = f"{current_chunk} {word}" if current_chunk else word
+            if len(candidate) <= max_chars:
+                current_chunk = candidate
+            else:
+                if current_chunk:
+                    chunks.append(current_chunk)
+                current_chunk = word
+    # Add final chunk
+    if current_chunk:
+        chunks.append(current_chunk)
+    if overlap_words <= 0:
+        return [(chunk, i) for i, chunk in enumerate(chunks)]
+    # Optional overlap: the tail of chunk N is prepended to chunk N+1 when the
+    # result still fits within max_chars
+    overlapped_chunks = []
+    for i, chunk in enumerate(chunks):
+        if i > 0:
+            tail_words = chunks[i - 1].split()[-overlap_words:]
+            if tail_words:
+                candidate = " ".join(tail_words) + " " + chunk
+                if len(candidate) <= max_chars:
+                    chunk = candidate
+        overlapped_chunks.append((chunk, i))
+    return overlapped_chunks
+async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
+                             chunk_index: int) -> Tuple[Optional[str], int]:
+    """Synthesize one chunk to an MP3 file, using an on-disk cache and retries.
+
+    Args:
+        text: Chunk text; inputs shorter than two characters are skipped.
+        voice: edge-tts voice name.
+        semaphore: Limits how many syntheses run at the same time.
+        chunk_index: Position of the chunk, returned unchanged so the caller
+            can restore ordering.
+
+    Returns:
+        ``(path, chunk_index)`` where ``path`` is the cached MP3 file, or
+        ``(None, chunk_index)`` when the chunk was skipped or all attempts
+        failed.
+    """
+    if not text or len(text) < 2:
+        return None, chunk_index
+    # Deterministic cache key: same text + voice always maps to the same file
+    cache_key = f"{text}_{voice}"
+    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
+    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
+    # Check disk cache (files of 1 KiB or less are treated as failed output)
+    if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
+        return cache_filename, chunk_index
+    async with semaphore:
+        max_retries = 3
+        base_delay = 2.0
+        for attempt in range(max_retries):
+            temp_filename = None
+            failure = "generated file is missing or too small"
+            try:
+                # Create the temp file next to the cache so os.replace() is a
+                # same-filesystem rename; a file in the system temp directory
+                # can sit on another mount, where os.replace() raises OSError.
+                with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False, dir=AUDIO_DIR) as tmp:
+                    temp_filename = tmp.name
+                comm = edge_tts.Communicate(text, voice=voice)
+                await comm.save(temp_filename)
+                # Verify successful generation
+                if os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 1024:
+                    os.replace(temp_filename, cache_filename)
+                    temp_filename = None  # moved into the cache; nothing to clean up
+                    return cache_filename, chunk_index
+            except Exception as e:
+                failure = e
+            finally:
+                # Remove the temp file after any failed attempt, including the
+                # "file too small" case that raises no exception
+                if temp_filename is not None:
+                    try:
+                        os.unlink(temp_filename)
+                    except OSError:
+                        pass
+            if attempt == max_retries - 1:
+                print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {failure}")
+                return None, chunk_index
+            # Exponential backoff with jitter before the next attempt
+            sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
+            await asyncio.sleep(sleep_time)
+        return None, chunk_index
+def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[AudioSegment], int]:
+    """Load one generated clip, pad it with silence and normalize its level.
+
+    Args:
+        audio_data: ``(file_path, chunk_index)`` pair.
+
+    Returns:
+        ``(segment, chunk_index)``; ``segment`` is ``None`` when the path is
+        empty, the file does not exist, or loading/processing raises.
+    """
+    source_path, position = audio_data
     try:
+        if not (source_path and os.path.exists(source_path)):
+            return None, position
+        clip = AudioSegment.from_file(source_path)
+        # 50 ms of silence on each side keeps the clip edges from being clipped
+        if len(clip) > 0:
+            clip = AudioSegment.silent(duration=50) + clip + AudioSegment.silent(duration=50)
+        return normalize(clip), position
     except Exception as e:
+        print(f"Warning: Error processing audio segment {position}: {e}")
+        return None, position
+async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
+                                  VOICE_TA: Optional[str] = None, max_concurrent: int = 5) -> Optional[str]:
+    """Synthesize ``text`` chunk by chunk and merge the result into one MP3.
+
+    Args:
+        text: Raw text to speak; it is cleaned and chunked internally.
+        output_file: Destination path of the merged MP3.
+        VOICE_TA: Voice used for every chunk; falls back to ``VOICE_EN`` when
+            not provided.
+        max_concurrent: Maximum number of chunk syntheses running at once.
+
+    Returns:
+        ``output_file`` on success, otherwise ``None``. Errors are printed and
+        never raised to the caller.
+    """
+    print("Starting bilingual TTS processing...")
     try:
+        chunks_with_indices = chunk_text_with_overlap(text, max_chars=250)
+        if not chunks_with_indices:
+            print("Error: No valid text chunks after processing")
+            return None
+        print(f"Processing {len(chunks_with_indices)} text chunks...")
+        # The caller-supplied voice is used for every chunk. The previous
+        # per-chunk Tamil detection picked this same value in both branches.
+        voice = VOICE_TA or VOICE_EN
+        # Semaphore for rate limiting
+        semaphore = asyncio.Semaphore(max_concurrent)
+        tasks = [
+            generate_safe_audio(chunk_text, voice, semaphore, chunk_index)
+            for chunk_text, chunk_index in chunks_with_indices
+        ]
+        # Generate all audio files
+        results = await asyncio.gather(*tasks, return_exceptions=False)
+        # Keep only chunks whose audio file actually exists
+        audio_data = [
+            result for result in results
+            if isinstance(result, tuple) and result[0] and os.path.exists(result[0])
+        ]
+        if not audio_data:
+            print("Error: No audio was successfully generated")
+            return None
+        missing = len(chunks_with_indices) - len(audio_data)
+        if missing:
+            # Make partial output visible instead of silently dropping text
+            print(f"Warning: {missing} chunk(s) produced no audio and will be missing from the output")
+        # Sort by chunk index
+        audio_data.sort(key=lambda x: x[1])
+        print(f"Successfully generated {len(audio_data)} audio segments")
+        # Process audio segments in parallel
+        with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
+            processed = list(executor.map(process_audio_segment_fast, audio_data))
+        # Filter and sort
+        processed = [(seg, idx) for seg, idx in processed if seg is not None]
+        processed.sort(key=lambda x: x[1])
+        audio_segments = [seg for seg, idx in processed]
+        if not audio_segments:
+            print("Error: No audio segments were successfully processed")
+            return None
+        print(f"Merging {len(audio_segments)} audio segments with crossfade...")
+        # Merge with a 30 ms crossfade for smooth transitions
+        merged_audio = audio_segments[0]
+        for segment in audio_segments[1:]:
+            merged_audio = merged_audio.append(segment, crossfade=30)
+        # Apply compression for consistent volume (best effort)
+        try:
+            merged_audio = merged_audio.compress_dynamic_range(
+                threshold=-20.0,
+                ratio=2.5,  # Gentler compression for more natural sound
+                attack=5.0,
+                release=50.0
+            )
+        except Exception as compress_error:
+            # Continue with the uncompressed audio, but report the failure.
+            # A bare "except:" here would also swallow KeyboardInterrupt.
+            print(f"Warning: Skipping compression: {compress_error}")
+        merged_audio = normalize(merged_audio)
+        # Export
+        merged_audio.export(output_file, format="mp3", bitrate="192k")
+        if os.path.exists(output_file) and os.path.getsize(output_file) > 1024:
+            print(f"✅ Audio successfully generated: {output_file}")
+            return output_file
         else:
+            print(f"Error: Generated file is empty or missing")
+            return None
+    except Exception as main_error:
+        print(f"Main error in bilingual TTS: {main_error}")
+        traceback.print_exc()
+        return None
+async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
+    """Generate ``audio{id}.mp3`` in ``AUDIO_DIR`` and report its duration.
+
+    Args:
+        id: Index into ``lines``; also used to name the output file.
+        lines: Sequence of texts. When it is not a list/tuple, or ``id`` is
+            past its end, ``str(lines)`` is spoken instead.
+        lang: Either a language name looked up in the voice table, or a string
+            of the form ``"<text>&&&<language>"`` that carries the text itself.
+
+    Returns:
+        ``(duration_seconds, audio_path)`` on success, ``(None, None)`` otherwise.
+    """
+    voice_map = {
+        "English": "en-US-JennyNeural",
+        "Tamil": "ta-IN-PallaviNeural",
+        "Hindi": "hi-IN-SwaraNeural",
+        "Malayalam": "ml-IN-SobhanaNeural",
+        "Kannada": "kn-IN-SapnaNeural",
+        "Telugu": "te-IN-ShrutiNeural",
+        "Bengali": "bn-IN-TanishaaNeural",
+        "Marathi": "mr-IN-AarohiNeural",
+        "Gujarati": "gu-IN-DhwaniNeural",
+        "Punjabi": "pa-IN-VaaniNeural",
+        "Urdu": "ur-IN-GulNeural",
+        "French": "fr-FR-DeniseNeural",
+        "German": "de-DE-KatjaNeural",
+        "Spanish": "es-ES-ElviraNeural",
+        "Italian": "it-IT-IsabellaNeural",
+        "Russian": "ru-RU-SvetlanaNeural",
+        "Japanese": "ja-JP-NanamiNeural",
+        "Korean": "ko-KR-SunHiNeural",
+        "Chinese": "zh-CN-XiaoxiaoNeural",
+        "Arabic": "ar-SA-ZariyahNeural",
+        "Portuguese": "pt-BR-FranciscaNeural",
+        "Dutch": "nl-NL-FennaNeural",
+        "Greek": "el-GR-AthinaNeural",
+        "Hebrew": "he-IL-HilaNeural",
+        "Turkish": "tr-TR-EmelNeural",
+        "Polish": "pl-PL-AgnieszkaNeural",
+        "Thai": "th-TH-AcharaNeural",
+        "Vietnamese": "vi-VN-HoaiMyNeural",
+        "Swedish": "sv-SE-SofieNeural",
+        "Finnish": "fi-FI-NooraNeural",
+        "Czech": "cs-CZ-VlastaNeural",
+        "Hungarian": "hu-HU-NoemiNeural"
+    }
+    audio_path = os.path.join(AUDIO_DIR, f"audio{id}.mp3")
+    if "&&&" in lang:
+        # "<text>&&&<language>": the text travels inside the lang argument
+        parts = lang.split("&&&")
+        text = parts[0].strip()
+        lang_name = parts[1].strip() if len(parts) > 1 else "English"
+        voice_to_use = voice_map.get(lang_name, VOICE_EN)
+    else:
+        indexable = isinstance(lines, (list, tuple)) and id < len(lines)
+        text = lines[id] if indexable else str(lines)
+        voice_to_use = voice_map.get(lang, VOICE_EN)
+    # Use max_concurrent=5 for better rate limit handling
+    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
+    if not (output and os.path.exists(audio_path)):
+        return None, None
+    try:
+        # Duration is read back from the written file's MP3 header
+        return MP3(audio_path).info.length, audio_path
+    except Exception as e:
+        print(f"Error reading audio file: {e}")
+        return None, None
+def audio_func(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
+    """Synchronous wrapper around ``generate_tts_optimized``.
+
+    Returns:
+        ``(duration_seconds, audio_path)`` on success, ``(None, None)`` on any
+        error (the error and traceback are printed).
+    """
+    try:
+        # asyncio.run() creates a fresh loop, cancels leftover tasks, shuts
+        # down async generators and closes the loop. A manually created loop
+        # installed with set_event_loop() would stay registered as the
+        # thread's current loop after being closed.
+        return asyncio.run(generate_tts_optimized(id, lines, lang))
+    except Exception as e:
+        print(f"Error in audio_func: {e}")
+        traceback.print_exc()
+        return None, None
 def create_manim_script(problem_data, script_path, audio_path, scale=1):
     """Generate Manim script from problem data with robust wrapping."""
             if slide_type == "title":
                 title_text = content
                 if title_text:
+                    lines_group = make_wrapped_paragraph(title_text, highlight_color, default_font, title_size, line_spacing=0.5)
                     obj = lines_group if len(lines_group) > 0 else Text(title_text, color=highlight_color, font=default_font, font_size=title_size)
                 else:
                     obj = Text("", color=highlight_color, font=default_font, font_size=title_size)