Spaces:

sreepathi-ravikumar
/

backendprocessmath

Sleeping

App Files Files Community

sreepathi-ravikumar committed 26 days ago

Commit

1cbcb32

verified ·

1 Parent(s): 6e0cf4b

Update app.py

Browse files

Files changed (1) hide show

app.py +242 -352

app.py CHANGED Viewed

@@ -35,460 +35,350 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
 API_KEY = "rkmentormindzofficaltokenkey12345"
-import os
-import re
-import html
-import unicodedata
 import asyncio
 import tempfile
-import traceback
-import random
-import hashlib
-import json
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
-from typing import List, Tuple, Optional, Dict
 import edge_tts
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
-# Voice configuration
-VOICE_EN = "en-IN-NeerjaNeural"
-AUDIO_DIR = os.path.join(os.getcwd(), "audio")
-os.makedirs(AUDIO_DIR, exist_ok=True)
-# Pre-compiled regex patterns
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
-TAG_PATTERN = re.compile(r'<[^>]*>')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
 WHITESPACE_PATTERN = re.compile(r'\s+')
-# Conservative sentence splitting that doesn't break on abbreviations
-SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+(?=[A-Z])')
-# Avoid splitting on commas inside numbers
-SUB_PATTERN = re.compile(r'(?<!\d),(?!\d)\s*')
-# Cache for chunking results
-_chunking_cache: Dict[str, Tuple[str, ...]] = {}
 def clean_text_for_tts(text: str) -> str:
-    """Cleans text while preserving Tamil/Indic characters and code-switched punctuation."""
     if not text:
         return ""
     text = str(text).strip()
     text = html.unescape(text)
-    # Remove URLs
     text = URL_PATTERN.sub('', text)
-    # Remove HTML/XML tags but preserve content
     text = TAG_PATTERN.sub('', text)
-    # Remove brackets
     text = BRACKET_PATTERN.sub('', text)
-    # Remove special characters but preserve punctuation needed for TTS
     text = SPECIAL_CHAR_PATTERN.sub('', text)
-    # Replace newlines/tabs with spaces
     text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
-    # Use NFC normalization to preserve Tamil/Indic characters
-    text = unicodedata.normalize('NFC', text)
-    # Collapse multiple whitespace
     text = WHITESPACE_PATTERN.sub(' ', text)
     return text.strip()
-def split_by_word_boundary(text: str) -> List[str]:
-    """
-    Intelligently splits text by language boundaries while preserving code-switched words.
-    Example: "Voltage னு" → ["Voltage", " னு"]
-    """
-    if not text:
-        return []
-    segments = []
-    current_segment = ""
-    current_lang = None  # 'en', 'ta', or None
-    i = 0
-    while i < len(text):
-        char = text[i]
-        # Detect language of current character
-        if '\u0B80' <= char <= '\u0BFF':  # Tamil range
-            char_lang = 'ta'
-        elif char.isalpha() or char in '-':
-            char_lang = 'en'
-        else:
-            char_lang = current_lang  # Punctuation/space keeps current language
-        # Start new segment on language boundary
-        if current_lang and char_lang and current_lang != char_lang:
-            # Don't split on hyphens in code-switched words like "simple-ஆ"
-            if char == '-' and i > 0 and i < len(text) - 1:
-                # Check if it's a code-switched hyphen (English-Tamil)
-                prev_char = text[i-1]
-                next_char = text[i+1]
-                if prev_char.isalpha() and ('\u0B80' <= next_char <= '\u0BFF'):
-                    # Keep hyphen with current segment
-                    current_segment += char
-                    i += 1
-                    continue
-            if current_segment.strip():
-                segments.append(current_segment)
-            current_segment = char
-            current_lang = char_lang
-        else:
-            current_segment += char
-            current_lang = char_lang or current_lang
-        i += 1
-    if current_segment.strip():
-        segments.append(current_segment)
-    return segments
-def chunk_text_with_overlap(text: str, max_chars: int = 250) -> List[Tuple[str, int]]:
-    """
-    Creates chunks with overlap for smooth transitions.
-    Returns list of (chunk_text, chunk_index)
-    """
-    # Clean first
-    cleaned = clean_text_for_tts(text)
-    if not cleaned:
-        return []
-    # Split into segments by language boundary
-    segments = split_by_word_boundary(cleaned)
-    # Group segments into chunks
     chunks = []
-    current_chunk = ""
-    current_words = []
-    for segment in segments:
-        test_chunk = current_chunk + segment if current_chunk else segment
-        test_words = test_chunk.split()
-        if len(test_chunk) <= max_chars and len(test_words) <= 20:
-            current_chunk = test_chunk
-            current_words = test_words
         else:
-            # Need to start new chunk
-            if current_chunk:
-                chunks.append(current_chunk)
-            # Handle long segments
-            if len(segment) > max_chars:
-                # Split long segment by words
-                words = segment.split()
-                temp_chunk = ""
-                temp_words = []
-                for word in words:
-                    test = temp_chunk + " " + word if temp_chunk else word
-                    if len(test) <= max_chars:
-                        temp_chunk = test
-                        temp_words.append(word)
-                    else:
-                        if temp_chunk:
-                            chunks.append(temp_chunk)
-                        temp_chunk = word
-                        temp_words = [word]
-                if temp_chunk:
-                    current_chunk = temp_chunk
-                    current_words = temp_words
-            else:
-                current_chunk = segment
-                current_words = segment.split()
-    # Add final chunk
-    if current_chunk:
-        chunks.append(current_chunk)
-    # Add overlap between chunks (last 3 words of chunk N become first 3 words of chunk N+1)
-    overlapped_chunks = []
-    for i, chunk in enumerate(chunks):
-        if i > 0:
-            # Get last 3 words from previous chunk
-            prev_chunk = chunks[i-1]
-            prev_words = prev_chunk.split()
-            overlap_words = prev_words[-3:] if len(prev_words) >= 3 else prev_words
-            if overlap_words:
-                overlap_text = " ".join(overlap_words)
-                # Add overlap if it won't make the chunk too long
-                test_chunk = overlap_text + " " + chunk
-                if len(test_chunk) <= max_chars:
-                    chunk = test_chunk
-        overlapped_chunks.append((chunk, i))
-    return overlapped_chunks
-async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
-                             chunk_index: int) -> Tuple[Optional[str], int]:
-    """Generate audio with rate limiting, caching, and retry logic."""
-    if not text or len(text) < 2:
-        return None, chunk_index
-    # Create deterministic cache key
-    cache_key = f"{text}_{voice}"
-    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
-    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
-    # Check disk cache
-    if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
-        return cache_filename, chunk_index
-    async with semaphore:
-        max_retries = 3
-        base_delay = 2.0
-        for attempt in range(max_retries):
-            try:
-                # Create temp file
-                with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
-                    temp_filename = tmp.name
-                comm = edge_tts.Communicate(text, voice=voice)
-                await comm.save(temp_filename)
-                # Verify successful generation
-                if os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 1024:
-                    # Move to cache location
-                    os.replace(temp_filename, cache_filename)
-                    return cache_filename, chunk_index
-            except Exception as e:
-                # Clean up temp file on error
-                try:
-                    if os.path.exists(temp_filename):
-                        os.unlink(temp_filename)
-                except:
-                    pass
-                if attempt == max_retries - 1:
-                    print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {e}")
-                    return None, chunk_index
-                # Exponential backoff with jitter
-                sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
-                await asyncio.sleep(sleep_time)
-        return None, chunk_index
-def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[AudioSegment], int]:
-    """Process audio segment with proper cleanup."""
-    audio_file, chunk_index = audio_data
     try:
-        if not audio_file or not os.path.exists(audio_file):
-            return None, chunk_index
         segment = AudioSegment.from_file(audio_file)
-        # Add micro-padding to prevent clipping
-        if len(segment) > 0:
-            segment = AudioSegment.silent(duration=50) + segment + AudioSegment.silent(duration=50)
-        segment = normalize(segment)
-        return segment, chunk_index
     except Exception as e:
-        print(f"Warning: Error processing audio segment {chunk_index}: {e}")
-        return None, chunk_index
-async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
-                                  VOICE_TA: Optional[str] = None, max_concurrent: int = 5) -> Optional[str]:
-    """Optimized bilingual TTS with proper ordering and smooth transitions."""
-    print("Starting bilingual TTS processing...")
     try:
-        # Split text into chunks with overlap
-        chunks_with_indices = chunk_text_with_overlap(text, max_chars=250)
-        if not chunks_with_indices:
-            print("Error: No valid text chunks after processing")
             return None
-        print(f"Processing {len(chunks_with_indices)} text chunks...")
-        # Determine which chunks need Tamil voice
-        chunks_to_generate = []
-        for chunk_text, chunk_index in chunks_with_indices:
-            has_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk_text)
-            if VOICE_TA and has_tamil:
-                voice = VOICE_TA
-            else:
-                voice = VOICE_TA or VOICE_EN
-            chunks_to_generate.append((chunk_text, voice, chunk_index))
-        # Semaphore for rate limiting
         semaphore = asyncio.Semaphore(max_concurrent)
-        # Prepare tasks
         tasks = []
-        for chunk_text, voice, chunk_index in chunks_to_generate:
-            tasks.append(generate_safe_audio(chunk_text, voice, semaphore, chunk_index))
-        # Generate all audio files
-        results = await asyncio.gather(*tasks, return_exceptions=False)
-        # Filter successful results and maintain order
-        audio_data = []
-        for result in results:
-            if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
-                audio_data.append(result)
-        if not audio_data:
-            print("Error: No audio was successfully generated")
             return None
-        # Sort by chunk index
-        audio_data.sort(key=lambda x: x[1])
-        print(f"Successfully generated {len(audio_data)} audio segments")
-        # Process audio segments in parallel
-        with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
-            processed = list(executor.map(process_audio_segment_fast, audio_data))
-        # Filter and sort
-        processed = [(seg, idx) for seg, idx in processed if seg is not None]
-        processed.sort(key=lambda x: x[1])
-        audio_segments = [seg for seg, idx in processed]
         if not audio_segments:
-            print("Error: No audio segments were successfully processed")
             return None
-        print(f"Merging {len(audio_segments)} audio segments with crossfade...")
-        # Merge with crossfade for smooth transitions
         merged_audio = audio_segments[0]
         for segment in audio_segments[1:]:
-            # Crossfade 30ms for smooth transition
-            merged_audio = merged_audio.append(segment, crossfade=30)
-        # Apply compression for consistent volume
         try:
             merged_audio = merged_audio.compress_dynamic_range(
-                threshold=-20.0,
-                ratio=2.5,  # Gentler compression for more natural sound
-                attack=5.0,
                 release=50.0
             )
-        except:
-            pass  # Skip if compression fails
         merged_audio = normalize(merged_audio)
         # Export
-        merged_audio.export(output_file, format="mp3", bitrate="192k")
-        if os.path.exists(output_file) and os.path.getsize(output_file) > 1024:
-            print(f"✅ Audio successfully generated: {output_file}")
-            return output_file
-        else:
-            print(f"Error: Generated file is empty or missing")
-            return None
-    except Exception as main_error:
-        print(f"Main error in bilingual TTS: {main_error}")
-        traceback.print_exc()
         return None
-async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
-    """Optimized TTS generation function."""
-    voice_map = {
-        "English": "en-US-JennyNeural",
-        "Tamil": "ta-IN-PallaviNeural",
-        "Hindi": "hi-IN-SwaraNeural",
-        "Malayalam": "ml-IN-SobhanaNeural",
-        "Kannada": "kn-IN-SapnaNeural",
-        "Telugu": "te-IN-ShrutiNeural",
-        "Bengali": "bn-IN-TanishaaNeural",
-        "Marathi": "mr-IN-AarohiNeural",
-        "Gujarati": "gu-IN-DhwaniNeural",
-        "Punjabi": "pa-IN-VaaniNeural",
-        "Urdu": "ur-IN-GulNeural",
-        "French": "fr-FR-DeniseNeural",
-        "German": "de-DE-KatjaNeural",
-        "Spanish": "es-ES-ElviraNeural",
-        "Italian": "it-IT-IsabellaNeural",
-        "Russian": "ru-RU-SvetlanaNeural",
-        "Japanese": "ja-JP-NanamiNeural",
-        "Korean": "ko-KR-SunHiNeural",
-        "Chinese": "zh-CN-XiaoxiaoNeural",
-        "Arabic": "ar-SA-ZariyahNeural",
-        "Portuguese": "pt-BR-FranciscaNeural",
-        "Dutch": "nl-NL-FennaNeural",
-        "Greek": "el-GR-AthinaNeural",
-        "Hebrew": "he-IL-HilaNeural",
-        "Turkish": "tr-TR-EmelNeural",
-        "Polish": "pl-PL-AgnieszkaNeural",
-        "Thai": "th-TH-AcharaNeural",
-        "Vietnamese": "vi-VN-HoaiMyNeural",
-        "Swedish": "sv-SE-SofieNeural",
-        "Finnish": "fi-FI-NooraNeural",
-        "Czech": "cs-CZ-VlastaNeural",
-        "Hungarian": "hu-HU-NoemiNeural"
-    }
     audio_name = f"audio{id}.mp3"
-    audio_path = os.path.join(AUDIO_DIR, audio_name)
     if "&&&" in lang:
-        listf = lang.split("&&&")
-        text = listf[0].strip()
-        lang_name = listf[1].strip() if len(listf) > 1 else "English"
-        voice_to_use = voice_map.get(lang_name, VOICE_EN)
     else:
-        text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
-        voice_to_use = voice_map.get(lang, VOICE_EN)
-    # Use max_concurrent=5 for better rate limit handling
-    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
     if output and os.path.exists(audio_path):
         try:
             audio = MP3(audio_path)
             duration = audio.info.length
             return duration, audio_path
         except Exception as e:
-            print(f"Error reading audio file: {e}")
-            return None, None
     return None, None
-def audio_func(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
-    """Synchronous wrapper for audio generation."""
     try:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        try:
-            return loop.run_until_complete(generate_tts_optimized(id, lines, lang))
-        finally:
-            loop.close()
     except Exception as e:
-        print(f"Error in audio_func: {e}")
-        traceback.print_exc()
         return None, None
 def create_manim_script(problem_data, script_path, audio_path, scale=1):
     """Generate Manim script from problem data with robust wrapping."""

 API_KEY = "rkmentormindzofficaltokenkey12345"
 import asyncio
+import html
+import logging
+import os
 import tempfile
+import unicodedata
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
+from pathlib import Path
+from typing import Optional, Tuple, List, Union
 import edge_tts
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
+# Configure logging for production
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('tts_production.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+# Configuration
+class TTSConfig:
+    """Production configuration for the TTS system.
+
+    Each setting is read from an environment variable once, when the class
+    body is evaluated, and falls back to the default given here. Creating an
+    instance makes sure the audio output directory exists.
+    """
+    AUDIO_DIR: str = os.getenv('AUDIO_OUTPUT_DIR', './audio_output')
+    MAX_CONCURRENT: int = int(os.getenv('MAX_CONCURRENT_TTS', '10'))
+    MAX_CHARS_PER_CHUNK: int = int(os.getenv('MAX_CHARS_PER_CHUNK', '80'))
+    PAUSE_DURATION_MS: int = int(os.getenv('PAUSE_DURATION_MS', '200'))
+    CROSSFADE_MS: int = int(os.getenv('CROSSFADE_MS', '30'))  # For smooth transitions
+    BITRATE: str = os.getenv('AUDIO_BITRATE', '192k')
+    VOICE_EN: str = os.getenv('VOICE_EN', 'en-IN-NeerjaNeural')
+    VOICE_TA: Optional[str] = os.getenv('VOICE_TA')  # Optional for bilingual
+    def __init__(self) -> None:
+        # __post_init__ is only called automatically by a @dataclass-generated
+        # __init__; on a plain class it has to be invoked explicitly, otherwise
+        # the output directory is never created.
+        self.__post_init__()
+    def __post_init__(self) -> None:
+        """Create AUDIO_DIR (and missing parents) if it does not exist yet."""
+        os.makedirs(self.AUDIO_DIR, exist_ok=True)
+config = TTSConfig()
+# Pre-compiled regex patterns for performance
+import re
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
+TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
 WHITESPACE_PATTERN = re.compile(r'\s+')
+SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
+SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
+# Leaked SSML tokens, matched only as whole words (lower- and upper-case forms).
+_SSML_KEYWORD_PATTERN = re.compile(
+    r'\b(?:voice|speak|prosody|ssml|xmlns|VOICE|SPEAK|PROSODY|SSML|XMLNS)\b'
+)
+@lru_cache(maxsize=1024)
 def clean_text_for_tts(text: str) -> str:
+    """Return ``text`` stripped of markup and symbols that should not be spoken.
+
+    HTML entities are unescaped; URLs, tags, stray angle brackets, brackets
+    and the characters in SPECIAL_CHAR_PATTERN are removed; literal
+    backslash escapes (the two-character sequences \\n, \\t, \\r) become
+    spaces; leaked SSML keywords are dropped; whitespace is collapsed.
+    Results are memoised, so ``text`` must be hashable.
+
+    Returns:
+        The cleaned string, or "" when ``text`` is falsy.
+    """
     if not text:
         return ""
     text = str(text).strip()
     text = html.unescape(text)
+    # Apply pre-compiled patterns
     text = URL_PATTERN.sub('', text)
     text = TAG_PATTERN.sub('', text)
     text = BRACKET_PATTERN.sub('', text)
     text = SPECIAL_CHAR_PATTERN.sub('', text)
     text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
+    # Remove SSML keywords as whole words only. A plain substring replace
+    # turned "speaker" into "er" and "invoice" into "in".
+    # NOTE(review): a standalone word such as "voice" in real content is still
+    # removed - confirm this filter is wanted at all now that tags are stripped.
+    text = _SSML_KEYWORD_PATTERN.sub('', text)
+    # NFC keeps precomposed characters intact. NFKD would split Tamil vowel
+    # signs and accented letters into separate code points and fold symbols
+    # such as '²' into '2', which changes what the TTS voice reads.
+    text = unicodedata.normalize('NFC', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
     return text.strip()
+async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore) -> Optional[str]:
+    """Synthesize ``text`` with ``voice`` into a temporary MP3 file.
+
+    The text is cleaned with clean_text_for_tts first. The request runs while
+    holding ``semaphore`` so that the caller can cap concurrent TTS calls.
+
+    Returns:
+        Path of the generated MP3 inside config.AUDIO_DIR, or None when the
+        cleaned text is empty or generation fails. The caller owns the file
+        and is responsible for deleting it.
+    """
+    async with semaphore:
+        cleaned_text = clean_text_for_tts(text)
+        if not cleaned_text:
+            logger.warning("Empty cleaned text, skipping audio generation.")
+            return None
+        fname = None
+        try:
+            # The output directory may not exist yet. Creating the temp file
+            # inside the try block keeps a filesystem error from escaping as
+            # an unlogged exception.
+            os.makedirs(config.AUDIO_DIR, exist_ok=True)
+            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3', dir=config.AUDIO_DIR)
+            fname = temp_file.name
+            temp_file.close()
+            comm = edge_tts.Communicate(cleaned_text, voice=voice)
+            await comm.save(fname)
+            # A zero-byte file cannot be decoded later; treat it as a failure.
+            if os.path.getsize(fname) == 0:
+                raise RuntimeError("TTS service returned no audio data")
+            logger.debug(f"Audio generated successfully: {fname}")
+            return fname
+        except Exception as e:
+            logger.error(f"Error generating audio for text '{text[:50]}...': {e}")
+            if fname and os.path.exists(fname):
+                os.unlink(fname)
+            return None
+@lru_cache(maxsize=256)
+def smart_text_chunking(text: str, max_chars: Optional[int] = None) -> Tuple[str, ...]:
+    """Split cleaned text into chunks of at most ``max_chars`` characters.
+
+    The text is cleaned with clean_text_for_tts and split into sentences with
+    SENTENCE_PATTERN. A sentence that is too long is split again with
+    SUB_PATTERN, and a part that is still too long is packed word by word.
+    Results are memoised on (text, max_chars).
+
+    Args:
+        text: Raw input text.
+        max_chars: Chunk size limit; a falsy value (None or 0) selects
+            config.MAX_CHARS_PER_CHUNK.
+
+    Returns:
+        Tuple of non-empty chunks in reading order. A single word longer than
+        ``max_chars`` is kept whole, so a chunk can exceed the limit.
+    """
+    max_chars = max_chars or config.MAX_CHARS_PER_CHUNK
+    text = clean_text_for_tts(text)
+    if not text:
+        return tuple()
+    sentences = SENTENCE_PATTERN.split(text)
     chunks = []
+    for sentence in sentences:
+        sentence = sentence.strip()
+        if not sentence:
+            continue
+        if len(sentence) <= max_chars:
+            chunks.append(sentence)
         else:
+            # Sentence too long: fall back to clause boundaries (, ; :).
+            sub_parts = SUB_PATTERN.split(sentence)
+            for part in sub_parts:
+                part = part.strip()
+                if not part:
+                    continue
+                if len(part) <= max_chars:
+                    chunks.append(part)
+                else:
+                    # Clause still too long: greedily pack whole words.
+                    words = part.split()
+                    current_chunk = ""
+                    for word in words:
+                        test_chunk = f"{current_chunk} {word}" if current_chunk else word
+                        if len(test_chunk) <= max_chars:
+                            current_chunk = test_chunk
+                        else:
+                            if current_chunk:
+                                chunks.append(current_chunk.strip())
+                            current_chunk = word
+                    if current_chunk:
+                        chunks.append(current_chunk.strip())
+    return tuple(chunk for chunk in chunks if chunk.strip())
+def process_audio_segment_fast(audio_file: str, crossfade_ms: Optional[int] = None) -> Optional[AudioSegment]:
+    """Load one generated audio file and prepare it for merging.
+
+    The segment is normalized, leading and trailing silence is trimmed, 50 ms
+    of silence is added on both sides, and short fades are applied to the
+    ends. The source file is deleted afterwards, whether or not processing
+    succeeded.
+
+    Args:
+        audio_file: Path of the temporary audio file to load.
+        crossfade_ms: Fade length in milliseconds; a falsy value selects
+            config.CROSSFADE_MS.
+
+    Returns:
+        The processed segment, or None when the file could not be processed.
+    """
+    crossfade_ms = crossfade_ms or config.CROSSFADE_MS
     try:
         segment = AudioSegment.from_file(audio_file)
+        segment = normalize(segment)
+        # Trim silence at the edges only. strip_silence(silence_len=50) is
+        # rejected by pydub because its default padding (100 ms) exceeds
+        # silence_len, and it would also remove pauses inside the speech.
+        if len(segment) > 200:
+            try:
+                from pydub.silence import detect_leading_silence
+                lead_ms = detect_leading_silence(segment, silence_threshold=-40)
+                trail_ms = detect_leading_silence(segment.reverse(), silence_threshold=-40)
+                # Leave an all-silent segment untouched instead of emptying it.
+                if lead_ms + trail_ms < len(segment):
+                    segment = segment[lead_ms:len(segment) - trail_ms]
+            except Exception as e:
+                logger.warning(f"Silence stripping failed: {e}")
+        # Add micro-padding for crossfade safety
+        silence_start = AudioSegment.silent(duration=50)
+        silence_end = AudioSegment.silent(duration=50)
+        segment = silence_start + segment + silence_end
+        # Pre-apply crossfade to ends for smoother merging
+        if len(segment) > crossfade_ms * 2:
+            segment = segment.fade_in(crossfade_ms).fade_out(crossfade_ms)
+        return segment
     except Exception as e:
+        logger.error(f"Error processing audio segment {audio_file}: {e}")
+        return None
+    finally:
+        # Cleanup temp file
+        try:
+            if os.path.exists(audio_file):
+                os.unlink(audio_file)
+        except Exception as e:
+            logger.warning(f"Failed to cleanup {audio_file}: {e}")
+async def bilingual_tts_optimized(
+    text: str,
+    output_file: str = None,
+    voice_ta: Optional[str] = None,
+    max_concurrent: int = None
+) -> Optional[str]:
+    """Convert ``text`` to a single MP3 by synthesizing chunks concurrently.
+
+    The text is chunked with smart_text_chunking, each chunk is synthesized
+    with generate_safe_audio, the results are post-processed in a thread pool,
+    merged in order with a crossfade and a pause after each appended chunk,
+    compressed, normalized and exported.
+
+    Args:
+        text: Text to speak.
+        output_file: Destination path; defaults to "audio_output.mp3" inside
+            config.AUDIO_DIR.
+        voice_ta: Voice used for every chunk; config.VOICE_EN when falsy.
+        max_concurrent: Cap on simultaneous TTS requests; a falsy value
+            selects config.MAX_CONCURRENT.
+
+    Returns:
+        ``output_file`` on success, or None when nothing could be generated
+        or any step raised. Chunks that fail are logged and left out.
+    """
+    max_concurrent = max_concurrent or config.MAX_CONCURRENT
+    output_file = output_file or os.path.join(config.AUDIO_DIR, "audio_output.mp3")
+    logger.info(f"Starting bilingual TTS for text length: {len(str(text or ''))}")
     try:
+        chunks = smart_text_chunking(text)
+        if not chunks:
+            logger.error("No valid text chunks after cleaning")
             return None
+        logger.info(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests")
+        # Both branches of the former per-chunk Tamil check selected the same
+        # voice, so the voice is chosen once for all chunks.
+        voice = voice_ta or config.VOICE_EN
         semaphore = asyncio.Semaphore(max_concurrent)
+        tasks = [generate_safe_audio(chunk, voice, semaphore) for chunk in chunks]
+        # Generate audio concurrently; gather keeps the results in chunk order.
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+        processed_audio_files = []
+        for index, result in enumerate(results):
+            if isinstance(result, str) and result and os.path.exists(result):
+                processed_audio_files.append(result)
+            else:
+                # Report the gap instead of dropping the chunk silently.
+                logger.warning(f"Chunk {index} produced no audio and will be missing from the output: {result!r}")
+        if not processed_audio_files:
+            logger.error("No audio was successfully generated")
             return None
+        logger.info(f"Successfully generated {len(processed_audio_files)} audio segments")
+        # Process segments in parallel
+        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
+            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
+        audio_segments = [seg for seg in audio_segments if seg is not None]
         if not audio_segments:
+            logger.error("No audio segments were successfully processed")
             return None
+        # Merge with crossfading for smoothness
+        logger.info("Merging audio segments with crossfading...")
         merged_audio = audio_segments[0]
+        pause = AudioSegment.silent(duration=config.PAUSE_DURATION_MS)
         for segment in audio_segments[1:]:
+            # pydub rejects a crossfade longer than either side, so clamp it.
+            crossfade = min(config.CROSSFADE_MS, len(merged_audio), len(segment))
+            merged_audio = merged_audio.append(segment, crossfade=crossfade)
+            merged_audio += pause  # Add pause after crossfade
+        # Final mastering: compression and normalization
+        logger.info("Applying final audio mastering...")
         try:
             merged_audio = merged_audio.compress_dynamic_range(
+                threshold=-20.0,
+                ratio=4.0,
+                attack=5.0,
                 release=50.0
             )
+        except Exception as e:
+            logger.warning(f"Dynamic range compression failed: {e}")
         merged_audio = normalize(merged_audio)
         # Export
+        # Make sure the destination directory exists before writing.
+        os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
+        merged_audio.export(output_file, format="mp3", bitrate=config.BITRATE)
+        logger.info(f"✅ Audio successfully generated: {output_file}")
+        return output_file
+    except Exception as e:
+        logger.error(f"Main error in bilingual TTS: {e}", exc_info=True)
         return None
+# Voice mapping for multi-language support
+VOICES = {
+    "English": "en-US-JennyNeural",
+    "Tamil": "ta-IN-PallaviNeural",
+    "Hindi": "hi-IN-SwaraNeural",
+    "Malayalam": "ml-IN-SobhanaNeural",
+    "Kannada": "kn-IN-SapnaNeural",
+    "Telugu": "te-IN-ShrutiNeural",
+    "Bengali": "bn-IN-TanishaaNeural",
+    "Marathi": "mr-IN-AarohiNeural",
+    "Gujarati": "gu-IN-DhwaniNeural",
+    "Punjabi": "pa-IN-VaaniNeural",
+    "Urdu": "ur-IN-GulNeural",
+    "French": "fr-FR-DeniseNeural",
+    "German": "de-DE-KatjaNeural",
+    "Spanish": "es-ES-ElviraNeural",
+    "Italian": "it-IT-IsabellaNeural",
+    "Russian": "ru-RU-SvetlanaNeural",
+    "Japanese": "ja-JP-NanamiNeural",
+    "Korean": "ko-KR-SunHiNeural",
+    "Chinese": "zh-CN-XiaoxiaoNeural",
+    "Arabic": "ar-SA-ZariyahNeural",
+    "Portuguese": "pt-BR-FranciscaNeural",
+    "Dutch": "nl-NL-FennaNeural",
+    "Greek": "el-GR-AthinaNeural",
+    "Hebrew": "he-IL-HilaNeural",
+    "Turkish": "tr-TR-EmelNeural",
+    "Polish": "pl-PL-AgnieszkaNeural",
+    "Thai": "th-TH-AcharaNeural",
+    "Vietnamese": "vi-VN-HoaiMyNeural",
+    "Swedish": "sv-SE-SofieNeural",
+    "Finnish": "fi-FI-NooraNeural",
+    "Czech": "cs-CZ-VlastaNeural",
+    "Hungarian": "hu-HU-NoemiNeural"
+}
+async def generate_tts_optimized(id: int, lines: Union[List[str], str], lang: str) -> Tuple[Optional[float], Optional[str]]:
+    """Generate ``audio{id}.mp3`` in config.AUDIO_DIR and report its duration.
+
+    Args:
+        id: Index into ``lines``; also used in the output file name.
+        lines: Sequence of texts, or a single string that is used whole.
+        lang: Either a language name (a key of VOICES), or the combined form
+            "<text>&&&<language>", in which case the text comes from ``lang``
+            and ``lines`` is ignored.
+
+    Returns:
+        (duration_in_seconds, audio_path) on success, (None, None) on failure.
+    """
     audio_name = f"audio{id}.mp3"
+    audio_path = os.path.join(config.AUDIO_DIR, audio_name)
     if "&&&" in lang:
+        parts = lang.split("&&&")
+        text = parts[0].strip()
+        lang_name = parts[1].strip()
+        voice_to_use = VOICES.get(lang_name, config.VOICE_EN)
     else:
+        if isinstance(lines, str):
+            # Indexing a plain string would select a single character.
+            text = lines
+        else:
+            try:
+                text = lines[id]
+            except (IndexError, KeyError, TypeError) as e:
+                logger.error(f"No text available for ID {id}: {e}")
+                return None, None
+        voice_to_use = VOICES.get(lang, config.VOICE_EN)
+    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, config.MAX_CONCURRENT)
     if output and os.path.exists(audio_path):
         try:
             audio = MP3(audio_path)
             duration = audio.info.length
+            logger.info(f"TTS completed for ID {id}: duration {duration:.2f}s")
             return duration, audio_path
         except Exception as e:
+            logger.error(f"Error reading MP3 metadata for {audio_path}: {e}")
+    logger.error(f"TTS failed for ID {id}")
     return None, None
+def audio_func(id: int, lines: List[str], lang: str) -> Tuple[Optional[float], Optional[str]]:
+    """Run generate_tts_optimized to completion from synchronous code.
+
+    Returns:
+        The (duration, audio_path) pair from generate_tts_optimized, or
+        (None, None) when any exception is raised; the error is logged.
+    """
+    # asyncio.run starts its own event loop; when called from a thread that
+    # already runs one it raises RuntimeError, which ends up in the handler
+    # below and yields (None, None).
     try:
+        return asyncio.run(generate_tts_optimized(id, lines, lang))
     except Exception as e:
+        logger.error(f"Audio function failed for ID {id}: {e}", exc_info=True)
         return None, None
+# Example usage (production entry point)
+if __name__ == "__main__":
+    # Example: Generate audio for a sample text
+    sample_text = "Voltage னு சொல்றது simple circuit ல current அ..."
+    sample_lines = [sample_text]
+    # Pass the bare language name. With the "<text>&&&<language>" form the
+    # text is taken from the part before "&&&", so "Tamil&&&Tamil" would speak
+    # the word "Tamil" instead of sample_text.
+    duration, path = audio_func(0, sample_lines, "Tamil")
+    if path:
+        print(f"Generated: {path} (Duration: {duration:.2f}s)")
+    else:
+        print("Generation failed.")
 def create_manim_script(problem_data, script_path, audio_path, scale=1):
     """Generate Manim script from problem data with robust wrapping."""