Spaces:

sreepathi-ravikumar
/

backendprocessmath

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Dec 6, 2025

Commit

016b3e7

verified ·

1 Parent(s): d59be26

Update app.py

Browse files

Files changed (1) hide show

app.py +352 -170

app.py CHANGED Viewed

@@ -36,6 +36,24 @@ API_KEY = "rkmentormindzofficaltokenkey12345"
 import os
 import re
 import html
@@ -62,236 +80,394 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
 # Pre-compiled regex patterns for speed
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
-TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
-BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
-SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
 WHITESPACE_PATTERN = re.compile(r'\s+')
-# More conservative sentence splitting - only on major punctuation with space
-SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+')
-# More conservative sub-splitting - avoid splitting on hyphens and preserve word boundaries
-SUB_PATTERN = re.compile(r'(?<=[,;])\s+')
-@lru_cache(maxsize=1024)
-def clean_text_for_tts(text):
-    """Cleans text before TTS with optimized regex and caching."""
     if not text:
         return ""
     text = str(text).strip()
     text = html.unescape(text)
     text = URL_PATTERN.sub('', text)
     text = TAG_PATTERN.sub('', text)
-    text = BRACKET_PATTERN.sub('', text)
-    text = SPECIAL_CHAR_PATTERN.sub('', text)
-    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
-    for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
-        text = text.replace(keyword, '').replace(keyword.upper(), '')
-    text = unicodedata.normalize('NFKD', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
     return text.strip()
-async def generate_safe_audio(text, voice, semaphore):
-    """Generate audio with robust retries, caching, and exponential backoff."""
-    # Create a deterministic filename based on content (Disk Caching)
-    text_hash = hashlib.md5(f"{text}_{voice}".encode('utf-8')).hexdigest()
-    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
-    if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 0:
-        return cache_filename
-    async with semaphore:
-        cleaned_text = clean_text_for_tts(text)
-        if not cleaned_text:
-            return None
-        # Retry configuration
-        max_retries = 3
-        base_delay = 2.0
-        for attempt in range(max_retries):
-            try:
-                comm = edge_tts.Communicate(cleaned_text, voice=voice)
-                await comm.save(cache_filename)
-                if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 0:
-                    return cache_filename
-            except Exception as e:
-                if attempt == max_retries - 1:
-                    print(f"Failed to generate audio after {max_retries} attempts: {e}")
-                    return None
-                # Exponential backoff with jitter
-                sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
-                print(f"Rate limit/Error hit. Retrying in {sleep_time:.2f}s...")
-                await asyncio.sleep(sleep_time)
-        return None
-@lru_cache(maxsize=256)
-def smart_text_chunking(text, max_chars=300):
-    """Cached text chunking with larger chunks and better preservation of word order."""
-    text = clean_text_for_tts(text)
     if not text:
-        return tuple()
-    # First try to split on major sentence boundaries
-    sentences = SENTENCE_PATTERN.split(text)
     chunks = []
     for sentence in sentences:
         sentence = sentence.strip()
         if not sentence:
             continue
-        # If sentence fits, keep it whole
-        if len(sentence) <= max_chars:
-            chunks.append(sentence)
         else:
-            # Try splitting on commas/semicolons but preserve larger context
-            sub_parts = SUB_PATTERN.split(sentence)
-            current_chunk = ""
-            for part in sub_parts:
-                part = part.strip()
-                if not part:
-                    continue
-                test_chunk = f"{current_chunk}, {part}" if current_chunk else part
-                if len(test_chunk) <= max_chars:
-                    current_chunk = test_chunk
-                else:
-                    if current_chunk:
-                        chunks.append(current_chunk.strip())
-                    # If single part is too long, split by words carefully
-                    if len(part) > max_chars:
                         words = part.split()
                         word_chunk = ""
-                        for word in words:
-                            test_word_chunk = f"{word_chunk} {word}" if word_chunk else word
                             if len(test_word_chunk) <= max_chars:
                                 word_chunk = test_word_chunk
                             else:
                                 if word_chunk:
-                                    chunks.append(word_chunk.strip())
                                 word_chunk = word
                         if word_chunk:
                             current_chunk = word_chunk
-                    else:
-                        current_chunk = part
-            if current_chunk:
-                chunks.append(current_chunk.strip())
-    return tuple(chunk for chunk in chunks if chunk.strip())
-def process_audio_segment_fast(audio_file):
-    """Fast audio processing in separate thread."""
     segment = None
     try:
         if not audio_file or not os.path.exists(audio_file):
-            return None
         segment = AudioSegment.from_file(audio_file)
-        segment = normalize(segment)
-        if len(segment) > 200:
             try:
-                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
             except Exception:
                 pass
-        return segment
     except Exception as e:
-        print(f"Warning: Error processing audio segment: {e}")
-        return None
-    finally:
-        try:
-            if audio_file and os.path.exists(audio_file):
-                os.unlink(audio_file)
-        except Exception:
-            pass
-async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=5):
-    """Ultra-optimized bilingual TTS with parallel processing and reduced concurrency."""
-    print("Starting optimized bilingual TTS processing...")
     try:
-        chunks = smart_text_chunking(text)
         if not chunks:
-            print("Error: No valid text chunks after cleaning")
             return None
-        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
-        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
         semaphore = asyncio.Semaphore(max_concurrent)
-        tasks = []
-        for i, chunk in enumerate(chunks):
-            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
-            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
-            tasks.append(generate_safe_audio(chunk, voice, semaphore))
-        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
-        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f and os.path.exists(f)]
-        if not processed_audio_files:
-            print("Error: No audio was successfully generated")
             return None
-        print(f"Successfully generated {len(processed_audio_files)} audio segments")
-        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
-            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
-        audio_segments = [seg for seg in audio_segments if seg is not None]
         if not audio_segments:
-            print("Error: No audio segments were successfully processed")
             return None
         print("Merging audio segments...")
         merged_audio = audio_segments[0]
-        pause = AudioSegment.silent(duration=150)
-        for segment in audio_segments[1:]:
             merged_audio += pause + segment
         print("Applying final audio processing...")
         merged_audio = merged_audio.compress_dynamic_range(
             threshold=-20.0,
-            ratio=4.0,
             attack=5.0,
             release=50.0
         )
-        merged_audio = normalize(merged_audio)
         merged_audio.export(output_file, format="mp3", bitrate="192k")
         print(f"✅ Audio successfully generated: {output_file}")
         return output_file
     except Exception as main_error:
-        print(f"Main error in bilingual TTS: {main_error}")
         traceback.print_exc()
         return None
 async def generate_tts_optimized(id, lines, lang):
-    """Optimized TTS generation function with reduced concurrency."""
-    voice = {
         "English": "en-US-JennyNeural",
         "Tamil": "ta-IN-PallaviNeural",
         "Hindi": "hi-IN-SwaraNeural",
@@ -325,30 +501,35 @@ async def generate_tts_optimized(id, lines, lang):
         "Czech": "cs-CZ-VlastaNeural",
         "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
     audio_path = os.path.join(AUDIO_DIR, audio_name)
     if "&&&" in lang:
-        listf = lang.split("&&&")
-        text = listf[0].strip()
-        lang_name = listf[1].strip() if len(listf) > 1 else "English"
-        voice_to_use = voice.get(lang_name, VOICE_EN)
     else:
-        text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
-        voice_to_use = voice.get(lang, VOICE_EN)
-    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
     if output and os.path.exists(audio_path):
         try:
             audio = MP3(audio_path)
             duration = audio.info.length
             return duration, audio_path
         except Exception as e:
-            print(f"Error reading audio file: {e}")
             return None, None
     return None, None
@@ -362,9 +543,10 @@ def audio_func(id, lines, lang):
         finally:
             loop.close()
     except Exception as e:
-        print(f"Error in audio_func: {e}")
         traceback.print_exc()
         return None, None

+import os
+import re
+import html
+import unicodedata
+import asyncio
+import tempfile
+import traceback
+import random
+import hashlib
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
+import edge_tts
+from pydub import AudioSegment
+from pydub.effects import normalize
+from mutagen.mp3 import MP3
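+# NOTE(review): a stray Markdown code-fence opener was committed on this line; it is not valid Python and has been dropped.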
 import os
 import re
 import html
 # Pre-compiled regex patterns for speed
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
+# Matches complete <...> tags only; a lone '<' or '>' stays in the text.
+TAG_PATTERN = re.compile(r'<[^>]*>')
+# Titles and degree abbreviations whose trailing period is not a sentence end.
+ABBREVIATION_PATTERN = re.compile(r'\b(?:Dr|Mr|Mrs|Ms|Prof|Sr|Jr|Ph\.D|M\.D|B\.A|M\.A)\.')
+# Sentence boundary: whitespace preceded by . ! ? or a danda and followed by an
+# uppercase ASCII letter or a Tamil-block character.
+# NOTE: a lookbehind placed directly before \s+ inspects the punctuation mark
+# itself, so it cannot exclude digits or initials; abbreviations are handled
+# separately through ABBREVIATION_PATTERN.
+SENTENCE_SPLIT_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-Z\u0B80-\u0BFF])')
 WHITESPACE_PATTERN = re.compile(r'\s+')
+def clean_text_for_tts(text, preserve_structure=True):
+    """
+    Normalise raw text into a plain string suitable for a TTS request.
+
+    Steps, in order: HTML-unescape, drop URLs and <...> tags, strip unwanted
+    punctuation, turn line breaks and tabs into spaces, remove SSML remnants,
+    apply Unicode NFC normalisation and collapse whitespace runs.
+    The function is not cached.
+
+    Args:
+        text: Any value; it is converted with str(). Falsy input yields "".
+        preserve_structure: When True, only control characters and the
+            characters { } [ ] \\ ` ~ are removed. When False, the wider set
+            # @ $ % ^ & * _ + = | \\ ` ~ { } [ ] is removed instead.
+
+    Returns:
+        The cleaned string with surrounding whitespace stripped.
+    """
     if not text:
         return ""
+    cleaned = html.unescape(str(text).strip())
+    # URLs go first, then anything that looks like a complete tag.
+    cleaned = TAG_PATTERN.sub('', URL_PATTERN.sub('', cleaned))
+    if preserve_structure:
+        # Control characters, then the few symbols that are always dropped.
+        for unwanted in (r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]', r'[{}[\]\\`~]'):
+            cleaned = re.sub(unwanted, '', cleaned)
+    else:
+        # Aggressive mode removes a broader symbol set.
+        cleaned = re.sub(r'[#@$%^&*_+=|\\`~{}[\]]', '', cleaned)
+    # Line breaks and tabs become plain spaces.
+    for break_char in ('\n', '\t', '\r'):
+        cleaned = cleaned.replace(break_char, ' ')
+    # SSML is only removed when it appears as a tag or an xmlns attribute,
+    # never as an ordinary word.
+    cleaned = re.sub(r'</?(?:voice|speak|prosody|ssml)[^>]*>', '', cleaned, flags=re.IGNORECASE)
+    cleaned = re.sub(r'\bxmlns\s*=\s*["\'][^"\']*["\']', '', cleaned, flags=re.IGNORECASE)
+    # NFC keeps composed forms instead of decomposing them as NFKD would.
+    cleaned = unicodedata.normalize('NFC', cleaned)
+    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
+def detect_language_segments(text):
+    """
+    Choose one dominant script for the whole text.
+
+    Characters are counted per Unicode block (Tamil, Devanagari, Malayalam,
+    Kannada, Telugu). The block with the highest count wins, provided it has
+    more than five characters; otherwise the result is 'en'.
+
+    Returns:
+        One of 'ta', 'hi', 'ml', 'kn', 'te' or 'en'.
+    """
+    if not text:
+        return 'en'
+    # (code, first code point, last code point); row order is the tie-break priority.
+    script_ranges = (
+        ('ta', '\u0B80', '\u0BFF'),
+        ('hi', '\u0900', '\u097F'),
+        ('ml', '\u0D00', '\u0D7F'),
+        ('kn', '\u0C80', '\u0CFF'),
+        ('te', '\u0C00', '\u0C7F'),
+    )
+    totals = [
+        (code, sum(1 for ch in text if low <= ch <= high))
+        for code, low, high in script_ranges
+    ]
+    # max() returns the first maximal entry, so earlier rows win ties.
+    best_code, best_total = max(totals, key=lambda entry: entry[1])
+    return best_code if best_total > 5 else 'en'
+def _split_oversized_sentence(sentence, max_chars):
+    """Break one over-long sentence into ordered pieces of at most max_chars where possible."""
+    pieces = []
+    # Prefer clause boundaries (after ';', ':' or an em dash). Commas are never
+    # split points, so numbers such as 1,000 stay intact.
+    for part in re.split(r'(?<=[;:—])\s+', sentence):
+        part = part.strip()
+        if not part:
+            continue
+        if len(part) <= max_chars:
+            pieces.append(part)
+            continue
+        # Last resort: greedy packing of whole words, without overlap, so no
+        # word is ever spoken twice.
+        packed = ""
+        for word in part.split():
+            candidate = f"{packed} {word}" if packed else word
+            if len(candidate) <= max_chars:
+                packed = candidate
+            else:
+                if packed:
+                    pieces.append(packed)
+                # A single word longer than max_chars is kept whole.
+                packed = word
+        if packed:
+            pieces.append(packed)
+    return pieces
+def smart_text_chunking(text, max_chars=350):
+    """
+    Split text into ordered chunks of at most max_chars characters.
+
+    The text is cleaned first, then split into sentences (periods of known
+    abbreviations are protected during the split). Sentences are packed
+    greedily into chunks; a sentence longer than max_chars is first broken at
+    clause boundaries and, if still too long, at word boundaries. Every piece
+    of the input appears exactly once and in its original order.
+
+    Args:
+        text: Raw input text.
+        max_chars: Upper bound on chunk length. A single word longer than this
+            still forms its own, longer chunk.
+
+    Returns:
+        A list of non-empty chunk strings; empty when nothing is left after
+        cleaning.
+    """
+    text = clean_text_for_tts(text, preserve_structure=True)
     if not text:
+        return []
+    # Hide abbreviation periods from the sentence splitter, then restore them.
+    protected_text = ABBREVIATION_PATTERN.sub(lambda m: m.group(0).replace('.', '<<<DOT>>>'), text)
+    sentences = [s.replace('<<<DOT>>>', '.') for s in SENTENCE_SPLIT_PATTERN.split(protected_text)]
     chunks = []
+    current_chunk = ""
     for sentence in sentences:
         sentence = sentence.strip()
         if not sentence:
             continue
+        if len(sentence) <= max_chars:
+            pieces = [sentence]
+        else:
+            pieces = _split_oversized_sentence(sentence, max_chars)
+        for piece in pieces:
+            candidate = f"{current_chunk} {piece}" if current_chunk else piece
+            if len(candidate) <= max_chars:
+                current_chunk = candidate
+            else:
+                # Flush exactly once; the accumulator restarts with the new
+                # piece, so a flushed chunk can never be emitted again.
+                if current_chunk:
+                    chunks.append(current_chunk)
+                current_chunk = piece
+    # Emit whatever is still pending.
+    if current_chunk:
+        chunks.append(current_chunk)
+    return [c.strip() for c in chunks if c.strip()]
+async def generate_safe_audio(text, voice, semaphore, chunk_index=0):
+    """
+    Synthesize one text chunk to an MP3 file, reusing a cached file when present.
+
+    The target file is AUDIO_DIR/cache_<md5>.mp3, where the hash covers the
+    raw text, the voice and chunk_index. Generation is attempted up to three
+    times inside the semaphore.
+
+    Args:
+        text: Chunk text; it is cleaned again before being sent to edge_tts.
+        voice: Voice name passed to edge_tts.Communicate.
+        semaphore: Async context manager limiting concurrent generations.
+        chunk_index: Position of the chunk; echoed back in the result and
+            included in the cache key.
+
+    Returns:
+        (path, chunk_index) on success, (None, chunk_index) otherwise.
+    """
+    # Create cache key with voice to avoid cross-language contamination
+    # NOTE(review): chunk_index is part of the key, so identical text at a
+    # different position does not hit the cache.
+    cache_key = f"{text}_{voice}_{chunk_index}"
+    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
+    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
+    # Check cache (done before acquiring the semaphore)
+    if os.path.exists(cache_filename):
+        try:
+            if os.path.getsize(cache_filename) > 1024:  # At least 1KB
+                print(f"✓ Using cached audio for chunk {chunk_index}")
+                return cache_filename, chunk_index
+        except Exception:
+            # Size lookup failed: fall through and regenerate the file.
+            pass
+    async with semaphore:
+        cleaned_text = clean_text_for_tts(text, preserve_structure=True)
+        if not cleaned_text or len(cleaned_text) < 2:
+            print(f"✗ Chunk {chunk_index} has no valid content after cleaning")
+            return None, chunk_index
+        # Retry configuration
+        max_retries = 3
+        base_delay = 2.0
+        for attempt in range(max_retries):
+            try:
+                print(f"→ Generating chunk {chunk_index} (attempt {attempt + 1}): {cleaned_text[:50]}...")
+                comm = edge_tts.Communicate(cleaned_text, voice=voice)
+                await comm.save(cache_filename)
+                # Validate file
+                if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
+                    print(f"✓ Generated chunk {chunk_index}")
+                    return cache_filename, chunk_index
+                else:
+                    # No exception but an unusable file: the loop moves to the
+                    # next attempt immediately, without sleeping.
+                    print(f"✗ Chunk {chunk_index} file too small or missing")
+            except Exception as e:
+                if attempt == max_retries - 1:
+                    print(f"✗ Failed chunk {chunk_index} after {max_retries} attempts: {e}")
+                    return None, chunk_index
+                # Exponential backoff with jitter
+                sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
+                print(f"⚠ Chunk {chunk_index} rate limit/error. Retrying in {sleep_time:.2f}s...")
+                await asyncio.sleep(sleep_time)
+        # Reached when every attempt produced a too-small or missing file.
+        return None, chunk_index
+def process_audio_segment_fast(audio_data):
+    """
+    Load one generated audio file and apply light level and silence clean-up.
+
+    Args:
+        audio_data: Tuple (audio_file, chunk_index).
+
+    Returns:
+        (segment, chunk_index) on success; (None, chunk_index) when the file
+        is missing or loading/processing raises. The chunk index is passed
+        through unchanged so callers can restore ordering.
+    """
+    audio_file, chunk_index = audio_data
     segment = None
     try:
         if not audio_file or not os.path.exists(audio_file):
+            return None, chunk_index
         segment = AudioSegment.from_file(audio_file)
+        # Gentle normalization
+        # Only segments quieter than -30 dBFS are boosted; the gain of
+        # (-dBFS - 20) targets a level of about -20 dBFS.
+        if segment.dBFS < -30:
+            segment = segment.apply_gain(-segment.dBFS - 20)
+        # Light silence trimming (preserve natural pauses)
+        # NOTE(review): len(segment) is presumably milliseconds (pydub) - confirm.
+        if len(segment) > 500:
             try:
+                segment = segment.strip_silence(
+                    silence_len=100,
+                    silence_thresh=-45,
+                    padding=100
+                )
             except Exception:
                 # Trimming is best-effort; the untrimmed segment is kept.
                 pass
+        return segment, chunk_index
     except Exception as e:
+        print(f"✗ Error processing audio segment {chunk_index}: {e}")
+        return None, chunk_index
+async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=4):
+    """
+    Synthesize text into a single MP3 file.
+
+    The text is chunked, each chunk is generated concurrently (bounded by
+    max_concurrent), the resulting files are post-processed in a thread pool,
+    merged in chunk order with a short pause between them, compressed,
+    normalized and exported.
+
+    Args:
+        text: Text to speak.
+        output_file: Destination path of the merged MP3.
+        VOICE_TA: Voice to use for every chunk; VOICE_EN is used when falsy.
+        max_concurrent: Maximum number of simultaneous chunk generations.
+
+    Returns:
+        output_file on success, or None when no chunk could be produced or
+        any step raised (the traceback is printed).
+    """
     try:
+        # The header is printed inside the try block so that unusable input
+        # ends in the same "return None" path as every other failure.
+        print(f"\n{'='*60}")
+        print(f"Starting TTS processing: {len(text)} chars")
+        print(f"{'='*60}")
+        # Detect primary language ONCE for entire text (logged for diagnostics)
+        primary_lang = detect_language_segments(text)
+        print(f"Detected primary language: {primary_lang}")
+        # Chunk text deterministically
+        chunks = smart_text_chunking(text, max_chars=350)
         if not chunks:
+            print("✗ No valid text chunks after cleaning")
             return None
+        print(f"Split into {len(chunks)} chunks")
+        for i, chunk in enumerate(chunks[:3]):
+            print(f"  Chunk {i}: {chunk[:60]}...")
+        # One voice for the whole text: the caller's voice, else the English default.
+        voice = VOICE_TA or VOICE_EN
+        print(f"Using voice: {voice}")
+        # Create semaphore for rate limiting
         semaphore = asyncio.Semaphore(max_concurrent)
+        # Generate all audio with index tracking
+        tasks = [
+            generate_safe_audio(chunk, voice, semaphore, chunk_index=i)
+            for i, chunk in enumerate(chunks)
+        ]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+        # With return_exceptions=True a failed task yields the exception object
+        # itself, so it must be checked BEFORE the result is unpacked.
+        valid_results = []
+        for result in results:
+            if isinstance(result, BaseException):
+                print(f"✗ Chunk generation raised: {result!r}")
+                continue
+            audio_file, idx = result
+            if audio_file and os.path.exists(audio_file):
+                valid_results.append((audio_file, idx))
+        if not valid_results:
+            print("✗ No audio was successfully generated")
             return None
+        # Sort by chunk index to guarantee correct order
+        valid_results.sort(key=lambda x: x[1])
+        print(f"✓ Generated {len(valid_results)}/{len(chunks)} audio segments")
+        # Process audio with ordering
+        with ThreadPoolExecutor(max_workers=min(len(valid_results), 8)) as executor:
+            processed = list(executor.map(process_audio_segment_fast, valid_results))
+        # Drop failed segments and keep chunk order
+        processed = [(seg, idx) for seg, idx in processed if seg is not None]
+        processed.sort(key=lambda x: x[1])
+        audio_segments = [seg for seg, _ in processed]
         if not audio_segments:
+            print("✗ No audio segments were successfully processed")
             return None
+        print(f"✓ Processed {len(audio_segments)} segments in correct order")
+        # Merge with natural pauses
         print("Merging audio segments...")
         merged_audio = audio_segments[0]
+        pause = AudioSegment.silent(duration=180)
+        for segment in audio_segments[1:]:
             merged_audio += pause + segment
+        # Final processing
         print("Applying final audio processing...")
+        # Gentle compression
         merged_audio = merged_audio.compress_dynamic_range(
             threshold=-20.0,
+            ratio=3.0,
             attack=5.0,
             release=50.0
         )
+        # Final normalization
+        merged_audio = normalize(merged_audio, headroom=0.1)
+        # Export
         merged_audio.export(output_file, format="mp3", bitrate="192k")
         print(f"✅ Audio successfully generated: {output_file}")
+        print(f"   Duration: {len(merged_audio)/1000:.2f}s")
+        print(f"{'='*60}\n")
         return output_file
     except Exception as main_error:
+        print(f"✗ Main error in bilingual TTS: {main_error}")
         traceback.print_exc()
         return None
 async def generate_tts_optimized(id, lines, lang):
+    """
+    Generate the audio file for one item and report its duration.
+
+    The output is written to AUDIO_DIR/audio<id>.mp3.
+
+    Args:
+        id: Item number; used in the file name and as an index into lines.
+        lines: Sequence of texts, or a single value that is converted with str().
+        lang: Either a language name looked up in the voice table, or a
+            combined string "<text>&&&<language name>" that carries the text
+            itself.
+
+    Returns:
+        (duration, audio_path) on success, (None, None) otherwise.
+    """
+    voice_map = {
         "English": "en-US-JennyNeural",
         "Tamil": "ta-IN-PallaviNeural",
         "Hindi": "hi-IN-SwaraNeural",
         "Czech": "cs-CZ-VlastaNeural",
         "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
     audio_path = os.path.join(AUDIO_DIR, audio_name)
+    # Parse input
     if "&&&" in lang:
+        # Combined form: the text comes from lang itself, before the separator.
+        parts = lang.split("&&&")
+        text = parts[0].strip()
+        lang_name = parts[1].strip() if len(parts) > 1 else "English"
+        voice_to_use = voice_map.get(lang_name, VOICE_EN)
     else:
+        # Plain form: take lines[id] when it is a valid index, else the whole value.
+        if isinstance(lines, (list, tuple)) and 0 <= id < len(lines):
+            text = str(lines[id])
+        else:
+            text = str(lines)
+        voice_to_use = voice_map.get(lang, VOICE_EN)
+    # Generate audio
+    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=4)
     if output and os.path.exists(audio_path):
         try:
             # NOTE(review): info.length is presumably the duration in seconds (mutagen) - confirm.
             audio = MP3(audio_path)
             duration = audio.info.length
             return duration, audio_path
         except Exception as e:
+            print(f"✗ Error reading audio file metadata: {e}")
             return None, None
     return None, None
         finally:
             loop.close()
     except Exception as e:
+        print(f"✗ Error in audio_func: {e}")
         traceback.print_exc()
         return None, None
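+# NOTE(review): a stray Markdown code-fence closer was committed on this line; it is not valid Python and has been dropped.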