Spaces:

sreepathi-ravikumar
/

backendprocessmath

Sleeping

App Files Files Community

sreepathi-ravikumar committed 28 days ago

Commit

0bb2b49

verified ·

1 Parent(s): 13b333e

Update app.py

Browse files

Files changed (1) hide show

app.py +223 -145

app.py CHANGED Viewed

@@ -35,9 +35,6 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
 API_KEY = "rkmentormindzofficaltokenkey12345"
 import os
 import re
 import html
@@ -47,8 +44,10 @@ import tempfile
 import traceback
 import random
 import hashlib
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
 import edge_tts
 from pydub import AudioSegment
@@ -62,166 +61,227 @@ VOICE_EN = "en-IN-NeerjaNeural"
 AUDIO_DIR = os.path.join(os.getcwd(), "audio")
 os.makedirs(AUDIO_DIR, exist_ok=True)
-# Pre-compiled regex patterns for speed (compiled once, reused many times)
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
-TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
 WHITESPACE_PATTERN = re.compile(r'\s+')
-# More conservative sentence splitting to avoid breaking mid-word
 SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-ZА-ЯА-Я\u0B80-\u0BFF\u0900-\u097F])')
-# Avoid splitting on colons that are part of numbers (like time 5:30)
-SUB_PATTERN = re.compile(r'(?<=[,;])\s+')
-@lru_cache(maxsize=1024)
-def clean_text_for_tts(text):
-    """Cleans text before TTS with optimized regex and caching."""
     if not text:
         return ""
     text = str(text).strip()
     text = html.unescape(text)
-    # Use pre-compiled patterns (much faster)
     text = URL_PATTERN.sub('', text)
     text = TAG_PATTERN.sub('', text)
     text = BRACKET_PATTERN.sub('', text)
     text = SPECIAL_CHAR_PATTERN.sub('', text)
-    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
-    # Batch remove keywords (faster than multiple re.sub calls)
-    # But only if they appear as standalone words or in SSML context
-    for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
-        # Remove only if surrounded by whitespace or special chars (not part of words)
-        text = re.sub(rf'\b{keyword}\b', '', text, flags=re.IGNORECASE)
-    # Use NFC normalization instead of NFKD to preserve Tamil/Indic characters better
     text = unicodedata.normalize('NFC', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
     return text.strip()
-async def generate_safe_audio(text, voice, semaphore, chunk_index):
-    """Generate clean audio with rate limiting, caching, and retry logic."""
     # Create deterministic cache key
-    cache_key = f"{text}_{voice}"
-    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
     cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")
-    # Check disk cache first
     if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
         return cache_filename, chunk_index
-    async with semaphore:  # Limit concurrent TTS requests
-        cleaned_text = clean_text_for_tts(text)
-        if not cleaned_text or len(cleaned_text) < 2:
-            return None, chunk_index
-        # Retry configuration
         max_retries = 3
         base_delay = 2.0
         for attempt in range(max_retries):
             try:
-                comm = edge_tts.Communicate(cleaned_text, voice=voice)
-                await comm.save(cache_filename)
-                # Verify file was created successfully
-                if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 1024:
                     return cache_filename, chunk_index
             except Exception as e:
                 if attempt == max_retries - 1:
                     print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {e}")
                     return None, chunk_index
-                # Exponential backoff with jitter to avoid thundering herd
                 sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
                 print(f"Rate limit hit on chunk {chunk_index}. Retrying in {sleep_time:.2f}s...")
                 await asyncio.sleep(sleep_time)
         return None, chunk_index
-@lru_cache(maxsize=256)
-def smart_text_chunking(text, max_chars=250):
-    """
-    Cached text chunking with improved algorithm to preserve word order and context.
-    Increased max_chars to reduce total number of API calls.
-    """
-    text = clean_text_for_tts(text)
-    if not text:
-        return tuple()  # Return tuple for hashability (required by lru_cache)
-    # Protect common abbreviations
-    text = re.sub(r'\b(Dr|Mr|Mrs|Ms|Prof|Sr|Jr)\.\s', r'\1<<DOT>> ', text)
-    sentences = SENTENCE_PATTERN.split(text)
-    chunks = []
-    for sentence in sentences:
-        sentence = sentence.strip()
-        if not sentence:
-            continue
-        # Restore protected periods
-        sentence = sentence.replace('<<DOT>>', '.')
-        if len(sentence) <= max_chars:
-            chunks.append(sentence)
-        else:
-            # Try splitting on commas/semicolons first
-            sub_parts = SUB_PATTERN.split(sentence)
-            current_chunk = ""
-            for part in sub_parts:
-                part = part.strip()
-                if not part:
-                    continue
-                # Try to add to current chunk
-                test_chunk = f"{current_chunk}, {part}" if current_chunk else part
-                if len(test_chunk) <= max_chars:
-                    current_chunk = test_chunk
-                else:
-                    # Save current chunk if exists
-                    if current_chunk:
-                        chunks.append(current_chunk.strip())
-                    # If part itself is too long, split by words
-                    if len(part) > max_chars:
-                        words = part.split()
-                        word_chunk = ""
-                        for word in words:
-                            test_word_chunk = f"{word_chunk} {word}" if word_chunk else word
-                            if len(test_word_chunk) <= max_chars:
-                                word_chunk = test_word_chunk
-                            else:
-                                if word_chunk:
-                                    chunks.append(word_chunk.strip())
-                                word_chunk = word
-                        if word_chunk:
-                            current_chunk = word_chunk
-                    else:
-                        current_chunk = part
-            # Don't forget last chunk
-            if current_chunk:
-                chunks.append(current_chunk.strip())
-    # Filter empty chunks
-    return tuple(chunk for chunk in chunks if chunk.strip())
-def process_audio_segment_fast(audio_data):
-    """
-    Fast audio processing in separate thread with ordering preserved.
-    Input: (audio_file, chunk_index)
-    Output: (segment, chunk_index)
-    """
     audio_file, chunk_index = audio_data
     try:
@@ -236,23 +296,24 @@ def process_audio_segment_fast(audio_data):
             try:
                 segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
             except:
-                pass  # Skip if fails
         return segment, chunk_index
     except Exception as e:
         print(f"Warning: Error processing audio segment {chunk_index}: {e}")
         return None, chunk_index
-async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=5):
-    """
-    Ultra-optimized bilingual TTS with parallel processing.
-    Reduced max_concurrent to 5 for better rate limit compliance.
-    """
     print("Starting optimized bilingual TTS processing...")
     try:
         chunks = smart_text_chunking(text, max_chars=250)
         if not chunks:
             print("Error: No valid text chunks after cleaning")
@@ -260,26 +321,37 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
         print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
         is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
-        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
         semaphore = asyncio.Semaphore(max_concurrent)
-        # Prepare all tasks with index tracking
         tasks = []
         for i, chunk in enumerate(chunks):
-            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
-            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
             tasks.append(generate_safe_audio(chunk, voice, semaphore, i))
         # Generate all audio files concurrently
-        results = await asyncio.gather(*tasks, return_exceptions=True)
-        # Filter successful files and maintain order
         audio_data = []
         for result in results:
             if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
                 audio_data.append(result)
         if not audio_data:
             print("Error: No audio was successfully generated")
@@ -290,7 +362,7 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
         print(f"Successfully generated {len(audio_data)}/{len(chunks)} audio segments")
-        # Process audio segments in parallel using ThreadPoolExecutor
         with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
             processed = list(executor.map(process_audio_segment_fast, audio_data))
@@ -306,37 +378,45 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
         print(f"Processed {len(audio_segments)} segments in correct order")
-        # Merge audio segments (fast concatenation)
         print("Merging audio segments...")
         merged_audio = audio_segments[0]
-        pause = AudioSegment.silent(duration=180)  # Slightly shorter pause for smoother flow
         for segment in audio_segments[1:]:
             merged_audio += pause + segment
-        # Apply final processing (compression and normalization)
         print("Applying final audio processing...")
-        merged_audio = merged_audio.compress_dynamic_range(
-            threshold=-20.0,
-            ratio=4.0,
-            attack=5.0,
-            release=50.0
-        )
         merged_audio = normalize(merged_audio)
         # Export with high quality
         merged_audio.export(output_file, format="mp3", bitrate="192k")
-        print(f"✅ Audio successfully generated: {output_file}")
-        return output_file
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         traceback.print_exc()
         return None
-async def generate_tts_optimized(id, lines, lang):
     """Optimized TTS generation function."""
     voice = {
         "English": "en-US-JennyNeural",
@@ -399,8 +479,7 @@ async def generate_tts_optimized(id, lines, lang):
     return None, None
-def audio_func(id, lines, lang):
     """Synchronous wrapper for audio generation."""
     try:
         loop = asyncio.new_event_loop()
@@ -415,7 +494,6 @@ def audio_func(id, lines, lang):
         return None, None
 def create_manim_script(problem_data, script_path, audio_path, scale=1):
     """Generate Manim script from problem data with robust wrapping."""

 API_KEY = "rkmentormindzofficaltokenkey12345"
 import os
 import re
 import html
 import traceback
 import random
 import hashlib
+import json
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
+from typing import List, Tuple, Optional
 import edge_tts
 from pydub import AudioSegment
 AUDIO_DIR = os.path.join(os.getcwd(), "audio")
 os.makedirs(AUDIO_DIR, exist_ok=True)
# Pre-compiled regex patterns (built once at import time, reused on every call).

# http(s):// or www. links, up to the next whitespace, angle bracket or quote.
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
# Complete <...> tags only; a lone "<" or ">" is left in the text.
TAG_PATTERN = re.compile(r'<[^>]*>')
# Curly and square brackets.
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
# Symbols stripped before synthesis (includes the backslash and "$").
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
# Any run of whitespace, including real newlines and tabs.
WHITESPACE_PATTERN = re.compile(r'\s+')

# Sentence boundary: whitespace that follows ., !, ?, or a danda (। ॥) and is
# followed by an uppercase Latin or Cyrillic letter, or by any Tamil
# (U+0B80-U+0BFF) or Devanagari (U+0900-U+097F) character.
# The Cyrillic range was previously listed twice; one copy is sufficient.
SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+(?=[A-ZА-Я\u0B80-\u0BFF\u0900-\u097F])')

# Clause separator: a comma that is neither preceded nor followed by a digit,
# together with any whitespace after it. Commas inside numbers such as "1,234"
# therefore never match.
SUB_PATTERN = re.compile(r'(?<!\d),(?!\d)\s*')

# Memo of chunking results, keyed by an MD5 hex digest of "<text>_<max_chars>".
_chunking_cache = {}
def clean_text_for_tts(text: str) -> str:
    """Normalize raw input into plain text suitable for speech synthesis.

    Steps, in order: HTML entities are unescaped; literal escape sequences
    (a backslash followed by ``n``, ``t`` or ``r``) become spaces; URLs, tags,
    brackets and the characters in ``SPECIAL_CHAR_PATTERN`` are removed; the
    text is NFC-normalized; whitespace runs collapse to a single space.

    Args:
        text: Raw text. Any falsy value yields ``""``; other values are
            converted with ``str()``.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    if not text:
        return ""

    text = html.unescape(str(text).strip())

    # Literal two-character escapes (e.g. from double-escaped JSON) must be
    # turned into spaces BEFORE SPECIAL_CHAR_PATTERN runs: that pattern strips
    # the backslash, after which "\\n" can no longer be recognised and a stray
    # "n"/"t"/"r" would be left glued to the neighbouring words. Doing it
    # first also stops URL_PATTERN from swallowing the text after the escape.
    # Real newline/tab characters are handled by WHITESPACE_PATTERN below.
    for escaped in ('\\n', '\\t', '\\r'):
        text = text.replace(escaped, ' ')

    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)

    # NFC keeps composed forms, which preserves Tamil/Indic characters.
    text = unicodedata.normalize('NFC', text)

    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()
# A comma sitting directly between two digits (thousands separators such as
# "1,234"). Lookarounds keep the digits out of the match, so back-to-back
# separators ("1,2,3") are all found; a consuming `(\d),(\d)` skips every
# second one.
_DIGIT_COMMA_PATTERN = re.compile(r'(?<=\d),(?=\d)')

# A known abbreviation followed by a period that is itself followed by
# whitespace or end of text. The abbreviation is captured so the replacement
# can reuse the text exactly as written.
_ABBREVIATION_PATTERN = re.compile(
    r'\b(Dr|Mr|Mrs|Ms|Prof|Sr|Jr|St|etc|vs|approx|no)\.(?=\s|$)',
    flags=re.IGNORECASE,
)


def _protect_special_patterns(text: str) -> str:
    """Replace punctuation that must not be treated as a split point.

    * Commas between digits become ``<<COMMA>>`` (``1,234`` ->
      ``1<<COMMA>>234``).
    * The period after a listed abbreviation becomes ``<<DOT>>``
      (``Dr. Smith`` -> ``Dr<<DOT>> Smith``). Matching is case-insensitive
      and the original casing of the abbreviation is preserved.

    Currency symbols are intentionally left untouched: swapping them for one
    shared placeholder loses which symbol was present, so a later restore
    cannot tell "€" from "$".

    Args:
        text: Text to protect.

    Returns:
        The text with placeholders substituted.
    """
    text = _DIGIT_COMMA_PATTERN.sub('<<COMMA>>', text)
    text = _ABBREVIATION_PATTERN.sub(r'\1<<DOT>>', text)
    return text
def _restore_special_patterns(text: str) -> str:
    """Swap placeholder tokens back to the punctuation they stand for.

    ``<<COMMA>>`` becomes ``,``, ``<<DOT>>`` becomes ``.`` and ``<<CURR>>``
    always becomes ``$``.

    Args:
        text: Text that may contain placeholder tokens.

    Returns:
        The text with every placeholder token replaced.
    """
    placeholders = (
        ('<<COMMA>>', ','),
        ('<<DOT>>', '.'),
        ('<<CURR>>', '$'),
    )
    for token, literal in placeholders:
        text = text.replace(token, literal)
    return text
# Upper bound on memoized chunking results; the oldest entry is evicted first.
_CHUNK_CACHE_MAX_ENTRIES = 256


def _split_long_sentence(sentence: str, max_chars: int) -> List[str]:
    """Greedily pack whitespace-separated words into pieces of <= max_chars.

    A single word longer than ``max_chars`` is kept whole as its own piece.
    """
    pieces = []
    current = ""
    for word in sentence.split():
        candidate = f"{current} {word}" if current else word
        if not current or len(candidate) <= max_chars:
            current = candidate
        else:
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def smart_text_chunking(text: str, max_chars: int = 250) -> Tuple[str, ...]:
    """Split text into ordered, non-overlapping chunks for speech synthesis.

    The text is cleaned with ``clean_text_for_tts``, split at sentence
    boundaries (``SENTENCE_PATTERN``), and consecutive sentences are packed
    into chunks of at most ``max_chars`` characters. A sentence that is longer
    than ``max_chars`` on its own is split between words. Results are memoized
    in ``_chunking_cache``, so the same input always yields the same chunks.

    Chunks never repeat text from a neighbouring chunk: each chunk is turned
    into audio and the pieces are concatenated, so any overlap would be spoken
    twice.

    Args:
        text: Raw input text.
        max_chars: Target maximum chunk length. Length is measured while
            placeholders such as ``<<COMMA>>`` are still in place, so restored
            chunks can be slightly shorter than the limit. A single word
            longer than the limit is emitted unsplit.

    Returns:
        A tuple of non-empty chunk strings in reading order; an empty tuple
        when nothing remains after cleaning.
    """
    if not text:
        return tuple()

    cache_key = hashlib.md5(f"{text}_{max_chars}".encode()).hexdigest()
    cached = _chunking_cache.get(cache_key)
    if cached is not None:
        return cached

    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return tuple()

    # Hide commas/periods that are not real boundaries before splitting.
    protected = _protect_special_patterns(cleaned)
    sentences = [
        part.strip()
        for part in SENTENCE_PATTERN.split(protected)
        if part.strip()
    ]

    chunks = []
    current_chunk = ""
    for sentence in sentences:
        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if len(candidate) <= max_chars:
            current_chunk = candidate
            continue

        # The sentence does not fit: flush what has been collected so far.
        if current_chunk:
            chunks.append(current_chunk)

        if len(sentence) > max_chars:
            # All full pieces are final; the trailing piece stays open so the
            # following sentence can still be appended to it.
            *full_pieces, current_chunk = _split_long_sentence(sentence, max_chars)
            chunks.extend(full_pieces)
        else:
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    result = tuple(
        restored
        for restored in map(_restore_special_patterns, chunks)
        if restored.strip()
    )

    # Keep the memo bounded: dicts preserve insertion order, so the first key
    # is the oldest entry.
    while len(_chunking_cache) >= _CHUNK_CACHE_MAX_ENTRIES:
        _chunking_cache.pop(next(iter(_chunking_cache)), None)
    _chunking_cache[cache_key] = result
    return result
# Files at or below this size are treated as failed or truncated output.
_MIN_AUDIO_BYTES = 1024


def _is_usable_audio(path: str) -> bool:
    """Return True when *path* exists and is larger than _MIN_AUDIO_BYTES."""
    try:
        return os.path.getsize(path) > _MIN_AUDIO_BYTES
    except OSError:
        return False


def _remove_quietly(path: Optional[str]) -> None:
    """Best-effort delete of a temporary file; failures are ignored."""
    if not path:
        return
    try:
        os.unlink(path)
    except OSError:
        pass


async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
                             chunk_index: int) -> Tuple[Optional[str], int]:
    """Synthesize one text chunk to an MP3 file, using a disk cache and retries.

    The output path is ``AUDIO_DIR/cache_<md5 of "text_voice">.mp3``. If a
    usable file already exists there it is returned without acquiring the
    semaphore. Otherwise up to three attempts are made while holding
    ``semaphore``; each attempt writes to a temporary file and moves it onto
    the cache path only after the size check passes, so a partially written
    file is never visible under the cache name.

    Args:
        text: Text to synthesize; empty or one-character text is skipped.
        voice: Voice name passed to ``edge_tts.Communicate``.
        semaphore: Limits how many syntheses run concurrently.
        chunk_index: Position of this chunk; returned unchanged so callers can
            restore ordering.

    Returns:
        ``(path, chunk_index)`` on success, ``(None, chunk_index)`` otherwise.
    """
    if not text or len(text) < 2:
        return None, chunk_index

    # Deterministic cache key
    text_hash = hashlib.md5(f"{text}_{voice}".encode()).hexdigest()
    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")

    if _is_usable_audio(cache_filename):
        return cache_filename, chunk_index

    max_retries = 3
    base_delay = 2.0

    async with semaphore:
        for attempt in range(max_retries):
            failure = None
            temp_filename = None
            try:
                # Create the temp file next to the cache file: os.replace()
                # can fail when source and destination are on different
                # filesystems, which is common for the system temp directory.
                with tempfile.NamedTemporaryFile(
                    suffix='.mp3', dir=AUDIO_DIR, delete=False
                ) as tmp:
                    temp_filename = tmp.name

                comm = edge_tts.Communicate(text, voice=voice)
                await comm.save(temp_filename)

                if _is_usable_audio(temp_filename):
                    os.replace(temp_filename, cache_filename)
                    temp_filename = None  # now owned by the cache
                    return cache_filename, chunk_index
            except Exception as e:
                failure = e
            finally:
                # Also runs on task cancellation, so no temp file is leaked.
                _remove_quietly(temp_filename)

            if failure is None:
                # Output was missing or too small: retry immediately.
                continue

            if attempt == max_retries - 1:
                print(f"Failed to generate audio chunk {chunk_index} after {max_retries} attempts: {failure}")
                return None, chunk_index

            # Exponential backoff with jitter
            sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
            print(f"Rate limit hit on chunk {chunk_index}. Retrying in {sleep_time:.2f}s...")
            await asyncio.sleep(sleep_time)

    return None, chunk_index
+def process_audio_segment_fast(audio_data: Tuple[str, int]) -> Tuple[Optional[AudioSegment], int]:
+    """Process audio segment with proper cleanup and order preservation."""
     audio_file, chunk_index = audio_data
     try:
             try:
                 segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
             except:
+                pass
         return segment, chunk_index
     except Exception as e:
         print(f"Warning: Error processing audio segment {chunk_index}: {e}")
         return None, chunk_index
+    finally:
+        # Note: We don't delete cache files as they're reused
+        pass
+async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
+                                  VOICE_TA: Optional[str] = None, max_concurrent: int = 5) -> Optional[str]:
+    """Optimized bilingual TTS with parallel processing and order preservation."""
     print("Starting optimized bilingual TTS processing...")
     try:
+        # Get chunks deterministically
         chunks = smart_text_chunking(text, max_chars=250)
         if not chunks:
             print("Error: No valid text chunks after cleaning")
         print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
+        # Detect language once for entire text
         is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
+        has_tamil_chars = any('\u0B80' <= char <= '\u0BFF' for char in text)
+        # Choose default voice
+        default_voice = VOICE_TA if (is_bilingual_tamil and has_tamil_chars) else (VOICE_TA or VOICE_EN)
+        # Semaphore for rate limiting
         semaphore = asyncio.Semaphore(max_concurrent)
+        # Prepare tasks with indices
         tasks = []
         for i, chunk in enumerate(chunks):
+            # Use Tamil voice only if chunk contains Tamil characters AND we have Tamil voice
+            if is_bilingual_tamil and any('\u0B80' <= char <= '\u0BFF' for char in chunk):
+                voice = VOICE_TA
+            else:
+                voice = default_voice
             tasks.append(generate_safe_audio(chunk, voice, semaphore, i))
         # Generate all audio files concurrently
+        results = await asyncio.gather(*tasks, return_exceptions=False)
+        # Filter successful results and maintain order
         audio_data = []
         for result in results:
             if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
                 audio_data.append(result)
+            elif result is not None:
+                print(f"Warning: Got unexpected result type: {type(result)}")
         if not audio_data:
             print("Error: No audio was successfully generated")
         print(f"Successfully generated {len(audio_data)}/{len(chunks)} audio segments")
+        # Process audio segments in parallel
         with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
             processed = list(executor.map(process_audio_segment_fast, audio_data))
         print(f"Processed {len(audio_segments)} segments in correct order")
+        # Merge audio segments with smooth transitions
         print("Merging audio segments...")
         merged_audio = audio_segments[0]
+        pause = AudioSegment.silent(duration=150)  # Shorter pause for smoother flow
         for segment in audio_segments[1:]:
             merged_audio += pause + segment
+        # Apply final processing
         print("Applying final audio processing...")
+        try:
+            merged_audio = merged_audio.compress_dynamic_range(
+                threshold=-20.0,
+                ratio=4.0,
+                attack=5.0,
+                release=50.0
+            )
+        except:
+            pass  # Skip if compression fails
         merged_audio = normalize(merged_audio)
         # Export with high quality
         merged_audio.export(output_file, format="mp3", bitrate="192k")
+        # Verify output
+        if os.path.exists(output_file) and os.path.getsize(output_file) > 1024:
+            print(f"✅ Audio successfully generated: {output_file}")
+            return output_file
+        else:
+            print(f"Error: Generated file is empty or missing: {output_file}")
+            return None
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         traceback.print_exc()
         return None
+async def generate_tts_optimized(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
     """Optimized TTS generation function."""
     voice = {
         "English": "en-US-JennyNeural",
     return None, None
+def audio_func(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str]]:
     """Synchronous wrapper for audio generation."""
     try:
         loop = asyncio.new_event_loop()
         return None, None
 def create_manim_script(problem_data, script_path, audio_path, scale=1):
     """Generate Manim script from problem data with robust wrapping."""