sreepathi-ravikumar committed on
Commit
1bdfdde
·
verified ·
1 Parent(s): 531192c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -85
app.py CHANGED
@@ -34,9 +34,30 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
34
  # API Key for security (optional)
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  VOICE_EN = "en-IN-NeerjaNeural"
38
 
39
- # Pre-compiled regex patterns for speed (compiled once, reused many times)
 
 
40
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
41
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
42
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -45,65 +66,83 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
45
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
46
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
47
 
48
- @lru_cache(maxsize=1024) # Cache cleaned text to avoid re-processing
 
49
  def clean_text_for_tts(text):
50
  """Cleans text before TTS with optimized regex and caching."""
51
  if not text:
52
  return ""
53
  text = str(text).strip()
54
  text = html.unescape(text)
55
-
56
- # Use pre-compiled patterns (much faster)
57
  text = URL_PATTERN.sub('', text)
58
  text = TAG_PATTERN.sub('', text)
59
  text = BRACKET_PATTERN.sub('', text)
60
  text = SPECIAL_CHAR_PATTERN.sub('', text)
61
  text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
62
-
63
- # Batch remove keywords (faster than multiple re.sub calls)
64
  for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
65
  text = text.replace(keyword, '').replace(keyword.upper(), '')
66
-
67
  text = unicodedata.normalize('NFKD', text)
68
  text = WHITESPACE_PATTERN.sub(' ', text)
69
  return text.strip()
70
 
 
71
  async def generate_safe_audio(text, voice, semaphore):
72
- """Generate clean audio with rate limiting."""
73
- async with semaphore: # Limit concurrent TTS requests
 
 
 
 
 
 
 
74
  cleaned_text = clean_text_for_tts(text)
75
  if not cleaned_text:
76
  return None
77
 
78
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
79
- fname = temp_file.name
80
- temp_file.close()
81
 
82
- try:
83
- comm = edge_tts.Communicate(cleaned_text, voice=voice)
84
- await comm.save(fname)
85
- return fname
86
- except Exception as e:
87
- print(f"Error generating audio: {e}")
88
- if os.path.exists(fname):
89
- os.unlink(fname)
90
- return None
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  @lru_cache(maxsize=256)
93
- def smart_text_chunking(text, max_chars=80):
94
- """Cached text chunking for speed."""
95
  text = clean_text_for_tts(text)
96
  if not text:
97
- return tuple() # Return tuple for hashability (required by lru_cache)
98
-
99
  sentences = SENTENCE_PATTERN.split(text)
100
  chunks = []
101
-
102
  for sentence in sentences:
103
  sentence = sentence.strip()
104
  if not sentence:
105
  continue
106
-
107
  if len(sentence) <= max_chars:
108
  chunks.append(sentence)
109
  else:
@@ -112,7 +151,7 @@ def smart_text_chunking(text, max_chars=80):
112
  part = part.strip()
113
  if not part:
114
  continue
115
-
116
  if len(part) <= max_chars:
117
  chunks.append(part)
118
  else:
@@ -128,111 +167,108 @@ def smart_text_chunking(text, max_chars=80):
128
  current_chunk = word
129
  if current_chunk:
130
  chunks.append(current_chunk.strip())
131
-
132
  return tuple(chunk for chunk in chunks if chunk.strip())
133
 
 
134
  def process_audio_segment_fast(audio_file):
135
  """Fast audio processing in separate thread."""
 
136
  try:
 
 
 
137
  segment = AudioSegment.from_file(audio_file)
138
  segment = normalize(segment)
139
-
140
- # Only strip silence for longer segments
141
  if len(segment) > 200:
142
  try:
143
  segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
144
- except:
145
- pass # Skip if fails
146
-
147
  return segment
148
  except Exception as e:
149
  print(f"Warning: Error processing audio segment: {e}")
150
  return None
151
  finally:
152
- # Cleanup temp file immediately
153
  try:
154
- if os.path.exists(audio_file):
155
  os.unlink(audio_file)
156
- except:
157
  pass
158
 
159
- async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
160
- """Ultra-optimized bilingual TTS with parallel processing."""
 
161
  print("Starting optimized bilingual TTS processing...")
162
-
163
  try:
164
  chunks = smart_text_chunking(text)
165
  if not chunks:
166
  print("Error: No valid text chunks after cleaning")
167
  return None
168
-
169
  print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
170
-
171
  is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
172
-
173
- # Semaphore to limit concurrent TTS requests (prevents rate limiting)
174
  semaphore = asyncio.Semaphore(max_concurrent)
175
-
176
- # Prepare all tasks
177
  tasks = []
178
  for i, chunk in enumerate(chunks):
179
  is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
180
  voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
181
  tasks.append(generate_safe_audio(chunk, voice, semaphore))
182
-
183
- # Generate all audio files concurrently
184
  audio_files = await asyncio.gather(*tasks, return_exceptions=True)
185
-
186
- # Filter successful files
187
- processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]
188
-
189
  if not processed_audio_files:
190
  print("Error: No audio was successfully generated")
191
  return None
192
-
193
  print(f"Successfully generated {len(processed_audio_files)} audio segments")
194
-
195
- # Process audio segments in parallel using ThreadPoolExecutor
196
  with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
197
  audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
198
-
199
- # Filter out None segments
200
  audio_segments = [seg for seg in audio_segments if seg is not None]
201
-
202
  if not audio_segments:
203
  print("Error: No audio segments were successfully processed")
204
  return None
205
-
206
- # Merge audio segments (fast concatenation)
207
  print("Merging audio segments...")
208
  merged_audio = audio_segments[0]
209
  pause = AudioSegment.silent(duration=200)
210
-
211
  for segment in audio_segments[1:]:
212
  merged_audio += pause + segment
213
-
214
- # Apply final processing (compression and normalization)
215
  print("Applying final audio processing...")
216
  merged_audio = merged_audio.compress_dynamic_range(
217
- threshold=-20.0,
218
- ratio=4.0,
219
- attack=5.0,
220
  release=50.0
221
  )
222
  merged_audio = normalize(merged_audio)
223
-
224
- # Export with high quality
225
  merged_audio.export(output_file, format="mp3", bitrate="192k")
226
  print(f"✅ Audio successfully generated: {output_file}")
227
-
228
  return output_file
229
-
230
  except Exception as main_error:
231
  print(f"Main error in bilingual TTS: {main_error}")
 
232
  return None
233
 
 
234
  async def generate_tts_optimized(id, lines, lang):
235
- """Optimized TTS generation function."""
236
  voice = {
237
  "English": "en-US-JennyNeural",
238
  "Tamil": "ta-IN-PallaviNeural",
@@ -267,33 +303,47 @@ async def generate_tts_optimized(id, lines, lang):
267
  "Czech": "cs-CZ-VlastaNeural",
268
  "Hungarian": "hu-HU-NoemiNeural"
269
  }
270
-
271
  audio_name = f"audio{id}.mp3"
272
  audio_path = os.path.join(AUDIO_DIR, audio_name)
273
-
274
  if "&&&" in lang:
275
  listf = lang.split("&&&")
276
  text = listf[0].strip()
277
- lang_name = listf[1].strip()
278
  voice_to_use = voice.get(lang_name, VOICE_EN)
279
  else:
280
- text = lines[id]
281
  voice_to_use = voice.get(lang, VOICE_EN)
282
-
283
- # Increase max_concurrent for more speed (adjust based on your system)
284
- output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
285
-
286
  if output and os.path.exists(audio_path):
287
- audio = MP3(audio_path)
288
- duration = audio.info.length
289
- return duration, audio_path
290
-
 
 
 
 
291
  return None, None
292
 
 
293
  def audio_func(id, lines, lang):
294
  """Synchronous wrapper for audio generation."""
295
- return asyncio.run(generate_tts_optimized(id, lines, lang))
296
-
 
 
 
 
 
 
 
 
 
 
297
 
298
 
299
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
 
34
  # API Key for security (optional)
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
+
38
+ import os
39
+ import re
40
+ import html
41
+ import unicodedata
42
+ import asyncio
43
+ import tempfile
44
+ import traceback
45
+ import random
46
+ import hashlib
47
+ from concurrent.futures import ThreadPoolExecutor
48
+ from functools import lru_cache
49
+
50
+ import edge_tts
51
+ from pydub import AudioSegment
52
+ from pydub.effects import normalize
53
+ from mutagen.mp3 import MP3
54
+
55
+ # Voice configuration
56
  VOICE_EN = "en-IN-NeerjaNeural"
57
 
58
+
59
+
60
+ # Pre-compiled regex patterns for speed
61
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
62
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
63
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 
66
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
67
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
68
 
69
+
70
@lru_cache(maxsize=1024)
def clean_text_for_tts(text):
    """Sanitize raw text for TTS synthesis.

    Strips URLs, markup, bracket noise, literal escape sequences and
    SSML-related keywords, then applies NFKD normalization and collapses
    whitespace. Results are memoized via lru_cache, so the same chunk is
    only ever cleaned once per process.
    """
    if not text:
        return ""

    cleaned = html.unescape(str(text).strip())

    # Run the pre-compiled module-level patterns in the same order as before:
    # URLs, tags/angle brackets, curly/square brackets, then special chars.
    for pattern in (URL_PATTERN, TAG_PATTERN, BRACKET_PATTERN, SPECIAL_CHAR_PATTERN):
        cleaned = pattern.sub('', cleaned)

    # Literal (backslash-escaped) newline/tab/CR sequences become spaces.
    for escape in ('\\n', '\\t', '\\r'):
        cleaned = cleaned.replace(escape, ' ')

    # Remove SSML-injection keywords in both lower- and upper-case forms.
    for keyword in ('voice', 'speak', 'prosody', 'ssml', 'xmlns'):
        cleaned = cleaned.replace(keyword, '').replace(keyword.upper(), '')

    cleaned = unicodedata.normalize('NFKD', cleaned)
    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
90
 
91
+
92
async def generate_safe_audio(text, voice, semaphore):
    """Generate TTS audio for *text* with disk caching, retries and backoff.

    Returns the path to a non-empty MP3 file, or None when the cleaned text
    is empty or every attempt failed. Output is cached in AUDIO_DIR keyed by
    an MD5 of (text, voice), so repeated chunks are synthesized only once.
    """
    # Deterministic cache filename based on content + voice (disk caching).
    text_hash = hashlib.md5(f"{text}_{voice}".encode('utf-8')).hexdigest()
    cache_filename = os.path.join(AUDIO_DIR, f"cache_{text_hash}.mp3")

    # Cache hit: reuse a previously generated, non-empty file.
    if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 0:
        return cache_filename

    async with semaphore:  # bound the number of concurrent edge-tts requests
        cleaned_text = clean_text_for_tts(text)
        if not cleaned_text:
            return None

        # Retry configuration.
        max_retries = 3
        base_delay = 2.0

        for attempt in range(max_retries):
            last_error = None
            try:
                comm = edge_tts.Communicate(cleaned_text, voice=voice)
                await comm.save(cache_filename)

                if os.path.exists(cache_filename) and os.path.getsize(cache_filename) > 0:
                    return cache_filename
                # A zero-byte output counts as a failed attempt.
                last_error = "empty audio file"
            except Exception as e:
                last_error = e

            # BUGFIX: remove a partial/empty file left behind by a failed
            # save, so the size>0 cache check above can never serve a
            # truncated MP3 on a later call.
            try:
                if os.path.exists(cache_filename):
                    os.unlink(cache_filename)
            except OSError:
                pass

            if attempt == max_retries - 1:
                print(f"Failed to generate audio after {max_retries} attempts: {last_error}")
                return None

            # Exponential backoff with jitter before the next attempt.
            sleep_time = (base_delay * (2 ** attempt)) + random.uniform(0.1, 1.0)
            print(f"Rate limit/Error hit. Retrying in {sleep_time:.2f}s...")
            await asyncio.sleep(sleep_time)

    return None
129
+
130
 
131
  @lru_cache(maxsize=256)
132
+ def smart_text_chunking(text, max_chars=200):
133
+ """Cached text chunking with larger chunk size to reduce requests."""
134
  text = clean_text_for_tts(text)
135
  if not text:
136
+ return tuple()
137
+
138
  sentences = SENTENCE_PATTERN.split(text)
139
  chunks = []
140
+
141
  for sentence in sentences:
142
  sentence = sentence.strip()
143
  if not sentence:
144
  continue
145
+
146
  if len(sentence) <= max_chars:
147
  chunks.append(sentence)
148
  else:
 
151
  part = part.strip()
152
  if not part:
153
  continue
154
+
155
  if len(part) <= max_chars:
156
  chunks.append(part)
157
  else:
 
167
  current_chunk = word
168
  if current_chunk:
169
  chunks.append(current_chunk.strip())
170
+
171
  return tuple(chunk for chunk in chunks if chunk.strip())
172
 
173
+
174
def process_audio_segment_fast(audio_file):
    """Load, normalize and silence-trim one audio file (runs in a worker thread).

    Returns a pydub AudioSegment, or None when the file is missing or cannot
    be decoded. Temporary input files are deleted afterwards, but files named
    ``cache_*`` are kept so generate_safe_audio's disk cache stays warm.
    """
    try:
        if not audio_file or not os.path.exists(audio_file):
            return None

        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Only strip silence on segments long enough (>200 ms) to contain it.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception:
                pass  # silence stripping is best-effort

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # BUGFIX: previously this unconditionally deleted its input, which
        # destroyed the cache_*.mp3 files generate_safe_audio had just
        # written and made the disk cache useless. Keep cache files; delete
        # only genuine temp files.
        try:
            if (audio_file and os.path.exists(audio_file)
                    and not os.path.basename(audio_file).startswith("cache_")):
                os.unlink(audio_file)
        except Exception:
            pass
 
201
+
202
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=5):
    """Ultra-optimized bilingual TTS with parallel processing and reduced concurrency.

    Pipeline: chunk text -> synthesize chunks concurrently (bounded by
    max_concurrent) -> decode/normalize segments in a thread pool -> merge
    with short pauses -> compress/normalize -> export MP3.

    Returns output_file on success, None on any failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        # smart_text_chunking also cleans the text; empty result means
        # nothing speakable survived sanitization.
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        # Tamil voice is only applied per-chunk when a ta-IN voice was passed.
        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore shared by all synthesis tasks to avoid rate limiting.
        semaphore = asyncio.Semaphore(max_concurrent)

        tasks = []
        for i, chunk in enumerate(chunks):
            # Chunk counts as Tamil if it contains any Tamil-block codepoint.
            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # return_exceptions=True: one failed chunk must not abort the batch.
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only real, still-existing file paths (drops None and exceptions).
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f and os.path.exists(f)]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Decode/normalize segments in parallel; order of results matches input.
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        # 200 ms pause inserted between consecutive segments.
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        traceback.print_exc()
        return None
268
 
269
+
270
  async def generate_tts_optimized(id, lines, lang):
271
+ """Optimized TTS generation function with reduced concurrency."""
272
  voice = {
273
  "English": "en-US-JennyNeural",
274
  "Tamil": "ta-IN-PallaviNeural",
 
303
  "Czech": "cs-CZ-VlastaNeural",
304
  "Hungarian": "hu-HU-NoemiNeural"
305
  }
306
+
307
  audio_name = f"audio{id}.mp3"
308
  audio_path = os.path.join(AUDIO_DIR, audio_name)
309
+
310
  if "&&&" in lang:
311
  listf = lang.split("&&&")
312
  text = listf[0].strip()
313
+ lang_name = listf[1].strip() if len(listf) > 1 else "English"
314
  voice_to_use = voice.get(lang_name, VOICE_EN)
315
  else:
316
+ text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
317
  voice_to_use = voice.get(lang, VOICE_EN)
318
+
319
+ output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=5)
320
+
 
321
  if output and os.path.exists(audio_path):
322
+ try:
323
+ audio = MP3(audio_path)
324
+ duration = audio.info.length
325
+ return duration, audio_path
326
+ except Exception as e:
327
+ print(f"Error reading audio file: {e}")
328
+ return None, None
329
+
330
  return None, None
331
 
332
+
333
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Runs generate_tts_optimized on a dedicated event loop so it is safe to
    call from worker threads that have no running loop. Returns
    (duration, path) on success, (None, None) on failure.
    """
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(generate_tts_optimized(id, lines, lang))
        finally:
            loop.close()
            # BUGFIX: detach the closed loop from this thread so later
            # asyncio calls don't pick up a dead loop.
            asyncio.set_event_loop(None)
    except Exception as e:
        print(f"Error in audio_func: {e}")
        traceback.print_exc()
        return None, None
# BUGFIX: removed a stray markdown code fence (```) that had been committed
# after this function and made the module a SyntaxError.
347
 
348
 
349
  def create_manim_script(problem_data, script_path, audio_path, scale=1):