sreepathi-ravikumar committed on
Commit
5567945
·
verified ·
1 Parent(s): b61fb9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +317 -3
app.py CHANGED
@@ -25,7 +25,307 @@ os.makedirs(TEMP_DIR, exist_ok=True)
25
  # API Key for security (optional)
26
  API_KEY = "rkmentormindzofficaltokenkey12345"
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spacing, align_left=True):
31
  """
@@ -63,7 +363,7 @@ def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spac
63
  para = para.align_to(LEFT)
64
  return para.strip()
65
 
66
- def create_manim_script(problem_data, script_path):
67
  """Generate Manim script from problem data with robust wrapping for title, text, and equations."""
68
 
69
  # Defaults
@@ -93,6 +393,7 @@ import textwrap
93
  class GeneratedMathScene(Scene):
94
  def construct(self):
95
  # Scene settings
 
96
  self.camera.background_color = "{settings.get('background_color', '#0f0f23')}"
97
  default_color = {settings.get('text_color', 'WHITE')}
98
  highlight_color = {settings.get('highlight_color', 'YELLOW')}
@@ -144,7 +445,8 @@ class GeneratedMathScene(Scene):
144
  obj = None
145
  content = slide.get("content", "")
146
  animation = slide.get("animation", "write_left")
147
- duration = slide.get("duration", 1.0)
 
148
  slide_type = slide.get("type", "text")
149
 
150
  if slide_type == "title":
@@ -246,7 +548,10 @@ def generate_video():
246
  cleaned = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', lst[0])
247
  nlist = ast.literal_eval(cleaned)
248
  datalst=[]
 
 
249
  for line in range(len(nlist)):
 
250
  datalst.append({
251
  "type": nlist[line][0].strip(),
252
  "content": nlist[line][1].strip(),
@@ -265,6 +570,15 @@ def generate_video():
265
  "title_size": 48
266
  },
267
  "slides":datalst}
 
 
 
 
 
 
 
 
 
268
  # Now proceed with video generation using 'data'
269
  print(json.dumps(data, indent=2)) # For debugging
270
  # ✅ Final validation
@@ -286,7 +600,7 @@ def generate_video():
286
 
287
  # Generate Manim script
288
  script_path = os.path.join(temp_work_dir, "scene.py")
289
- create_manim_script(data, script_path)
290
  print(f"Created Manim script at {script_path}")
291
 
292
  # Render video using subprocess
 
25
  # API Key for security (optional)
26
  API_KEY = "rkmentormindzofficaltokenkey12345"
27
 
28
def extract_english_paragraphs(text):
    """Return the leading (English) paragraph of *text*.

    The input is expected to contain the English portion first, separated
    from the native-language portion by a blank line.  Only the first
    paragraph is returned; ``extract_native_text`` returns the second.

    Note: despite the name, no character-set filtering is performed — the
    function relies purely on the paragraph order of the input.
    """
    # str.split always yields at least one element, so indexing [0] is
    # safe even for empty input (it returns "").
    return text.split('\n\n')[0]
41
 
42
def extract_native_text(text):
    """Return the second paragraph of *text* (the native-language portion).

    The input is expected to be two paragraphs separated by a blank line:
    the English text first, the native-language payload second.  Returns
    "" when no second paragraph exists instead of raising IndexError
    (the previous implementation crashed on single-paragraph input).
    """
    paragraphs = text.split('\n\n')
    return paragraphs[1] if len(paragraphs) > 1 else ""
56
+
57
+ import re
58
+ import html
59
+ import unicodedata
60
+ import tempfile
61
+ import os
62
+ import asyncio
63
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
64
+ from functools import lru_cache
65
+ import edge_tts
66
+ from pydub import AudioSegment
67
+ from pydub.effects import normalize
68
+ from mutagen.mp3 import MP3
69
+
70
# Default English voice used when no language-specific voice is selected.
VOICE_EN = "en-IN-NeerjaNeural"

# Patterns compiled once at import time and reused on every cleaning call
# (re-compiling per chunk would redo the work thousands of times).
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')  # http(s)/www URLs
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')                           # markup tags, stray angle brackets
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')                         # curly/square brackets
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')             # symbols the TTS should not read
WHITESPACE_PATTERN = re.compile(r'\s+')                             # runs of any whitespace
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')                     # sentence boundaries
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')                          # clause boundaries

@lru_cache(maxsize=1024)  # memoize: identical chunks are cleaned repeatedly
def clean_text_for_tts(text):
    """Sanitize *text* for speech synthesis.

    Strips URLs, markup, brackets and TTS-hostile symbols, removes a few
    SSML-related keywords, applies NFKD Unicode normalization and
    collapses whitespace.  Returns "" for falsy input.
    """
    if not text:
        return ""

    cleaned = html.unescape(str(text).strip())

    # Remove structural noise using the pre-compiled module patterns.
    for pattern in (URL_PATTERN, TAG_PATTERN, BRACKET_PATTERN, SPECIAL_CHAR_PATTERN):
        cleaned = pattern.sub('', cleaned)

    # These are literal backslash escape sequences (e.g. from JSON-ish
    # payloads), not real control characters.
    for escape in ('\\n', '\\t', '\\r'):
        cleaned = cleaned.replace(escape, ' ')

    # Drop SSML-ish keywords so injected markup cannot reach the engine.
    # NOTE(review): this is plain substring removal, so it also mangles
    # words that merely contain a keyword (e.g. "speaker" -> "er").
    for keyword in ('voice', 'speak', 'prosody', 'ssml', 'xmlns'):
        cleaned = cleaned.replace(keyword, '').replace(keyword.upper(), '')

    cleaned = unicodedata.normalize('NFKD', cleaned)
    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
103
+
104
async def generate_safe_audio(text, voice, semaphore):
    """Synthesize *text* with edge-tts and return the temp MP3 path.

    *semaphore* caps the number of concurrent TTS requests.  Returns
    None when the cleaned text is empty or synthesis fails; a partially
    written temp file is removed on failure.
    """
    async with semaphore:  # rate-limit concurrent edge-tts calls
        spoken = clean_text_for_tts(text)
        if not spoken:
            return None

        # Reserve a unique file name now; edge-tts writes to it below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as handle:
            out_path = handle.name

        try:
            await edge_tts.Communicate(spoken, voice=voice).save(out_path)
        except Exception as e:
            print(f"Error generating audio: {e}")
            if os.path.exists(out_path):
                os.unlink(out_path)
            return None
        return out_path
124
+
125
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Split *text* into TTS-sized chunks of at most *max_chars* characters.

    Splitting prefers sentence boundaries, then clause boundaries
    (comma/semicolon/colon), and finally greedy word wrapping.  A tuple
    is returned so the result is hashable for the LRU cache.
    """

    def wrap_words(fragment):
        # Greedy word wrap for fragments longer than max_chars.
        pieces, current = [], ""
        for word in fragment.split():
            candidate = f"{current} {word}" if current else word
            if len(candidate) <= max_chars:
                current = candidate
            else:
                if current:
                    pieces.append(current.strip())
                current = word
        if current:
            pieces.append(current.strip())
        return pieces

    text = clean_text_for_tts(text)
    if not text:
        return tuple()

    chunks = []
    for sentence in SENTENCE_PATTERN.split(text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_chars:
            chunks.append(sentence)
            continue
        # Sentence too long: fall back to clause, then word-level splits.
        for clause in SUB_PATTERN.split(sentence):
            clause = clause.strip()
            if not clause:
                continue
            if len(clause) <= max_chars:
                chunks.append(clause)
            else:
                chunks.extend(wrap_words(clause))

    return tuple(chunk for chunk in chunks if chunk.strip())
166
+
167
def process_audio_segment_fast(audio_file):
    """Load, normalize and lightly trim one temp MP3; delete the file.

    Runs in a worker thread (see the ThreadPoolExecutor caller).
    Returns a pydub AudioSegment, or None when loading/processing fails.
    The temp file is always removed, success or failure.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Silence-stripping very short clips can remove the audio
        # entirely, so only trim segments longer than 200 ms.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            # Narrowed from a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; best effort — keep the
            # un-trimmed segment on failure.
            except Exception:
                pass

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately to keep disk usage flat.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except OSError:
            pass
191
+
192
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Ultra-optimized bilingual TTS with parallel processing.

    Chunks *text*, synthesizes every chunk concurrently (bounded by
    *max_concurrent*), post-processes segments in a thread pool, then
    merges them with short pauses into *output_file* (MP3, 192 kbps).

    VOICE_TA, despite the name, may be any non-default voice id; only
    when it is a Tamil ("ta-IN") voice are Tamil-script chunks routed to
    it while other chunks fall back through (VOICE_TA or VOICE_EN).
    Returns *output_file* on success, None on any failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        # Per-chunk voice switching is only enabled for Tamil input.
        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore to limit concurrent TTS requests (prevents rate limiting).
        semaphore = asyncio.Semaphore(max_concurrent)

        # Prepare one synthesis task per chunk, picking the voice by script.
        tasks = []
        for i, chunk in enumerate(chunks):
            # U+0B80..U+0BFF is the Tamil Unicode block.
            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Generate all audio files concurrently; exceptions become results
        # rather than aborting the gather.
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only successfully written file paths (drop None/exceptions).
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Process audio segments in parallel using ThreadPoolExecutor
        # (decode/normalize work releases the GIL inside ffmpeg/pydub).
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        # Filter out segments that failed to process.
        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        # Merge audio segments with a 200 ms pause between chunks.
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Final mastering: gentle compression, then peak normalization.
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        # Export with high quality.
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
266
+
267
async def generate_tts_optimized(id, lines, lang):
    """Optimized TTS generation function.

    Picks a neural voice for *lang*, synthesizes the slide text, and
    returns ``(duration_seconds, audio_path)`` or ``(None, None)`` on
    failure.

    *lang* may be either a plain language name (e.g. "Tamil") or a
    combined payload of the form ``"text&&&Language"``; in the latter
    case the spoken text comes from the payload itself, not from *lines*.
    """
    # Language name -> edge-tts neural voice id.
    voice = {
        "English": "en-US-JennyNeural",
        "Tamil": "ta-IN-PallaviNeural",
        "Hindi": "hi-IN-SwaraNeural",
        "Malayalam": "ml-IN-SobhanaNeural",
        "Kannada": "kn-IN-SapnaNeural",
        "Telugu": "te-IN-ShrutiNeural",
        "Bengali": "bn-IN-TanishaaNeural",
        "Marathi": "mr-IN-AarohiNeural",
        "Gujarati": "gu-IN-DhwaniNeural",
        "Punjabi": "pa-IN-VaaniNeural",
        "Urdu": "ur-IN-GulNeural",
        "French": "fr-FR-DeniseNeural",
        "German": "de-DE-KatjaNeural",
        "Spanish": "es-ES-ElviraNeural",
        "Italian": "it-IT-IsabellaNeural",
        "Russian": "ru-RU-SvetlanaNeural",
        "Japanese": "ja-JP-NanamiNeural",
        "Korean": "ko-KR-SunHiNeural",
        "Chinese": "zh-CN-XiaoxiaoNeural",
        "Arabic": "ar-SA-ZariyahNeural",
        "Portuguese": "pt-BR-FranciscaNeural",
        "Dutch": "nl-NL-FennaNeural",
        "Greek": "el-GR-AthinaNeural",
        "Hebrew": "he-IL-HilaNeural",
        "Turkish": "tr-TR-EmelNeural",
        "Polish": "pl-PL-AgnieszkaNeural",
        "Thai": "th-TH-AcharaNeural",
        "Vietnamese": "vi-VN-HoaiMyNeural",
        "Swedish": "sv-SE-SofieNeural",
        "Finnish": "fi-FI-NooraNeural",
        "Czech": "cs-CZ-VlastaNeural",
        "Hungarian": "hu-HU-NoemiNeural"
    }

    audio_name = f"audio{id}.mp3"
    # NOTE(review): AUDIO_DIR is defined elsewhere in this module — confirm
    # it exists before this runs.
    audio_path = os.path.join(AUDIO_DIR, audio_name)

    if "&&&" in lang:
        # Combined "text&&&Language" payload: speak the embedded text.
        listf = lang.split("&&&")
        text = listf[0].strip()
        lang_name = listf[1].strip()
        voice_to_use = voice.get(lang_name, VOICE_EN)
    else:
        # assumes `lines` is indexable by `id` — TODO confirm caller contract
        text = lines[id]
        voice_to_use = voice.get(lang, VOICE_EN)

    # Increase max_concurrent for more speed (adjust based on your system)
    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)

    if output and os.path.exists(audio_path):
        # Read the real duration from the encoded MP3 header (mutagen).
        audio = MP3(audio_path)
        duration = audio.info.length
        return duration, audio_path

    return None, None
325
+
326
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Runs generate_tts_optimized in a fresh event loop via asyncio.run and
    returns its (duration, audio_path) result.
    """
    return asyncio.run(generate_tts_optimized(id, lines, lang))
329
 
330
  def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spacing, align_left=True):
331
  """
 
363
  para = para.align_to(LEFT)
364
  return para.strip()
365
 
366
+ def create_manim_script(problem_data, script_path,audio_path,scale=1):
367
  """Generate Manim script from problem data with robust wrapping for title, text, and equations."""
368
 
369
  # Defaults
 
393
  class GeneratedMathScene(Scene):
394
  def construct(self):
395
  # Scene settings
396
+ self.add_sound({audio_path})
397
  self.camera.background_color = "{settings.get('background_color', '#0f0f23')}"
398
  default_color = {settings.get('text_color', 'WHITE')}
399
  highlight_color = {settings.get('highlight_color', 'YELLOW')}
 
445
  obj = None
446
  content = slide.get("content", "")
447
  animation = slide.get("animation", "write_left")
448
+ scalelen = slide.get("duration", 1.0)
449
+ duration=scalelen*{scale}
450
  slide_type = slide.get("type", "text")
451
 
452
  if slide_type == "title":
 
548
  cleaned = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', lst[0])
549
  nlist = ast.literal_eval(cleaned)
550
  datalst=[]
551
+ total=0
552
+ scale=1
553
  for line in range(len(nlist)):
554
+ total=total+float(nlist[line][3])
555
  datalst.append({
556
  "type": nlist[line][0].strip(),
557
  "content": nlist[line][1].strip(),
 
570
  "title_size": 48
571
  },
572
  "slides":datalst}
573
+ #audio generating code here
574
+ lines=extract_english_paragraphs(lst[1])
575
+ lang=extract_native_text(lst[1])
576
+ length, audio_path = audio_func(id, lines, lang)
577
+ if not duration or not audio_path:
578
+ print("Failed to generate audio.")
579
+
580
+ scale=total/length
581
+
582
  # Now proceed with video generation using 'data'
583
  print(json.dumps(data, indent=2)) # For debugging
584
  # ✅ Final validation
 
600
 
601
  # Generate Manim script
602
  script_path = os.path.join(temp_work_dir, "scene.py")
603
+ create_manim_script(data, script_path,audio_path,scale)
604
  print(f"Created Manim script at {script_path}")
605
 
606
  # Render video using subprocess