sreepathi-ravikumar committed on
Commit
d59be26
·
verified ·
1 Parent(s): e50ef81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -18
app.py CHANGED
@@ -35,6 +35,7 @@ os.makedirs(AUDIO_DIR, exist_ok=True)
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
 
 
38
  import os
39
  import re
40
  import html
@@ -55,7 +56,9 @@ from mutagen.mp3 import MP3
55
  # Voice configuration
56
  VOICE_EN = "en-IN-NeerjaNeural"
57
 
58
-
 
 
59
 
60
  # Pre-compiled regex patterns for speed
61
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
@@ -63,8 +66,10 @@ TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
63
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
64
  SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
65
  WHITESPACE_PATTERN = re.compile(r'\s+')
66
- SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
67
- SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
 
 
68
 
69
 
70
  @lru_cache(maxsize=1024)
@@ -129,12 +134,13 @@ async def generate_safe_audio(text, voice, semaphore):
129
 
130
 
131
  @lru_cache(maxsize=256)
132
- def smart_text_chunking(text, max_chars=200):
133
- """Cached text chunking with larger chunk size to reduce requests."""
134
  text = clean_text_for_tts(text)
135
  if not text:
136
  return tuple()
137
 
 
138
  sentences = SENTENCE_PATTERN.split(text)
139
  chunks = []
140
 
@@ -143,30 +149,46 @@ def smart_text_chunking(text, max_chars=200):
143
  if not sentence:
144
  continue
145
 
 
146
  if len(sentence) <= max_chars:
147
  chunks.append(sentence)
148
  else:
 
149
  sub_parts = SUB_PATTERN.split(sentence)
 
 
150
  for part in sub_parts:
151
  part = part.strip()
152
  if not part:
153
  continue
154
 
155
- if len(part) <= max_chars:
156
- chunks.append(part)
 
 
157
  else:
158
- words = part.split()
159
- current_chunk = ""
160
- for word in words:
161
- test_chunk = f"{current_chunk} {word}" if current_chunk else word
162
- if len(test_chunk) <= max_chars:
163
- current_chunk = test_chunk
164
- else:
165
- if current_chunk:
166
- chunks.append(current_chunk.strip())
167
- current_chunk = word
168
  if current_chunk:
169
  chunks.append(current_chunk.strip())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  return tuple(chunk for chunk in chunks if chunk.strip())
172
 
@@ -242,7 +264,7 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
242
 
243
  print("Merging audio segments...")
244
  merged_audio = audio_segments[0]
245
- pause = AudioSegment.silent(duration=200)
246
 
247
  for segment in audio_segments[1:]:
248
  merged_audio += pause + segment
@@ -346,6 +368,7 @@ def audio_func(id, lines, lang):
346
 
347
 
348
 
 
349
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
350
  """Generate Manim script from problem data with robust wrapping."""
351
 
 
35
  API_KEY = "rkmentormindzofficaltokenkey12345"
36
 
37
 
38
+
39
  import os
40
  import re
41
  import html
 
56
  # Voice configuration
57
  VOICE_EN = "en-IN-NeerjaNeural"
58
 
59
# Audio output directory under the current working directory; created
# eagerly at import time so later audio writes cannot fail on a missing path.
AUDIO_DIR = os.path.join(os.getcwd(), "audio")
os.makedirs(AUDIO_DIR, exist_ok=True)
62
 
63
  # Pre-compiled regex patterns for speed
64
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
 
66
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
67
  SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
68
  WHITESPACE_PATTERN = re.compile(r'\s+')
69
# Sentence splitting: break only on terminal punctuation (. ! ? plus the
# Devanagari danda । and double danda ॥) followed by whitespace.  The
# lookbehind keeps the punctuation attached to the preceding sentence.
SENTENCE_PATTERN = re.compile(r'(?<=[.!?।॥])\s+')
# Sub-splitting for over-long sentences: break only on comma/semicolon +
# whitespace (deliberately NOT on colons or hyphens, so hyphenated words
# stay intact).  The lookbehind keeps the delimiter at the end of each part.
SUB_PATTERN = re.compile(r'(?<=[,;])\s+')
73
 
74
 
75
  @lru_cache(maxsize=1024)
 
134
 
135
 
136
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=300):
    """Split *text* into TTS-friendly chunks of at most *max_chars* characters.

    Splits on sentence boundaries first (SENTENCE_PATTERN), then on
    comma/semicolon boundaries (SUB_PATTERN), and only as a last resort on
    word boundaries, so word order and punctuation are preserved.  The
    result is a tuple (hashable) so lru_cache can store it safely.

    Args:
        text: Raw input text; cleaned via clean_text_for_tts() first.
        max_chars: Soft per-chunk character budget.  A single word longer
            than this is still emitted as its own chunk rather than cut.

    Returns:
        tuple[str, ...]: Non-empty chunks in original reading order.
    """
    text = clean_text_for_tts(text)
    if not text:
        return tuple()

    # SENTENCE_PATTERN uses a lookbehind, so the terminal punctuation stays
    # attached to the end of each sentence after the split.
    sentences = SENTENCE_PATTERN.split(text)
    chunks = []

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        # A sentence that fits is kept whole to preserve prosody.
        if len(sentence) <= max_chars:
            chunks.append(sentence)
            continue

        # Too long: split on commas/semicolons.  SUB_PATTERN's lookbehind
        # keeps the delimiter at the END of each part, so parts must be
        # rejoined with a plain space.  (Rejoining with ", " -- as the
        # previous version did -- duplicated punctuation, producing
        # "word,, next", and silently rewrote ";" pauses as ",".)
        current_chunk = ""
        for part in SUB_PATTERN.split(sentence):
            part = part.strip()
            if not part:
                continue

            test_chunk = f"{current_chunk} {part}" if current_chunk else part
            if len(test_chunk) <= max_chars:
                current_chunk = test_chunk
                continue

            # Accumulated chunk is full: flush it before placing this part.
            if current_chunk:
                chunks.append(current_chunk)

            if len(part) > max_chars:
                # Single part still over budget: fall back to word packing.
                word_chunk = ""
                for word in part.split():
                    test_word = f"{word_chunk} {word}" if word_chunk else word
                    if len(test_word) <= max_chars:
                        word_chunk = test_word
                    else:
                        if word_chunk:
                            chunks.append(word_chunk)
                        # A lone word may itself exceed max_chars; it is
                        # emitted as-is rather than cut mid-word.
                        word_chunk = word
                # Remainder keeps accumulating with the following parts.
                current_chunk = word_chunk
            else:
                current_chunk = part

        # Flush whatever remains of this sentence.
        if current_chunk:
            chunks.append(current_chunk)

    return tuple(chunk for chunk in chunks if chunk.strip())
194
 
 
264
 
265
  print("Merging audio segments...")
266
  merged_audio = audio_segments[0]
267
+ pause = AudioSegment.silent(duration=150)
268
 
269
  for segment in audio_segments[1:]:
270
  merged_audio += pause + segment
 
368
 
369
 
370
 
371
+
372
  def create_manim_script(problem_data, script_path, audio_path, scale=1):
373
  """Generate Manim script from problem data with robust wrapping."""
374