sreepathi-ravikumar committed on
Commit
5567945
·
verified ·
1 Parent(s): b61fb9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +317 -3
app.py CHANGED
@@ -25,7 +25,307 @@ os.makedirs(TEMP_DIR, exist_ok=True)
25
  # API Key for security (optional)
26
  API_KEY = "rkmentormindzofficaltokenkey12345"
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spacing, align_left=True):
31
  """
@@ -63,7 +363,7 @@ def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spac
63
  para = para.align_to(LEFT)
64
  return para.strip()
65
 
66
- def create_manim_script(problem_data, script_path):
67
  """Generate Manim script from problem data with robust wrapping for title, text, and equations."""
68
 
69
  # Defaults
@@ -93,6 +393,7 @@ import textwrap
93
  class GeneratedMathScene(Scene):
94
  def construct(self):
95
  # Scene settings
 
96
  self.camera.background_color = "{settings.get('background_color', '#0f0f23')}"
97
  default_color = {settings.get('text_color', 'WHITE')}
98
  highlight_color = {settings.get('highlight_color', 'YELLOW')}
@@ -144,7 +445,8 @@ class GeneratedMathScene(Scene):
144
  obj = None
145
  content = slide.get("content", "")
146
  animation = slide.get("animation", "write_left")
147
- duration = slide.get("duration", 1.0)
 
148
  slide_type = slide.get("type", "text")
149
 
150
  if slide_type == "title":
@@ -246,7 +548,10 @@ def generate_video():
246
  cleaned = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', lst[0])
247
  nlist = ast.literal_eval(cleaned)
248
  datalst=[]
 
 
249
  for line in range(len(nlist)):
 
250
  datalst.append({
251
  "type": nlist[line][0].strip(),
252
  "content": nlist[line][1].strip(),
@@ -265,6 +570,15 @@ def generate_video():
265
  "title_size": 48
266
  },
267
  "slides":datalst}
 
 
 
 
 
 
 
 
 
268
  # Now proceed with video generation using 'data'
269
  print(json.dumps(data, indent=2)) # For debugging
270
  # ✅ Final validation
@@ -286,7 +600,7 @@ def generate_video():
286
 
287
  # Generate Manim script
288
  script_path = os.path.join(temp_work_dir, "scene.py")
289
- create_manim_script(data, script_path)
290
  print(f"Created Manim script at {script_path}")
291
 
292
  # Render video using subprocess
 
25
  # API Key for security (optional)
26
  API_KEY = "rkmentormindzofficaltokenkey12345"
27
 
28
def extract_english_paragraphs(text):
    """Return the leading (English) paragraph of *text*.

    The input is expected to contain the English portion first, separated
    from the native-language portion by a blank line.  Only the first
    paragraph is returned; ``extract_native_text`` returns the second.

    Note: despite the name, no character-set filtering is performed — the
    function relies purely on the paragraph order of the input.
    """
    # str.split always yields at least one element, so indexing [0] is
    # safe even for empty input (it returns "").
    return text.split('\n\n')[0]
41
 
42
def extract_native_text(text):
    """Return the second paragraph of *text* (the native-language portion).

    The input is expected to be two paragraphs separated by a blank line:
    the English text first, the native-language payload second.  Returns
    "" when no second paragraph exists instead of raising IndexError
    (the previous implementation crashed on single-paragraph input).
    """
    paragraphs = text.split('\n\n')
    return paragraphs[1] if len(paragraphs) > 1 else ""
56
+
57
+ import re
58
+ import html
59
+ import unicodedata
60
+ import tempfile
61
+ import os
62
+ import asyncio
63
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
64
+ from functools import lru_cache
65
+ import edge_tts
66
+ from pydub import AudioSegment
67
+ from pydub.effects import normalize
68
+ from mutagen.mp3 import MP3
69
+
70
# Default English voice used when no language-specific voice is selected.
VOICE_EN = "en-IN-NeerjaNeural"

# Patterns compiled once at import time and reused on every cleaning call
# (re-compiling per chunk would redo the work thousands of times).
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')  # http(s)/www URLs
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')                           # markup tags, stray angle brackets
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')                         # curly/square brackets
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')             # symbols the TTS should not read
WHITESPACE_PATTERN = re.compile(r'\s+')                             # runs of any whitespace
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')                     # sentence boundaries
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')                          # clause boundaries

@lru_cache(maxsize=1024)  # memoize: identical chunks are cleaned repeatedly
def clean_text_for_tts(text):
    """Sanitize *text* for speech synthesis.

    Strips URLs, markup, brackets and TTS-hostile symbols, removes a few
    SSML-related keywords, applies NFKD Unicode normalization and
    collapses whitespace.  Returns "" for falsy input.
    """
    if not text:
        return ""

    cleaned = html.unescape(str(text).strip())

    # Remove structural noise using the pre-compiled module patterns.
    for pattern in (URL_PATTERN, TAG_PATTERN, BRACKET_PATTERN, SPECIAL_CHAR_PATTERN):
        cleaned = pattern.sub('', cleaned)

    # These are literal backslash escape sequences (e.g. from JSON-ish
    # payloads), not real control characters.
    for escape in ('\\n', '\\t', '\\r'):
        cleaned = cleaned.replace(escape, ' ')

    # Drop SSML-ish keywords so injected markup cannot reach the engine.
    # NOTE(review): this is plain substring removal, so it also mangles
    # words that merely contain a keyword (e.g. "speaker" -> "er").
    for keyword in ('voice', 'speak', 'prosody', 'ssml', 'xmlns'):
        cleaned = cleaned.replace(keyword, '').replace(keyword.upper(), '')

    cleaned = unicodedata.normalize('NFKD', cleaned)
    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
103
+
104
async def generate_safe_audio(text, voice, semaphore):
    """Synthesize *text* with edge-tts and return the temp MP3 path.

    *semaphore* caps the number of concurrent TTS requests.  Returns
    None when the cleaned text is empty or synthesis fails; a partially
    written temp file is removed on failure.
    """
    async with semaphore:  # rate-limit concurrent edge-tts calls
        spoken = clean_text_for_tts(text)
        if not spoken:
            return None

        # Reserve a unique file name now; edge-tts writes to it below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as handle:
            out_path = handle.name

        try:
            await edge_tts.Communicate(spoken, voice=voice).save(out_path)
        except Exception as e:
            print(f"Error generating audio: {e}")
            if os.path.exists(out_path):
                os.unlink(out_path)
            return None
        return out_path
124
+
125
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Split *text* into TTS-sized chunks of at most *max_chars* characters.

    Splitting prefers sentence boundaries, then clause boundaries
    (comma/semicolon/colon), and finally greedy word wrapping.  A tuple
    is returned so the result is hashable for the LRU cache.
    """

    def wrap_words(fragment):
        # Greedy word wrap for fragments longer than max_chars.
        pieces, current = [], ""
        for word in fragment.split():
            candidate = f"{current} {word}" if current else word
            if len(candidate) <= max_chars:
                current = candidate
            else:
                if current:
                    pieces.append(current.strip())
                current = word
        if current:
            pieces.append(current.strip())
        return pieces

    text = clean_text_for_tts(text)
    if not text:
        return tuple()

    chunks = []
    for sentence in SENTENCE_PATTERN.split(text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_chars:
            chunks.append(sentence)
            continue
        # Sentence too long: fall back to clause, then word-level splits.
        for clause in SUB_PATTERN.split(sentence):
            clause = clause.strip()
            if not clause:
                continue
            if len(clause) <= max_chars:
                chunks.append(clause)
            else:
                chunks.extend(wrap_words(clause))

    return tuple(chunk for chunk in chunks if chunk.strip())
166
+
167
def process_audio_segment_fast(audio_file):
    """Load, normalize and lightly trim one temp MP3; delete the file.

    Runs in a worker thread (see the ThreadPoolExecutor caller).
    Returns a pydub AudioSegment, or None when loading/processing fails.
    The temp file is always removed, success or failure.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Silence-stripping very short clips can remove the audio
        # entirely, so only trim segments longer than 200 ms.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            # Narrowed from a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; best effort — keep the
            # un-trimmed segment on failure.
            except Exception:
                pass

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately to keep disk usage flat.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except OSError:
            pass
191
+
192
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Ultra-optimized bilingual TTS with parallel processing.

    Chunks *text*, synthesizes every chunk concurrently (bounded by
    *max_concurrent*), post-processes segments in a thread pool, then
    merges them with short pauses into *output_file* (MP3, 192 kbps).

    VOICE_TA, despite the name, may be any non-default voice id; only
    when it is a Tamil ("ta-IN") voice are Tamil-script chunks routed to
    it while other chunks fall back through (VOICE_TA or VOICE_EN).
    Returns *output_file* on success, None on any failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        # Per-chunk voice switching is only enabled for Tamil input.
        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore to limit concurrent TTS requests (prevents rate limiting).
        semaphore = asyncio.Semaphore(max_concurrent)

        # Prepare one synthesis task per chunk, picking the voice by script.
        tasks = []
        for i, chunk in enumerate(chunks):
            # U+0B80..U+0BFF is the Tamil Unicode block.
            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Generate all audio files concurrently; exceptions become results
        # rather than aborting the gather.
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only successfully written file paths (drop None/exceptions).
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Process audio segments in parallel using ThreadPoolExecutor
        # (decode/normalize work releases the GIL inside ffmpeg/pydub).
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        # Filter out segments that failed to process.
        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        # Merge audio segments with a 200 ms pause between chunks.
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Final mastering: gentle compression, then peak normalization.
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        # Export with high quality.
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
266
+
267
async def generate_tts_optimized(id, lines, lang):
    """Optimized TTS generation function.

    Picks a neural voice for *lang*, synthesizes the slide text, and
    returns ``(duration_seconds, audio_path)`` or ``(None, None)`` on
    failure.

    *lang* may be either a plain language name (e.g. "Tamil") or a
    combined payload of the form ``"text&&&Language"``; in the latter
    case the spoken text comes from the payload itself, not from *lines*.
    """
    # Language name -> edge-tts neural voice id.
    voice = {
        "English": "en-US-JennyNeural",
        "Tamil": "ta-IN-PallaviNeural",
        "Hindi": "hi-IN-SwaraNeural",
        "Malayalam": "ml-IN-SobhanaNeural",
        "Kannada": "kn-IN-SapnaNeural",
        "Telugu": "te-IN-ShrutiNeural",
        "Bengali": "bn-IN-TanishaaNeural",
        "Marathi": "mr-IN-AarohiNeural",
        "Gujarati": "gu-IN-DhwaniNeural",
        "Punjabi": "pa-IN-VaaniNeural",
        "Urdu": "ur-IN-GulNeural",
        "French": "fr-FR-DeniseNeural",
        "German": "de-DE-KatjaNeural",
        "Spanish": "es-ES-ElviraNeural",
        "Italian": "it-IT-IsabellaNeural",
        "Russian": "ru-RU-SvetlanaNeural",
        "Japanese": "ja-JP-NanamiNeural",
        "Korean": "ko-KR-SunHiNeural",
        "Chinese": "zh-CN-XiaoxiaoNeural",
        "Arabic": "ar-SA-ZariyahNeural",
        "Portuguese": "pt-BR-FranciscaNeural",
        "Dutch": "nl-NL-FennaNeural",
        "Greek": "el-GR-AthinaNeural",
        "Hebrew": "he-IL-HilaNeural",
        "Turkish": "tr-TR-EmelNeural",
        "Polish": "pl-PL-AgnieszkaNeural",
        "Thai": "th-TH-AcharaNeural",
        "Vietnamese": "vi-VN-HoaiMyNeural",
        "Swedish": "sv-SE-SofieNeural",
        "Finnish": "fi-FI-NooraNeural",
        "Czech": "cs-CZ-VlastaNeural",
        "Hungarian": "hu-HU-NoemiNeural"
    }

    audio_name = f"audio{id}.mp3"
    # NOTE(review): AUDIO_DIR is defined elsewhere in this module — confirm
    # it exists before this runs.
    audio_path = os.path.join(AUDIO_DIR, audio_name)

    if "&&&" in lang:
        # Combined "text&&&Language" payload: speak the embedded text.
        listf = lang.split("&&&")
        text = listf[0].strip()
        lang_name = listf[1].strip()
        voice_to_use = voice.get(lang_name, VOICE_EN)
    else:
        # assumes `lines` is indexable by `id` — TODO confirm caller contract
        text = lines[id]
        voice_to_use = voice.get(lang, VOICE_EN)

    # Increase max_concurrent for more speed (adjust based on your system)
    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)

    if output and os.path.exists(audio_path):
        # Read the real duration from the encoded MP3 header (mutagen).
        audio = MP3(audio_path)
        duration = audio.info.length
        return duration, audio_path

    return None, None
325
+
326
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Runs generate_tts_optimized in a fresh event loop via asyncio.run and
    returns its (duration, audio_path) result.
    """
    return asyncio.run(generate_tts_optimized(id, lines, lang))
329
 
330
  def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spacing, align_left=True):
331
  """
 
363
  para = para.align_to(LEFT)
364
  return para.strip()
365
 
366
+ def create_manim_script(problem_data, script_path,audio_path,scale=1):
367
  """Generate Manim script from problem data with robust wrapping for title, text, and equations."""
368
 
369
  # Defaults
 
393
  class GeneratedMathScene(Scene):
394
  def construct(self):
395
  # Scene settings
396
+ self.add_sound({audio_path})
397
  self.camera.background_color = "{settings.get('background_color', '#0f0f23')}"
398
  default_color = {settings.get('text_color', 'WHITE')}
399
  highlight_color = {settings.get('highlight_color', 'YELLOW')}
 
445
  obj = None
446
  content = slide.get("content", "")
447
  animation = slide.get("animation", "write_left")
448
+ scalelen = slide.get("duration", 1.0)
449
+ duration=scalelen*{scale}
450
  slide_type = slide.get("type", "text")
451
 
452
  if slide_type == "title":
 
548
  cleaned = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', lst[0])
549
  nlist = ast.literal_eval(cleaned)
550
  datalst=[]
551
+ total=0
552
+ scale=1
553
  for line in range(len(nlist)):
554
+ total=total+float(nlist[line][3])
555
  datalst.append({
556
  "type": nlist[line][0].strip(),
557
  "content": nlist[line][1].strip(),
 
570
  "title_size": 48
571
  },
572
  "slides":datalst}
573
+ #audio generating code here
574
+ lines=extract_english_paragraphs(lst[1])
575
+ lang=extract_native_text(lst[1])
576
+ length, audio_path = audio_func(id, lines, lang)
577
+ if not duration or not audio_path:
578
+ print("Failed to generate audio.")
579
+
580
+ scale=total/length
581
+
582
  # Now proceed with video generation using 'data'
583
  print(json.dumps(data, indent=2)) # For debugging
584
  # ✅ Final validation
 
600
 
601
  # Generate Manim script
602
  script_path = os.path.join(temp_work_dir, "scene.py")
603
+ create_manim_script(data, script_path,audio_path,scale)
604
  print(f"Created Manim script at {script_path}")
605
 
606
  # Render video using subprocess