sreepathi-ravikumar committed on
Commit
e0b72ba
·
verified ·
1 Parent(s): 91fa8e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +245 -257
app.py CHANGED
@@ -7,13 +7,20 @@ import shutil
7
  from datetime import datetime
8
  import traceback
9
  import json
10
- import ast
11
  import re
12
- import textwrap
13
- from manim import *
 
 
 
 
 
 
 
14
 
15
  app = Flask(__name__)
16
- CORS(app) # Enable CORS for all routes
17
 
18
  # Configuration
19
  BASE_DIR = "/app"
@@ -23,27 +30,13 @@ AUDIO_DIR = os.path.join(BASE_DIR, "sound")
23
  os.makedirs(MEDIA_DIR, exist_ok=True)
24
  os.makedirs(TEMP_DIR, exist_ok=True)
25
  os.makedirs(AUDIO_DIR, exist_ok=True)
 
26
# API key for security (optional).
# Read from the API_KEY environment variable so the secret does not have to
# live in source control; the previous literal stays as the fallback so
# existing deployments that do not set the variable keep working unchanged.
API_KEY = os.environ.get("API_KEY", "rkmentormindzofficaltokenkey12345")
28
 
29
-
30
-
31
- import re
32
- import html
33
- import unicodedata
34
- import tempfile
35
- import os
36
- import asyncio
37
- from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
38
- from functools import lru_cache
39
- import edge_tts
40
- from pydub import AudioSegment
41
- from pydub.effects import normalize
42
- from mutagen.mp3 import MP3
43
-
44
  VOICE_EN = "en-IN-NeerjaNeural"
45
 
46
- # Pre-compiled regex patterns for speed (compiled once, reused many times)
47
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
48
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
49
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -52,40 +45,40 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
52
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
53
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
54
 
55
# Markup keywords that must not be read aloud. Each one is matched in its
# lower-case and upper-case spelling, and only as a whole word, so ordinary
# words that merely contain a keyword ("speaker", "invoice") are left intact.
_MARKUP_KEYWORDS = ('voice', 'speak', 'prosody', 'ssml', 'xmlns')
_MARKUP_KEYWORD_PATTERN = re.compile(
    r'\b(?:'
    + '|'.join(form for word in _MARKUP_KEYWORDS for form in (word, word.upper()))
    + r')\b'
)


@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
def clean_text_for_tts(text):
    """Clean ``text`` before it is sent to TTS.

    Steps, in order: trim and HTML-unescape the input; remove everything
    matched by ``URL_PATTERN``, ``TAG_PATTERN``, ``BRACKET_PATTERN`` and
    ``SPECIAL_CHAR_PATTERN``; turn literal ``\\n`` / ``\\t`` / ``\\r`` escape
    sequences into spaces; drop stand-alone markup keywords; apply NFKD
    normalization; collapse whitespace.

    Args:
        text: Value to clean. It is converted with ``str()``. Because results
            are memoized with ``lru_cache``, the argument must be hashable.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    cleaned = html.unescape(str(text).strip())

    # Use pre-compiled patterns; the order is significant and kept as-is.
    for pattern in (URL_PATTERN, TAG_PATTERN, BRACKET_PATTERN, SPECIAL_CHAR_PATTERN):
        cleaned = pattern.sub('', cleaned)

    # These are the two-character sequences backslash + letter, not real
    # control characters (those are handled by WHITESPACE_PATTERN below).
    for escape_sequence in ('\\n', '\\t', '\\r'):
        cleaned = cleaned.replace(escape_sequence, ' ')

    cleaned = _MARKUP_KEYWORD_PATTERN.sub('', cleaned)

    cleaned = unicodedata.normalize('NFKD', cleaned)
    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
77
 
 
78
  async def generate_safe_audio(text, voice, semaphore):
79
  """Generate clean audio with rate limiting."""
80
- async with semaphore: # Limit concurrent TTS requests
81
  cleaned_text = clean_text_for_tts(text)
82
  if not cleaned_text:
83
  return None
84
-
85
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
86
  fname = temp_file.name
87
  temp_file.close()
88
-
89
  try:
90
  comm = edge_tts.Communicate(cleaned_text, voice=voice)
91
  await comm.save(fname)
@@ -93,24 +86,28 @@ async def generate_safe_audio(text, voice, semaphore):
93
  except Exception as e:
94
  print(f"Error generating audio: {e}")
95
  if os.path.exists(fname):
96
- os.unlink(fname)
 
 
 
97
  return None
98
 
 
99
  @lru_cache(maxsize=256)
100
  def smart_text_chunking(text, max_chars=80):
101
  """Cached text chunking for speed."""
102
  text = clean_text_for_tts(text)
103
  if not text:
104
- return tuple() # Return tuple for hashability (required by lru_cache)
105
-
106
  sentences = SENTENCE_PATTERN.split(text)
107
  chunks = []
108
-
109
  for sentence in sentences:
110
  sentence = sentence.strip()
111
  if not sentence:
112
  continue
113
-
114
  if len(sentence) <= max_chars:
115
  chunks.append(sentence)
116
  else:
@@ -119,7 +116,7 @@ def smart_text_chunking(text, max_chars=80):
119
  part = part.strip()
120
  if not part:
121
  continue
122
-
123
  if len(part) <= max_chars:
124
  chunks.append(part)
125
  else:
@@ -135,109 +132,105 @@ def smart_text_chunking(text, max_chars=80):
135
  current_chunk = word
136
  if current_chunk:
137
  chunks.append(current_chunk.strip())
138
-
139
  return tuple(chunk for chunk in chunks if chunk.strip())
140
 
 
141
def process_audio_segment_fast(audio_file):
    """Load, normalize and trim one temporary audio file, then delete it.

    Args:
        audio_file: Path of the temporary audio file to process.

    Returns:
        The processed ``AudioSegment``, or ``None`` when the file does not
        exist or cannot be processed.

    The input file is removed afterwards on a best-effort basis, whether or
    not processing succeeded.
    """
    try:
        if not os.path.exists(audio_file):
            return None

        segment = normalize(AudioSegment.from_file(audio_file))

        # Only strip silence for longer segments.
        if len(segment) > 200:
            try:
                stripped = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as strip_error:
                # Stripping is best effort: keep the untrimmed audio, but say
                # why it failed instead of hiding the error.
                # NOTE(review): pydub's strip_silence appears to default to
                # padding=100, which is larger than silence_len=50 and may make
                # this call raise on every invocation - confirm, and pass an
                # explicit padding if trimming is really wanted.
                print(f"Warning: Could not strip silence: {strip_error}")
            else:
                segment = stripped

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately; a failure here must never mask the result.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except (OSError, TypeError, ValueError):
            pass
165
 
 
166
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Synthesize ``text`` into one MP3 file, processing its chunks in parallel.

    The text is split with ``smart_text_chunking``; every chunk is synthesized
    through ``generate_safe_audio`` (bounded by a semaphore), post-processed
    with ``process_audio_segment_fast`` in a thread pool, and the resulting
    segments are joined with 200 ms pauses, compressed, normalized and
    exported as a 192k MP3.

    Args:
        text: Text to speak.
        output_file: Path of the MP3 file to write.
        VOICE_TA: Requested voice name. When it is a Tamil (``ta-IN``) voice,
            chunks containing Tamil script use it and every other chunk uses
            ``VOICE_EN``. Any other voice is used for all chunks, and ``None``
            falls back to ``VOICE_EN``.
        max_concurrent: Maximum number of simultaneous TTS requests.

    Returns:
        ``output_file`` on success, ``None`` on any failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore to limit concurrent TTS requests.
        semaphore = asyncio.Semaphore(max_concurrent)

        tasks = []
        for chunk in chunks:
            if is_bilingual_tamil:
                # Route per chunk: Tamil script goes to the Tamil voice and
                # everything else to the English voice. The former expression
                # always evaluated to VOICE_TA, so this routing never happened.
                is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
                voice = VOICE_TA if is_tamil else VOICE_EN
            else:
                voice = VOICE_TA or VOICE_EN
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Generate all audio files concurrently; results keep the task order.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # return_exceptions=True hands failures back as values - report them
        # rather than dropping them silently.
        for result in results:
            if isinstance(result, BaseException):
                print(f"Error generating audio: {result}")

        # Keep only paths of files that were actually produced.
        processed_audio_files = [
            f for f in results if isinstance(f, str) and f and os.path.exists(f)
        ]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Process audio segments in parallel using ThreadPoolExecutor.
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        print("Merging audio segments...")
        pause = AudioSegment.silent(duration=200)
        merged_audio = audio_segments[0]
        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f" Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
240
 
 
241
  async def generate_tts_optimized(id, lines, lang):
242
  """Optimized TTS generation function."""
243
  voice = {
@@ -274,82 +267,60 @@ async def generate_tts_optimized(id, lines, lang):
274
  "Czech": "cs-CZ-VlastaNeural",
275
  "Hungarian": "hu-HU-NoemiNeural"
276
  }
277
-
278
  audio_name = f"audio{id}.mp3"
279
  audio_path = os.path.join(AUDIO_DIR, audio_name)
280
-
281
  if "&&&" in lang:
282
  listf = lang.split("&&&")
283
  text = listf[0].strip()
284
- lang_name = listf[1].strip()
285
  voice_to_use = voice.get(lang_name, VOICE_EN)
286
  else:
287
- text = lines[id]
288
  voice_to_use = voice.get(lang, VOICE_EN)
289
-
290
- # Increase max_concurrent for more speed (adjust based on your system)
291
  output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
292
-
293
  if output and os.path.exists(audio_path):
294
- audio = MP3(audio_path)
295
- duration = audio.info.length
296
- return duration, audio_path
297
-
298
- return None , None
 
 
 
 
 
299
 
300
def audio_func(id, lines, lang):
    """Run the async TTS generation to completion and return its result.

    The arguments are forwarded unchanged to ``generate_tts_optimized``.
    """
    tts_coroutine = generate_tts_optimized(id, lines, lang)
    return asyncio.run(tts_coroutine)
303
-
304
def make_wrapped_paragraph(content, max_width, color, font, font_size, line_spacing, align_left=True):
    """Build a vertically stacked group of ``Text`` lines forming a paragraph.

    Words are appended greedily to the current line for as long as the
    rendered line stays within ``max_width``; each finished line becomes its
    own ``Text`` object and the lines are arranged downward in a ``VGroup``.

    Args:
        content: Paragraph text; it is split on whitespace.
        max_width: Maximum rendered width of a line.
        color, font, font_size: Passed to every ``Text`` line.
        line_spacing: Vertical gap (``buff``) between consecutive lines.
        align_left: Align the lines on their left edge when true; otherwise
            use the default alignment of ``arrange``.

    Returns:
        A ``VGroup`` of ``Text`` lines; an empty ``VGroup`` when ``content``
        contains no words.
    """
    def render(line_text):
        # Measure with exactly the font settings used for the final lines.
        return Text(line_text, color=color, font=font, font_size=font_size)

    lines = []
    current = ""
    for word in content.split():
        candidate = word if not current else current + " " + word
        if render(candidate).width <= max_width:
            current = candidate
            continue
        # Overflow: flush the collected line. ``current`` is empty when a
        # single word is wider than max_width; no blank line is emitted then
        # and the word simply gets a line of its own.
        if current:
            lines.append(render(current))
        current = word
    if current:
        lines.append(render(current))

    if not lines:
        return VGroup()

    para = VGroup(*lines)
    # Space lines vertically as a column. Left alignment has to be requested
    # through arrange(); align_to(LEFT) does not align the lines, and a
    # VGroup has no strip() method to call on the result.
    if align_left:
        para.arrange(DOWN, aligned_edge=LEFT, buff=line_spacing)
    else:
        para.arrange(DOWN, buff=line_spacing)
    return para
339
-
340
- def create_manim_script(problem_data, script_path,audio_path,scale=1):
341
- """Generate Manim script from problem data with robust wrapping for title, text, and equations."""
342
-
343
- # Defaults
344
  settings = problem_data.get("video_settings", {
345
  "background_color": "#0f0f23",
346
  "text_color": "WHITE",
347
  "highlight_color": "YELLOW",
348
- "font": "",
349
  "text_size": 36,
350
  "equation_size": 45,
351
  "title_size": 48,
352
- "wrap_width": 15.5 # in scene width units; adjust to taste
353
  })
354
 
355
  slides = problem_data.get("slides", [])
@@ -357,122 +328,115 @@ def create_manim_script(problem_data, script_path,audio_path,scale=1):
357
  raise ValueError("No slides provided in input data")
358
 
359
  slides_repr = repr(slides)
 
360
 
361
- # Use a dedicated wrap width in scene units; you can adapt how max_width is computed
362
  wrap_width = float(settings.get("wrap_width", 15.5))
 
 
 
 
 
 
 
 
 
363
 
364
- manim_code = f'''
365
- from manim import *
366
- import textwrap
367
  class GeneratedMathScene(Scene):
368
  def construct(self):
369
  # Scene settings
370
- self.add_sound({audio_path})
371
- self.camera.background_color = "{settings.get('background_color', '#0f0f23')}"
372
- default_color = {settings.get('text_color', 'WHITE')}
373
- highlight_color = {settings.get('highlight_color', 'YELLOW')}
374
- default_font = "{settings.get('font', 'CMU Serif')}"
375
- text_size = {settings.get('text_size', 36)}
376
- equation_size = {settings.get('equation_size', 45)}
377
- title_size = {settings.get('title_size', 48)}
378
  wrap_width = {wrap_width}
379
-
380
- # Helper to wrap text into lines that fit within max width
381
  def make_wrapped_paragraph(content, color, font, font_size, line_spacing=0.2):
382
  lines = []
383
  words = content.split()
384
  current = ""
385
-
386
  for w in words:
387
  test = w if not current else current + " " + w
388
  test_obj = Text(test, color=color, font=font, font_size=font_size)
389
-
390
  if test_obj.width <= wrap_width * 0.9:
391
  current = test
392
  else:
393
- line_obj = Text(current, color=color, font=font, font_size=font_size)
394
- lines.append(line_obj)
 
395
  current = w
396
-
397
  if current:
398
  lines.append(Text(current, color=color, font=font, font_size=font_size))
399
-
400
  if not lines:
401
  return VGroup()
402
-
403
- # --- FIX: Force every line to align to LEFT like line 1 ---
404
  first_line = lines[0]
405
  for ln in lines:
406
  ln.align_to(first_line, LEFT)
407
-
408
  para = VGroup(*lines).arrange(DOWN, aligned_edge=LEFT, buff=line_spacing)
409
  return para
410
- class GeneratedMathSceneInner(Scene):
411
- pass
412
  content_group = VGroup()
413
  current_y = 3.0
414
  line_spacing = 0.8
415
  slides = {slides_repr}
416
-
417
- # Build each slide
418
  for idx, slide in enumerate(slides):
419
  obj = None
420
  content = slide.get("content", "")
421
  animation = slide.get("animation", "write_left")
422
  scalelen = slide.get("duration", 1.0)
423
- duration=scalelen*{scale}
424
  slide_type = slide.get("type", "text")
425
-
426
  if slide_type == "title":
427
- # Wrap title text
428
  title_text = content
429
- # Use paragraph wrapping to keep multi-line titles readable
430
- lines = []
431
  if title_text:
432
- lines = []
433
- # Reuse make_wrapped_paragraph by simulating a single paragraph
434
  lines_group = make_wrapped_paragraph(title_text, highlight_color, default_font, title_size, line_spacing=0.2)
435
  obj = lines_group if len(lines_group) > 0 else Text(title_text, color=highlight_color, font=default_font, font_size=title_size)
436
  else:
437
  obj = Text("", color=highlight_color, font=default_font, font_size=title_size)
 
438
  if obj.width > wrap_width:
439
  obj.scale_to_fit_width(wrap_width)
440
-
441
  obj.move_to(ORIGIN)
442
  self.play(FadeIn(obj), run_time=duration * 0.8)
443
  self.wait(duration * 0.3)
444
  self.play(FadeOut(obj), run_time=duration * 0.3)
445
  continue
446
-
447
  elif slide_type == "text":
448
- # Use wrapping for normal text
449
  obj = make_wrapped_paragraph(content, default_color, default_font, text_size, line_spacing=0.25)
450
-
451
  elif slide_type == "equation":
452
- # Wrap long equations by splitting content into lines if needed
453
- # Heuristic: if content is too wide, create a multi-line TeX using \\ line breaks
454
  eq_content = content
455
- # Optional: insert line breaks at common math breakpoints if needed
456
  test = MathTex(eq_content, color=default_color, font_size=equation_size)
457
  if test.width > wrap_width:
458
- # naive wrap: insert line breaks at spaces near the middle
459
  parts = eq_content.split(" ")
460
- mid = len(parts)//2
461
  line1 = " ".join(parts[:mid])
462
  line2 = " ".join(parts[mid:])
463
- wrapped_eq = f"{{line1}} \\\\\\\\ {{line2}}"
464
  obj = MathTex(wrapped_eq, color=default_color, font_size=equation_size)
465
  else:
466
  obj = MathTex(eq_content, color=default_color, font_size=equation_size)
467
-
468
  if obj.width > wrap_width:
469
  obj.scale_to_fit_width(wrap_width)
470
-
471
  if obj:
472
- # Position and animate
473
  obj.to_edge(LEFT, buff=0.3)
474
- obj.shift(UP * (current_y - obj.height/2))
475
-
476
  obj_bottom = obj.get_bottom()[1]
477
  if obj_bottom < -3.5:
478
  scroll_amount = abs(obj_bottom - (-3.5)) + 0.3
@@ -480,7 +444,7 @@ class GeneratedMathScene(Scene):
480
  current_y += scroll_amount
481
  obj.shift(UP * scroll_amount)
482
  obj.to_edge(LEFT, buff=0.3)
483
-
484
  if animation == "write_left":
485
  self.play(Write(obj), run_time=duration)
486
  elif animation == "fade_in":
@@ -490,99 +454,113 @@ class GeneratedMathScene(Scene):
490
  self.play(obj.animate.set_color(highlight_color), run_time=duration * 0.4)
491
  else:
492
  self.play(Write(obj), run_time=duration)
493
-
494
  content_group.add(obj)
495
- # Decrease y for next item
496
  current_y -= (getattr(obj, "height", 0) + line_spacing)
497
  self.wait(0.3)
498
-
499
  if len(content_group) > 0:
500
  final_box = SurroundingRectangle(content_group[-1], color=highlight_color, buff=0.2)
501
  self.play(Create(final_box), run_time=0.8)
502
  self.wait(1.5)
503
- '''
 
 
 
 
 
 
 
 
504
 
505
- with open(script_path, 'w', encoding='utf-8') as f:
506
- f.write(manim_code)
507
-
508
- print(f"Generated script preview (first 500 chars):{manim_code[:500]}...")
509
 
510
@app.route("/")
def home():
    """Root endpoint: answer with a fixed status message."""
    status_message = "Flask Manim Video Generator is Running"
    return status_message
513
 
 
514
  @app.route("/generate", methods=["POST"])
515
  def generate_video():
 
516
  try:
517
  raw_data = request.get_json()
518
- raw_body=raw_data.get("jsondata" , '')
519
- #print(f"Raw body length: {len(raw_body)}")
520
- #print(f"First 200 chars: {raw_body[:200]}")
 
 
 
 
521
  lst = raw_body.split("&&&&")
 
 
 
522
  cleaned = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', lst[0])
523
- nlist = ast.literal_eval(cleaned)
524
- datalst=[]
525
- total=0
526
- scale=1
 
 
 
 
 
527
  for line in range(len(nlist)):
528
- total=total+float(nlist[line][3])
529
- datalst.append({
530
- "type": nlist[line][0].strip(),
531
- "content": nlist[line][1].strip(),
532
- "animation": nlist[line][2].strip().replace(" ",""),
533
- "duration": nlist[line][3]
534
- })
535
-
536
- data={
537
- "video_settings": {
538
- "background_color": "#0f0f23",
539
- "text_color": "WHITE",
540
- "highlight_color": "YELLOW",
541
- "font": "CMU Serif",
542
- "text_size": 36,
543
- "equation_size": 42,
544
- "title_size": 48
545
- },
546
- "slides":datalst}
547
- #audio generating code here
548
- best=lst[1].split("&&&")
549
- lines=best[0]
 
 
 
 
 
 
 
550
  try:
551
- lang=best[1]
552
  except:
553
- print(best)
 
554
  length, audio_path = audio_func(0, lines, lang)
555
- if not length or not audio_path:
556
- print("Failed to generate audio.")
557
-
558
- scale=float(length)/total
559
-
560
- # Now proceed with video generation using 'data'
561
- print(json.dumps(data, indent=2)) # For debugging
562
- # ✅ Final validation
563
- if "slides" not in data or not data["slides"]:
564
- return jsonify({"error": "No slides provided in request"}), 400
565
-
566
- print(f"✅ Parsed {len(data['slides'])} slides successfully.")
567
-
568
- # Validate input
569
  if "slides" not in data or not data["slides"]:
570
  return jsonify({"error": "No slides provided in request"}), 400
571
-
572
  print(f"Received request with {len(data['slides'])} slides")
573
-
574
- # Create unique temporary directory
575
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
576
  temp_work_dir = os.path.join(TEMP_DIR, f"manim_{timestamp}")
577
  os.makedirs(temp_work_dir, exist_ok=True)
578
-
579
- # Generate Manim script
580
  script_path = os.path.join(temp_work_dir, "scene.py")
581
- create_manim_script(data, script_path,audio_path,scale)
582
  print(f"Created Manim script at {script_path}")
583
-
584
- # Render video using subprocess
585
- quality = 'l' # l=low, m=medium, h=high
586
  render_command = [
587
  "manim",
588
  f"-q{quality}",
@@ -591,9 +569,9 @@ def generate_video():
591
  script_path,
592
  "GeneratedMathScene"
593
  ]
594
-
595
  print(f"Running command: {' '.join(render_command)}")
596
-
597
  result = subprocess.run(
598
  render_command,
599
  capture_output=True,
@@ -601,7 +579,7 @@ def generate_video():
601
  cwd=temp_work_dir,
602
  timeout=120
603
  )
604
-
605
  if result.returncode != 0:
606
  error_msg = result.stderr or result.stdout
607
  print(f"Manim rendering failed: {error_msg}")
@@ -609,13 +587,12 @@ def generate_video():
609
  "error": "Manim rendering failed",
610
  "details": error_msg
611
  }), 500
612
-
613
  print("Manim rendering completed successfully")
614
-
615
- # Find generated video
616
  quality_map = {'l': '480p15', 'm': '720p30', 'h': '1080p60'}
617
  video_quality = quality_map.get(quality, '480p15')
618
-
619
  video_path = os.path.join(
620
  temp_work_dir,
621
  "videos",
@@ -623,25 +600,24 @@ def generate_video():
623
  video_quality,
624
  "GeneratedMathScene.mp4"
625
  )
626
-
627
  if not os.path.exists(video_path):
628
  print(f"Video not found at expected path: {video_path}")
629
  return jsonify({
630
  "error": "Video file not found after rendering",
631
  "expected_path": video_path
632
  }), 500
633
-
634
  print(f"Video found at: {video_path}")
635
-
636
- # Copy to media directory
637
  output_filename = f"math_video_{timestamp}.mp4"
638
  output_path = os.path.join(MEDIA_DIR, output_filename)
639
  shutil.copy(video_path, output_path)
640
  print(f"Video copied to: {output_path}")
641
-
642
- # Clean up temp directory
643
  try:
644
- shutil.rmtree(temp_work_dir)
 
645
  print("Cleaned up temp directory")
646
  except Exception as e:
647
  print(f"Failed to clean temp dir: {e}")
@@ -652,18 +628,30 @@ def generate_video():
652
  as_attachment=False,
653
  download_name=output_filename
654
  )
655
-
656
  except subprocess.TimeoutExpired:
657
  print("Video rendering timeout")
 
 
 
 
 
658
  return jsonify({"error": "Video rendering timeout (120s)"}), 504
 
659
  except Exception as e:
660
  print(f"Error: {str(e)}")
661
  traceback.print_exc()
 
 
 
 
 
662
  return jsonify({
663
  "error": str(e),
664
  "traceback": traceback.format_exc()
665
  }), 500
666
 
 
667
if __name__ == '__main__':
    # Script entry point: serve on all interfaces, on the port named by the
    # PORT environment variable (7860 when it is not set), without debug mode.
    port = int(os.environ.get('PORT', 7860))
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
    )
 
7
  from datetime import datetime
8
  import traceback
9
  import json
10
+ import ast
11
  import re
12
+ import html
13
+ import unicodedata
14
+ import asyncio
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ from functools import lru_cache
17
+ import edge_tts
18
+ from pydub import AudioSegment
19
+ from pydub.effects import normalize
20
+ from mutagen.mp3 import MP3
21
 
22
  app = Flask(__name__)
23
+ CORS(app)
24
 
25
  # Configuration
26
  BASE_DIR = "/app"
 
30
  os.makedirs(MEDIA_DIR, exist_ok=True)
31
  os.makedirs(TEMP_DIR, exist_ok=True)
32
  os.makedirs(AUDIO_DIR, exist_ok=True)
33
+
34
# API key for security (optional).
# Read from the API_KEY environment variable so the secret does not have to
# live in source control; the previous literal stays as the fallback so
# existing deployments that do not set the variable keep working unchanged.
API_KEY = os.environ.get("API_KEY", "rkmentormindzofficaltokenkey12345")
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  VOICE_EN = "en-IN-NeerjaNeural"
38
 
39
+ # Pre-compiled regex patterns for speed
40
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
41
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
42
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 
45
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
46
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
47
 
48
+
49
# Markup keywords that must not be read aloud. Each one is matched in its
# lower-case and upper-case spelling, and only as a whole word, so ordinary
# words that merely contain a keyword ("speaker", "invoice") are left intact.
_MARKUP_KEYWORDS = ('voice', 'speak', 'prosody', 'ssml', 'xmlns')
_MARKUP_KEYWORD_PATTERN = re.compile(
    r'\b(?:'
    + '|'.join(form for word in _MARKUP_KEYWORDS for form in (word, word.upper()))
    + r')\b'
)


@lru_cache(maxsize=1024)
def clean_text_for_tts(text):
    """Clean ``text`` before it is sent to TTS.

    Steps, in order: trim and HTML-unescape the input; remove everything
    matched by ``URL_PATTERN``, ``TAG_PATTERN``, ``BRACKET_PATTERN`` and
    ``SPECIAL_CHAR_PATTERN``; turn literal ``\\n`` / ``\\t`` / ``\\r`` escape
    sequences into spaces; drop stand-alone markup keywords; apply NFKD
    normalization; collapse whitespace.

    Args:
        text: Value to clean. It is converted with ``str()``. Because results
            are memoized with ``lru_cache``, the argument must be hashable.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    cleaned = html.unescape(str(text).strip())

    # The order of the pattern passes is significant and kept as-is.
    for pattern in (URL_PATTERN, TAG_PATTERN, BRACKET_PATTERN, SPECIAL_CHAR_PATTERN):
        cleaned = pattern.sub('', cleaned)

    # These are the two-character sequences backslash + letter, not real
    # control characters (those are handled by WHITESPACE_PATTERN below).
    for escape_sequence in ('\\n', '\\t', '\\r'):
        cleaned = cleaned.replace(escape_sequence, ' ')

    cleaned = _MARKUP_KEYWORD_PATTERN.sub('', cleaned)

    cleaned = unicodedata.normalize('NFKD', cleaned)
    return WHITESPACE_PATTERN.sub(' ', cleaned).strip()
69
 
70
+
71
  async def generate_safe_audio(text, voice, semaphore):
72
  """Generate clean audio with rate limiting."""
73
+ async with semaphore:
74
  cleaned_text = clean_text_for_tts(text)
75
  if not cleaned_text:
76
  return None
77
+
78
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
79
  fname = temp_file.name
80
  temp_file.close()
81
+
82
  try:
83
  comm = edge_tts.Communicate(cleaned_text, voice=voice)
84
  await comm.save(fname)
 
86
  except Exception as e:
87
  print(f"Error generating audio: {e}")
88
  if os.path.exists(fname):
89
+ try:
90
+ os.unlink(fname)
91
+ except:
92
+ pass
93
  return None
94
 
95
+
96
  @lru_cache(maxsize=256)
97
  def smart_text_chunking(text, max_chars=80):
98
  """Cached text chunking for speed."""
99
  text = clean_text_for_tts(text)
100
  if not text:
101
+ return tuple()
102
+
103
  sentences = SENTENCE_PATTERN.split(text)
104
  chunks = []
105
+
106
  for sentence in sentences:
107
  sentence = sentence.strip()
108
  if not sentence:
109
  continue
110
+
111
  if len(sentence) <= max_chars:
112
  chunks.append(sentence)
113
  else:
 
116
  part = part.strip()
117
  if not part:
118
  continue
119
+
120
  if len(part) <= max_chars:
121
  chunks.append(part)
122
  else:
 
132
  current_chunk = word
133
  if current_chunk:
134
  chunks.append(current_chunk.strip())
135
+
136
  return tuple(chunk for chunk in chunks if chunk.strip())
137
 
138
+
139
def process_audio_segment_fast(audio_file):
    """Load, normalize and trim one temporary audio file, then delete it.

    Args:
        audio_file: Path of the temporary audio file to process.

    Returns:
        The processed ``AudioSegment``, or ``None`` when the file does not
        exist or cannot be processed.

    The input file is removed afterwards on a best-effort basis, whether or
    not processing succeeded.
    """
    try:
        if not os.path.exists(audio_file):
            return None

        segment = normalize(AudioSegment.from_file(audio_file))

        # Only strip silence for longer segments.
        if len(segment) > 200:
            try:
                stripped = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as strip_error:
                # Stripping is best effort: keep the untrimmed audio, but say
                # why it failed instead of hiding the error.
                # NOTE(review): pydub's strip_silence appears to default to
                # padding=100, which is larger than silence_len=50 and may make
                # this call raise on every invocation - confirm, and pass an
                # explicit padding if trimming is really wanted.
                print(f"Warning: Could not strip silence: {strip_error}")
            else:
                segment = stripped

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Remove the temp file right away; a failure here must never mask the result.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except (OSError, TypeError, ValueError):
            pass
164
 
165
+
166
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Synthesize ``text`` into one MP3 file, processing its chunks in parallel.

    The text is split with ``smart_text_chunking``; every chunk is synthesized
    through ``generate_safe_audio`` (bounded by a semaphore), post-processed
    with ``process_audio_segment_fast`` in a thread pool, and the resulting
    segments are joined with 200 ms pauses, compressed, normalized and
    exported as a 192k MP3.

    Args:
        text: Text to speak.
        output_file: Path of the MP3 file to write.
        VOICE_TA: Requested voice name. When it is a Tamil (``ta-IN``) voice,
            chunks containing Tamil script use it and every other chunk uses
            ``VOICE_EN``. Any other voice is used for all chunks, and ``None``
            falls back to ``VOICE_EN``.
        max_concurrent: Maximum number of simultaneous TTS requests.

    Returns:
        ``output_file`` on success, ``None`` on any failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Bounds the number of TTS requests in flight at once.
        semaphore = asyncio.Semaphore(max_concurrent)

        tasks = []
        for chunk in chunks:
            if is_bilingual_tamil:
                # Route per chunk: Tamil script goes to the Tamil voice and
                # everything else to the English voice. The former expression
                # always evaluated to VOICE_TA, so this routing never happened.
                is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
                voice = VOICE_TA if is_tamil else VOICE_EN
            else:
                voice = VOICE_TA or VOICE_EN
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Results come back in task order, so chunk order is preserved.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # return_exceptions=True hands failures back as values - report them
        # rather than dropping them silently.
        for result in results:
            if isinstance(result, BaseException):
                print(f"Error generating audio: {result}")

        # Keep only paths of files that were actually produced.
        processed_audio_files = [
            f for f in results if isinstance(f, str) and f and os.path.exists(f)
        ]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        print("Merging audio segments...")
        pause = AudioSegment.silent(duration=200)
        merged_audio = audio_segments[0]
        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        traceback.print_exc()
        return None
232
 
233
+
234
  async def generate_tts_optimized(id, lines, lang):
235
  """Optimized TTS generation function."""
236
  voice = {
 
267
  "Czech": "cs-CZ-VlastaNeural",
268
  "Hungarian": "hu-HU-NoemiNeural"
269
  }
270
+
271
  audio_name = f"audio{id}.mp3"
272
  audio_path = os.path.join(AUDIO_DIR, audio_name)
273
+
274
  if "&&&" in lang:
275
  listf = lang.split("&&&")
276
  text = listf[0].strip()
277
+ lang_name = listf[1].strip() if len(listf) > 1 else "English"
278
  voice_to_use = voice.get(lang_name, VOICE_EN)
279
  else:
280
+ text = lines[id] if isinstance(lines, (list, tuple)) and id < len(lines) else str(lines)
281
  voice_to_use = voice.get(lang, VOICE_EN)
282
+
 
283
  output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
284
+
285
  if output and os.path.exists(audio_path):
286
+ try:
287
+ audio = MP3(audio_path)
288
+ duration = audio.info.length
289
+ return duration, audio_path
290
+ except Exception as e:
291
+ print(f"Error reading audio file: {e}")
292
+ return None, None
293
+
294
+ return None, None
295
+
296
 
297
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Runs ``generate_tts_optimized(id, lines, lang)`` to completion on a
    private event loop created for this call.

    Returns:
        The result of ``generate_tts_optimized``, or ``(None, None)`` when
        anything raises.
    """
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(generate_tts_optimized(id, lines, lang))
        finally:
            # Unregister the loop before closing it, so the thread is not left
            # with a closed loop installed as its current event loop.
            asyncio.set_event_loop(None)
            loop.close()
    except Exception as e:
        print(f"Error in audio_func: {e}")
        traceback.print_exc()
        return None, None
310
+
311
+
312
+ def create_manim_script(problem_data, script_path, audio_path, scale=1):
313
+ """Generate Manim script from problem data with robust wrapping."""
314
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  settings = problem_data.get("video_settings", {
316
  "background_color": "#0f0f23",
317
  "text_color": "WHITE",
318
  "highlight_color": "YELLOW",
319
+ "font": "CMU Serif",
320
  "text_size": 36,
321
  "equation_size": 45,
322
  "title_size": 48,
323
+ "wrap_width": 15.5
324
  })
325
 
326
  slides = problem_data.get("slides", [])
 
328
  raise ValueError("No slides provided in input data")
329
 
330
  slides_repr = repr(slides)
331
+ audio_path_repr = repr(audio_path)
332
 
 
333
  wrap_width = float(settings.get("wrap_width", 15.5))
334
+ background_color = settings.get("background_color", "#0f0f23")
335
+ text_color = settings.get("text_color", "WHITE")
336
+ highlight_color = settings.get("highlight_color", "YELLOW")
337
+ font = settings.get("font", "CMU Serif")
338
+ text_size = settings.get("text_size", 36)
339
+ equation_size = settings.get("equation_size", 45)
340
+ title_size = settings.get("title_size", 48)
341
+
342
+ manim_code = f"""from manim import *
343
 
 
 
 
344
  class GeneratedMathScene(Scene):
345
  def construct(self):
346
  # Scene settings
347
+ self.add_sound({audio_path_repr})
348
+ self.camera.background_color = "{background_color}"
349
+ default_color = {text_color}
350
+ highlight_color = {highlight_color}
351
+ default_font = "{font}"
352
+ text_size = {text_size}
353
+ equation_size = {equation_size}
354
+ title_size = {title_size}
355
  wrap_width = {wrap_width}
356
+
 
357
  def make_wrapped_paragraph(content, color, font, font_size, line_spacing=0.2):
358
  lines = []
359
  words = content.split()
360
  current = ""
361
+
362
  for w in words:
363
  test = w if not current else current + " " + w
364
  test_obj = Text(test, color=color, font=font, font_size=font_size)
365
+
366
  if test_obj.width <= wrap_width * 0.9:
367
  current = test
368
  else:
369
+ if current:
370
+ line_obj = Text(current, color=color, font=font, font_size=font_size)
371
+ lines.append(line_obj)
372
  current = w
373
+
374
  if current:
375
  lines.append(Text(current, color=color, font=font, font_size=font_size))
376
+
377
  if not lines:
378
  return VGroup()
379
+
 
380
  first_line = lines[0]
381
  for ln in lines:
382
  ln.align_to(first_line, LEFT)
383
+
384
  para = VGroup(*lines).arrange(DOWN, aligned_edge=LEFT, buff=line_spacing)
385
  return para
386
+
 
387
  content_group = VGroup()
388
  current_y = 3.0
389
  line_spacing = 0.8
390
  slides = {slides_repr}
391
+
 
392
  for idx, slide in enumerate(slides):
393
  obj = None
394
  content = slide.get("content", "")
395
  animation = slide.get("animation", "write_left")
396
  scalelen = slide.get("duration", 1.0)
397
+ duration = scalelen * {scale}
398
  slide_type = slide.get("type", "text")
399
+
400
  if slide_type == "title":
 
401
  title_text = content
 
 
402
  if title_text:
 
 
403
  lines_group = make_wrapped_paragraph(title_text, highlight_color, default_font, title_size, line_spacing=0.2)
404
  obj = lines_group if len(lines_group) > 0 else Text(title_text, color=highlight_color, font=default_font, font_size=title_size)
405
  else:
406
  obj = Text("", color=highlight_color, font=default_font, font_size=title_size)
407
+
408
  if obj.width > wrap_width:
409
  obj.scale_to_fit_width(wrap_width)
410
+
411
  obj.move_to(ORIGIN)
412
  self.play(FadeIn(obj), run_time=duration * 0.8)
413
  self.wait(duration * 0.3)
414
  self.play(FadeOut(obj), run_time=duration * 0.3)
415
  continue
416
+
417
  elif slide_type == "text":
 
418
  obj = make_wrapped_paragraph(content, default_color, default_font, text_size, line_spacing=0.25)
419
+
420
  elif slide_type == "equation":
 
 
421
  eq_content = content
 
422
  test = MathTex(eq_content, color=default_color, font_size=equation_size)
423
  if test.width > wrap_width:
 
424
  parts = eq_content.split(" ")
425
+ mid = len(parts) // 2
426
  line1 = " ".join(parts[:mid])
427
  line2 = " ".join(parts[mid:])
428
+ wrapped_eq = f"{{{{line1}}}} \\\\ {{{{line2}}}}"
429
  obj = MathTex(wrapped_eq, color=default_color, font_size=equation_size)
430
  else:
431
  obj = MathTex(eq_content, color=default_color, font_size=equation_size)
432
+
433
  if obj.width > wrap_width:
434
  obj.scale_to_fit_width(wrap_width)
435
+
436
  if obj:
 
437
  obj.to_edge(LEFT, buff=0.3)
438
+ obj.shift(UP * (current_y - obj.height / 2))
439
+
440
  obj_bottom = obj.get_bottom()[1]
441
  if obj_bottom < -3.5:
442
  scroll_amount = abs(obj_bottom - (-3.5)) + 0.3
 
444
  current_y += scroll_amount
445
  obj.shift(UP * scroll_amount)
446
  obj.to_edge(LEFT, buff=0.3)
447
+
448
  if animation == "write_left":
449
  self.play(Write(obj), run_time=duration)
450
  elif animation == "fade_in":
 
454
  self.play(obj.animate.set_color(highlight_color), run_time=duration * 0.4)
455
  else:
456
  self.play(Write(obj), run_time=duration)
457
+
458
  content_group.add(obj)
 
459
  current_y -= (getattr(obj, "height", 0) + line_spacing)
460
  self.wait(0.3)
461
+
462
  if len(content_group) > 0:
463
  final_box = SurroundingRectangle(content_group[-1], color=highlight_color, buff=0.2)
464
  self.play(Create(final_box), run_time=0.8)
465
  self.wait(1.5)
466
+ """
467
+
468
+ try:
469
+ with open(script_path, 'w', encoding='utf-8') as f:
470
+ f.write(manim_code)
471
+ print(f"Generated script at {script_path}")
472
+ except Exception as e:
473
+ print(f"Error writing script: {e}")
474
+ raise
475
 
 
 
 
 
476
 
477
  @app.route("/")
478
  def home():
479
  return "Flask Manim Video Generator is Running"
480
 
481
+
482
  @app.route("/generate", methods=["POST"])
483
  def generate_video():
484
+ temp_work_dir = None
485
  try:
486
  raw_data = request.get_json()
487
+ if not raw_data:
488
+ return jsonify({"error": "No JSON data provided"}), 400
489
+
490
+ raw_body = raw_data.get("jsondata", '')
491
+ if not raw_body:
492
+ return jsonify({"error": "No jsondata field in request"}), 400
493
+
494
  lst = raw_body.split("&&&&")
495
+ if len(lst) < 2:
496
+ return jsonify({"error": "Invalid data format, missing &&&&separator"}), 400
497
+
498
  cleaned = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', lst[0])
499
+
500
+ try:
501
+ nlist = ast.literal_eval(cleaned)
502
+ except Exception as e:
503
+ return jsonify({"error": f"Failed to parse slide data: {str(e)}"}), 400
504
+
505
+ datalst = []
506
+ total = 0.0
507
+
508
  for line in range(len(nlist)):
509
+ try:
510
+ total += float(nlist[line][3])
511
+ datalst.append({
512
+ "type": nlist[line][0].strip(),
513
+ "content": nlist[line][1].strip(),
514
+ "animation": nlist[line][2].strip().replace(" ", ""),
515
+ "duration": float(nlist[line][3])
516
+ })
517
+ except (IndexError, ValueError) as e:
518
+ return jsonify({"error": f"Invalid slide data at index {line}: {str(e)}"}), 400
519
+
520
+ if total <= 0:
521
+ total = 1.0
522
+
523
+ data = {
524
+ "video_settings": {
525
+ "background_color": "#0f0f23",
526
+ "text_color": "WHITE",
527
+ "highlight_color": "YELLOW",
528
+ "font": "CMU Serif",
529
+ "text_size": 36,
530
+ "equation_size": 42,
531
+ "title_size": 48
532
+ },
533
+ "slides": datalst
534
+ }
535
+
536
+ best = lst[1].split("&&&")
537
+ lines = best[0]
538
  try:
539
+ lang = best[1] if len(best) > 1 else "English"
540
  except:
541
+ lang = "English"
542
+
543
  length, audio_path = audio_func(0, lines, lang)
544
+
545
+ if not length or not audio_path or not os.path.exists(audio_path):
546
+ return jsonify({"error": "Failed to generate audio"}), 500
547
+
548
+ scale = float(length) / total if total > 0 else 1.0
549
+
 
 
 
 
 
 
 
 
550
  if "slides" not in data or not data["slides"]:
551
  return jsonify({"error": "No slides provided in request"}), 400
552
+
553
  print(f"Received request with {len(data['slides'])} slides")
554
+
 
555
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
556
  temp_work_dir = os.path.join(TEMP_DIR, f"manim_{timestamp}")
557
  os.makedirs(temp_work_dir, exist_ok=True)
558
+
 
559
  script_path = os.path.join(temp_work_dir, "scene.py")
560
+ create_manim_script(data, script_path, audio_path, scale)
561
  print(f"Created Manim script at {script_path}")
562
+
563
+ quality = 'l'
 
564
  render_command = [
565
  "manim",
566
  f"-q{quality}",
 
569
  script_path,
570
  "GeneratedMathScene"
571
  ]
572
+
573
  print(f"Running command: {' '.join(render_command)}")
574
+
575
  result = subprocess.run(
576
  render_command,
577
  capture_output=True,
 
579
  cwd=temp_work_dir,
580
  timeout=120
581
  )
582
+
583
  if result.returncode != 0:
584
  error_msg = result.stderr or result.stdout
585
  print(f"Manim rendering failed: {error_msg}")
 
587
  "error": "Manim rendering failed",
588
  "details": error_msg
589
  }), 500
590
+
591
  print("Manim rendering completed successfully")
592
+
 
593
  quality_map = {'l': '480p15', 'm': '720p30', 'h': '1080p60'}
594
  video_quality = quality_map.get(quality, '480p15')
595
+
596
  video_path = os.path.join(
597
  temp_work_dir,
598
  "videos",
 
600
  video_quality,
601
  "GeneratedMathScene.mp4"
602
  )
603
+
604
  if not os.path.exists(video_path):
605
  print(f"Video not found at expected path: {video_path}")
606
  return jsonify({
607
  "error": "Video file not found after rendering",
608
  "expected_path": video_path
609
  }), 500
610
+
611
  print(f"Video found at: {video_path}")
612
+
 
613
  output_filename = f"math_video_{timestamp}.mp4"
614
  output_path = os.path.join(MEDIA_DIR, output_filename)
615
  shutil.copy(video_path, output_path)
616
  print(f"Video copied to: {output_path}")
617
+
 
618
  try:
619
+ if temp_work_dir and os.path.exists(temp_work_dir):
620
+ shutil.rmtree(temp_work_dir)
621
  print("Cleaned up temp directory")
622
  except Exception as e:
623
  print(f"Failed to clean temp dir: {e}")
 
628
  as_attachment=False,
629
  download_name=output_filename
630
  )
631
+
632
  except subprocess.TimeoutExpired:
633
  print("Video rendering timeout")
634
+ if temp_work_dir and os.path.exists(temp_work_dir):
635
+ try:
636
+ shutil.rmtree(temp_work_dir)
637
+ except:
638
+ pass
639
  return jsonify({"error": "Video rendering timeout (120s)"}), 504
640
+
641
  except Exception as e:
642
  print(f"Error: {str(e)}")
643
  traceback.print_exc()
644
+ if temp_work_dir and os.path.exists(temp_work_dir):
645
+ try:
646
+ shutil.rmtree(temp_work_dir)
647
+ except:
648
+ pass
649
  return jsonify({
650
  "error": str(e),
651
  "traceback": traceback.format_exc()
652
  }), 500
653
 
654
+
655
  if __name__ == '__main__':
656
  port = int(os.environ.get('PORT', 7860))
657
+ app.run(host='0.0.0.0', port=port, debug=False)