sreepathi-ravikumar committed on
Commit
05986fb
·
verified ·
1 Parent(s): be98355

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -184
app.py CHANGED
@@ -47,6 +47,7 @@ import json
47
  from concurrent.futures import ThreadPoolExecutor
48
  from functools import lru_cache
49
  from typing import List, Tuple, Optional, Dict
 
50
 
51
  import edge_tts
52
  from pydub import AudioSegment
@@ -103,136 +104,108 @@ def clean_text_for_tts(text: str) -> str:
103
 
104
  return text.strip()
105
 
106
def split_by_word_boundary(text: str) -> List[str]:
    """
    Split text into consecutive runs that share one script.

    Characters in the Tamil Unicode block (U+0B80-U+0BFF) are tagged 'ta';
    any other alphabetic character and '-' are tagged 'en'. Everything else
    (spaces, digits, punctuation) inherits the language of the run it
    follows, so whitespace after a word stays attached to that word's run.

    While a Tamil run is open, a '-' that sits between an alphabetic
    character and a Tamil character is kept inside the Tamil run instead of
    opening an English one.

    Example: "Voltage னு" -> ["Voltage ", "னு"]

    Args:
        text: Text to split; may be empty.

    Returns:
        The runs in their original order. Concatenating them reproduces the
        input, except that runs made only of whitespace are dropped.
    """
    if not text:
        return []

    segments: List[str] = []
    buffer: List[str] = []   # characters of the run being built
    current_lang = None      # 'en', 'ta', or None before the first letter
    last_index = len(text) - 1

    for i, char in enumerate(text):
        # Classify the character.
        if '\u0B80' <= char <= '\u0BFF':
            char_lang = 'ta'
        elif char.isalpha() or char == '-':
            char_lang = 'en'
        else:
            # Neutral characters never force a language switch.
            char_lang = current_lang

        at_boundary = bool(
            current_lang and char_lang and current_lang != char_lang
        )

        # A hyphen inside a Tamil word stays with the open Tamil run.
        if at_boundary and char == '-' and 0 < i < last_index:
            if text[i - 1].isalpha() and '\u0B80' <= text[i + 1] <= '\u0BFF':
                buffer.append(char)
                continue

        if at_boundary:
            run = "".join(buffer)
            if run.strip():
                segments.append(run)
            buffer = [char]
            current_lang = char_lang
        else:
            buffer.append(char)
            current_lang = char_lang or current_lang

    tail = "".join(buffer)
    if tail.strip():
        segments.append(tail)

    return segments
157
 
158
def chunk_text_with_overlap(text: str, max_chars: int = 250) -> List[Tuple[str, int]]:
    """
    Group language segments into TTS-sized chunks with a short word overlap.

    The text is cleaned with clean_text_for_tts, split with
    split_by_word_boundary, and the segments are concatenated greedily while
    a chunk stays within max_chars characters and 20 whitespace-separated
    words. A single segment longer than max_chars is split on whitespace.
    Afterwards each chunk except the first is prefixed with the last three
    words of the previous chunk, provided the result still fits in
    max_chars.

    Args:
        text: Raw input text.
        max_chars: Upper bound on chunk length in characters.

    Returns:
        A list of (chunk_text, chunk_index) tuples in reading order; empty
        when the cleaned text is empty.
    """
    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return []

    segments = split_by_word_boundary(cleaned)

    chunks = []
    current_chunk = ""

    for segment in segments:
        candidate = current_chunk + segment if current_chunk else segment

        if len(candidate) <= max_chars and len(candidate.split()) <= 20:
            current_chunk = candidate
            continue

        # The segment does not fit: close the open chunk first.
        if current_chunk:
            chunks.append(current_chunk)

        if len(segment) <= max_chars:
            current_chunk = segment
            continue

        # Over-long segment: emit word-aligned pieces of at most max_chars.
        piece = ""
        for word in segment.split():
            extended = piece + " " + word if piece else word
            if len(extended) <= max_chars:
                piece = extended
            else:
                if piece:
                    chunks.append(piece)
                piece = word

        # Keep the segment's trailing whitespace so the next segment is not
        # fused onto the last word, and never carry over a chunk that was
        # already emitted above.
        trailing = segment[len(segment.rstrip()):]
        current_chunk = piece + trailing if piece else ""

    if current_chunk:
        chunks.append(current_chunk)

    # Prefix each chunk with up to three words from its predecessor.
    # NOTE(review): the overlapping words end up synthesized twice.
    overlapped_chunks = []
    for i, chunk in enumerate(chunks):
        if i > 0:
            overlap_words = chunks[i - 1].split()[-3:]
            if overlap_words:
                with_overlap = " ".join(overlap_words) + " " + chunk
                if len(with_overlap) <= max_chars:
                    chunk = with_overlap
        overlapped_chunks.append((chunk, i))

    return overlapped_chunks
236
 
237
  async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
238
  chunk_index: int) -> Tuple[Optional[str], int]:
@@ -314,38 +287,31 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
314
  print("Starting bilingual TTS processing...")
315
 
316
  try:
317
- # Split text into chunks with overlap
318
- chunks_with_indices = chunk_text_with_overlap(text, max_chars=250)
319
- if not chunks_with_indices:
320
  print("Error: No valid text chunks after processing")
321
  return None
322
 
323
- print(f"Processing {len(chunks_with_indices)} text chunks...")
324
 
325
- # Determine which chunks need Tamil voice
326
- chunks_to_generate = []
327
- for chunk_text, chunk_index in chunks_with_indices:
328
- has_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk_text)
329
-
330
- if VOICE_TA and has_tamil:
 
331
  voice = VOICE_TA
332
  else:
333
  voice = VOICE_TA or VOICE_EN
334
 
335
- chunks_to_generate.append((chunk_text, voice, chunk_index))
336
-
337
- # Semaphore for rate limiting
338
- semaphore = asyncio.Semaphore(max_concurrent)
339
-
340
- # Prepare tasks
341
- tasks = []
342
- for chunk_text, voice, chunk_index in chunks_to_generate:
343
  tasks.append(generate_safe_audio(chunk_text, voice, semaphore, chunk_index))
344
 
345
  # Generate all audio files
346
  results = await asyncio.gather(*tasks, return_exceptions=False)
347
 
348
- # Filter successful results and maintain order
349
  audio_data = []
350
  for result in results:
351
  if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
@@ -355,7 +321,7 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
355
  print("Error: No audio was successfully generated")
356
  return None
357
 
358
- # Sort by chunk index
359
  audio_data.sort(key=lambda x: x[1])
360
 
361
  print(f"Successfully generated {len(audio_data)} audio segments")
@@ -364,7 +330,7 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
364
  with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
365
  processed = list(executor.map(process_audio_segment_fast, audio_data))
366
 
367
- # Filter and sort
368
  processed = [(seg, idx) for seg, idx in processed if seg is not None]
369
  processed.sort(key=lambda x: x[1])
370
 
@@ -374,25 +340,26 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
374
  print("Error: No audio segments were successfully processed")
375
  return None
376
 
377
- print(f"Merging {len(audio_segments)} audio segments with crossfade...")
378
 
379
- # Merge with crossfade for smooth transitions
380
  merged_audio = audio_segments[0]
381
 
382
- for segment in audio_segments[1:]:
383
- # Crossfade 30ms for smooth transition
384
- merged_audio = merged_audio.append(segment, crossfade=30)
 
385
 
386
  # Apply compression for consistent volume
387
  try:
388
  merged_audio = merged_audio.compress_dynamic_range(
389
  threshold=-20.0,
390
- ratio=2.5, # Gentler compression for more natural sound
391
  attack=5.0,
392
  release=50.0
393
  )
394
  except:
395
- pass # Skip if compression fails
396
 
397
  merged_audio = normalize(merged_audio)
398
 
@@ -403,7 +370,7 @@ async def bilingual_tts_optimized(text: str, output_file: str = "audio0.mp3",
403
  print(f"✅ Audio successfully generated: {output_file}")
404
  return output_file
405
  else:
406
- print(f"Error: Generated file is empty or missing")
407
  return None
408
 
409
  except Exception as main_error:
@@ -487,9 +454,8 @@ def audio_func(id: int, lines, lang: str) -> Tuple[Optional[float], Optional[str
487
  print(f"Error in audio_func: {e}")
488
  traceback.print_exc()
489
  return None, None
490
-
491
- def create_manim_script(problem_data, script_path, audio_path, scale=1):
492
- """Generate Manim script from problem data with robust wrapping."""
493
 
494
  settings = problem_data.get("video_settings", {
495
  "background_color": "#0f0f23",
@@ -506,6 +472,27 @@ def create_manim_script(problem_data, script_path, audio_path, scale=1):
506
  if not slides:
507
  raise ValueError("No slides provided in input data")
508
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  slides_repr = repr(slides)
510
  audio_path_repr = repr(audio_path)
511
 
@@ -519,6 +506,7 @@ def create_manim_script(problem_data, script_path, audio_path, scale=1):
519
  title_size = settings.get("title_size", 48)
520
 
521
  manim_code = f"""from manim import *
 
522
  class GeneratedMathScene(Scene):
523
  def construct(self):
524
  # Scene settings
@@ -531,55 +519,47 @@ class GeneratedMathScene(Scene):
531
  equation_size = {equation_size}
532
  title_size = {title_size}
533
  wrap_width = {wrap_width}
534
-
 
535
  def make_inline_segments(content, color, font, text_size, equation_size):
536
  if not content:
537
  return VGroup()
538
-
539
- # Split by # separator
540
  segments = content.split("#")
541
- all_lines = [] # Store all lines
542
- current_line = [] # Current line being built
543
- current_width = 0
544
-
545
  for segment in segments:
546
  segment = segment.strip()
547
  if not segment:
548
  continue
549
-
550
- # Create the mobject (Text or MathTex)
551
  if segment.startswith("%"):
552
  latex_content = segment[1:]
553
  mob = MathTex(latex_content, color=color, font_size=equation_size)
554
  else:
555
  mob = Text(segment, color=color, font=font, font_size=text_size)
556
-
557
- # Calculate what width would be if we add this segment
558
  test_line = current_line + [mob]
559
  test_group = VGroup(*test_line).arrange(RIGHT, buff=0.05)
560
-
561
- # Check if adding this segment exceeds wrap_width
562
  if test_group.width > wrap_width and len(current_line) > 0:
563
- # Save current line and start new line
564
  line_group = VGroup(*current_line).arrange(RIGHT, buff=0.05)
565
  all_lines.append(line_group)
566
- current_line = [mob] # Start new line with current segment
567
  else:
568
- # Add to current line
569
  current_line.append(mob)
570
-
571
- # Add the last line
572
  if current_line:
573
  line_group = VGroup(*current_line).arrange(RIGHT, buff=0.05)
574
  all_lines.append(line_group)
575
-
576
  if not all_lines:
577
  return VGroup()
578
-
579
- # Stack all lines vertically
580
  final_group = VGroup(*all_lines).arrange(DOWN, aligned_edge=LEFT, buff=0.2)
581
  return final_group
582
-
583
  def make_wrapped_paragraph(content, color, font, font_size, line_spacing=0.2):
584
  lines = []
585
  words = content.split()
@@ -603,28 +583,31 @@ class GeneratedMathScene(Scene):
603
  ln.align_to(first_line, LEFT)
604
  para = VGroup(*lines).arrange(DOWN, aligned_edge=LEFT, buff=line_spacing)
605
  return para
606
-
607
  content_group = VGroup()
608
  current_y = 3.0
609
  line_spacing = 0.8
610
  slides = {slides_repr}
611
-
612
  for idx, slide in enumerate(slides):
613
  obj = None
614
  content = slide.get("content", "")
615
  animation = slide.get("animation", "write_left")
616
- scalelen = slide.get("duration", 1.0)
617
- duration = scalelen * {scale}
618
  slide_type = slide.get("type", "text")
619
-
 
 
 
 
 
 
620
  if slide_type == "title":
621
- # Use inline segments for title
622
  obj = make_inline_segments(content, highlight_color, default_font, title_size, equation_size)
623
-
624
- # Fallback to simple text if no inline segments
625
  if len(obj) == 0:
626
  obj = Text(content, color=highlight_color, font=default_font, font_size=title_size)
627
-
628
  if obj.width > wrap_width:
629
  obj.scale_to_fit_width(wrap_width)
630
  obj.move_to(ORIGIN)
@@ -632,19 +615,16 @@ class GeneratedMathScene(Scene):
632
  self.wait(duration * 0.3)
633
  self.play(FadeOut(obj), run_time=duration * 0.3)
634
  continue
635
-
636
  elif slide_type == "text":
637
- # Use inline segments for text
638
  obj = make_inline_segments(content, default_color, default_font, text_size, equation_size)
639
-
640
- # Fallback if no inline segments detected
641
  if len(obj) == 0:
642
  obj = make_wrapped_paragraph(content, default_color, default_font, text_size, line_spacing=0.25)
643
-
644
- # Handle width overflow
645
  if obj.width > wrap_width:
646
  obj.scale_to_fit_width(wrap_width)
647
-
648
  elif slide_type == "equation":
649
  eq_content = content
650
  test = MathTex(eq_content, color=default_color, font_size=equation_size)
@@ -653,25 +633,25 @@ class GeneratedMathScene(Scene):
653
  mid = len(parts) // 2
654
  line1 = " ".join(parts[:mid])
655
  line2 = " ".join(parts[mid:])
656
- wrapped_eq = f"{{{{line1}}}} \\\\\\\\ {{{{line2}}}}"
657
  obj = MathTex(wrapped_eq, color=default_color, font_size=equation_size)
658
  else:
659
  obj = MathTex(eq_content, color=default_color, font_size=equation_size)
660
  if obj.width > wrap_width:
661
  obj.scale_to_fit_width(wrap_width)
662
-
663
  if obj:
664
  obj.to_edge(LEFT, buff=0.3)
665
  obj.shift(UP * (current_y - obj.height / 2))
666
  obj_bottom = obj.get_bottom()[1]
667
-
668
  if obj_bottom < -3.5:
669
  scroll_amount = abs(obj_bottom - (-3.5)) + 0.3
670
  self.play(content_group.animate.shift(UP * scroll_amount), run_time=0.5)
671
  current_y += scroll_amount
672
  obj.shift(UP * scroll_amount)
673
  obj.to_edge(LEFT, buff=0.3)
674
-
675
  if animation == "write_left":
676
  self.play(Write(obj), run_time=duration)
677
  elif animation == "fade_in":
@@ -681,11 +661,11 @@ class GeneratedMathScene(Scene):
681
  self.play(obj.animate.set_color(highlight_color), run_time=duration * 0.4)
682
  else:
683
  self.play(Write(obj), run_time=duration)
684
-
685
  content_group.add(obj)
686
  current_y -= (getattr(obj, "height", 0) + line_spacing)
687
  self.wait(0.3)
688
-
689
  if len(content_group) > 0:
690
  final_box = SurroundingRectangle(content_group[-1], color=highlight_color, buff=0.2)
691
  self.play(Create(final_box), run_time=0.8)
@@ -696,16 +676,14 @@ class GeneratedMathScene(Scene):
696
  with open(script_path, 'w', encoding='utf-8') as f:
697
  f.write(manim_code)
698
  print(f"Generated script at {script_path}")
 
 
 
699
  except Exception as e:
700
  print(f"Error writing script: {e}")
701
  raise
702
 
703
 
704
@app.route("/")
def home():
    """Return a plain-text banner for the root URL, usable as a liveness check."""
    return "Flask Manim Video Generator is Running"
707
-
708
-
709
  @app.route("/generate", methods=["POST"])
710
  def generate_video():
711
  temp_work_dir = None
@@ -730,11 +708,9 @@ def generate_video():
730
  return jsonify({"error": f"Failed to parse slide data: {str(e)}"}), 400
731
 
732
  datalst = []
733
- total = 0.0
734
 
735
  for line in range(len(nlist)):
736
  try:
737
- total += float(nlist[line][3])
738
  datalst.append({
739
  "type": nlist[line][0].strip(),
740
  "content": nlist[line][1].strip(),
@@ -744,9 +720,6 @@ def generate_video():
744
  except (IndexError, ValueError) as e:
745
  return jsonify({"error": f"Invalid slide data at index {line}: {str(e)}"}), 400
746
 
747
- if total <= 0:
748
- total = 1.0
749
-
750
  data = {
751
  "video_settings": {
752
  "background_color": "#0f0f23",
@@ -767,24 +740,25 @@ def generate_video():
767
  except:
768
  lang = "English"
769
 
770
- length, audio_path = audio_func(0, lines, lang)
771
 
772
- if not length or not audio_path or not os.path.exists(audio_path):
773
  return jsonify({"error": "Failed to generate audio"}), 500
774
 
775
- scale = float(length) / total if total > 0 else 1.0
776
-
777
  if "slides" not in data or not data["slides"]:
778
  return jsonify({"error": "No slides provided in request"}), 400
779
 
780
  print(f"Received request with {len(data['slides'])} slides")
 
781
 
782
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
783
  temp_work_dir = os.path.join(TEMP_DIR, f"manim_{timestamp}")
784
  os.makedirs(temp_work_dir, exist_ok=True)
785
 
786
  script_path = os.path.join(temp_work_dir, "scene.py")
787
- create_manim_script(data, script_path, audio_path, scale)
 
 
788
  print(f"Created Manim script at {script_path}")
789
 
790
  quality = 'l'
 
47
  from concurrent.futures import ThreadPoolExecutor
48
  from functools import lru_cache
49
  from typing import List, Tuple, Optional, Dict
50
+ import heapq
51
 
52
  import edge_tts
53
  from pydub import AudioSegment
 
104
 
105
  return text.strip()
106
 
107
def split_by_language_and_words(text: str) -> List[Tuple[str, str]]:
    """
    Group whitespace-separated words into runs of the same language.

    A word is tagged 'ta' when it contains at least one character from the
    Tamil Unicode block (U+0B80-U+0BFF) and 'en' otherwise. Code-switched
    hyphenated words such as "simple-ஆ" therefore count as Tamil, and words
    made only of digits or punctuation count as English. Consecutive words
    with the same tag are joined with single spaces.

    Args:
        text: Text to split; may be empty or whitespace-only.

    Returns:
        A list of (text_segment, language) tuples in reading order, where
        language is 'ta' or 'en'. Adjacent tuples always differ in language.
    """
    if not text:
        return []

    segments: List[Tuple[str, str]] = []
    run_words: List[str] = []   # words of the run being built
    run_lang = None             # language of that run

    for word in text.split():
        has_tamil = any('\u0B80' <= char <= '\u0BFF' for char in word)
        word_lang = 'ta' if has_tamil else 'en'

        # Close the open run when the language switches.
        if run_words and word_lang != run_lang:
            segments.append((" ".join(run_words), run_lang))
            run_words = []

        run_words.append(word)
        run_lang = word_lang

    if run_words:
        segments.append((" ".join(run_words), run_lang))

    return segments
165
 
166
def _pack_words(words: List[str], max_chars: int) -> List[str]:
    """Greedily join words with single spaces into pieces of at most max_chars.

    A single word longer than max_chars is kept whole as its own piece.
    """
    pieces = []
    current = []
    current_len = 0
    for word in words:
        added = len(word) + (1 if current else 0)
        if current and current_len + added > max_chars:
            pieces.append(" ".join(current))
            current = [word]
            current_len = len(word)
        else:
            current.append(word)
            current_len += added
    if current:
        pieces.append(" ".join(current))
    return pieces


def create_intelligent_chunks(text: str, max_chars: int = 250) -> List[Tuple[str, int, str]]:
    """
    Build single-language TTS chunks no longer than max_chars.

    The text is cleaned with clean_text_for_tts and split into language
    segments with split_by_language_and_words. A chunk never mixes
    languages. Each run of same-language text is packed at word boundaries
    into pieces of at most max_chars characters, so a long monolingual
    passage becomes several chunks instead of one oversized chunk. A single
    word longer than max_chars is kept whole.

    Args:
        text: Raw input text.
        max_chars: Upper bound on chunk length in characters.

    Returns:
        A list of (chunk_text, chunk_index, language) tuples in reading
        order, with chunk_index counting from 0; empty when the cleaned text
        is empty.
    """
    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return []

    language_segments = split_by_language_and_words(cleaned)

    pieces = []      # (chunk_text, language) in reading order
    run_words = []   # words of the current same-language run
    run_lang = None

    for segment, seg_lang in language_segments:
        if not segment:
            continue

        # A language switch closes the current run.
        if run_words and seg_lang != run_lang:
            pieces.extend((p, run_lang) for p in _pack_words(run_words, max_chars))
            run_words = []

        run_words.extend(segment.split())
        run_lang = seg_lang

    if run_words:
        pieces.extend((p, run_lang) for p in _pack_words(run_words, max_chars))

    return [(chunk, index, lang) for index, (chunk, lang) in enumerate(pieces)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  async def generate_safe_audio(text: str, voice: str, semaphore: asyncio.Semaphore,
211
  chunk_index: int) -> Tuple[Optional[str], int]:
 
287
  print("Starting bilingual TTS processing...")
288
 
289
  try:
290
+ # Create intelligent chunks
291
+ chunks_info = create_intelligent_chunks(text, max_chars=250)
292
+ if not chunks_info:
293
  print("Error: No valid text chunks after processing")
294
  return None
295
 
296
+ print(f"Processing {len(chunks_info)} text chunks...")
297
 
298
+ # Prepare tasks with proper voice assignment
299
+ tasks = []
300
+ semaphore = asyncio.Semaphore(max_concurrent)
301
+
302
+ for chunk_text, chunk_index, chunk_lang in chunks_info:
303
+ # Determine voice for this chunk
304
+ if VOICE_TA and chunk_lang == 'ta':
305
  voice = VOICE_TA
306
  else:
307
  voice = VOICE_TA or VOICE_EN
308
 
 
 
 
 
 
 
 
 
309
  tasks.append(generate_safe_audio(chunk_text, voice, semaphore, chunk_index))
310
 
311
  # Generate all audio files
312
  results = await asyncio.gather(*tasks, return_exceptions=False)
313
 
314
+ # Filter successful results and sort by INTEGER index (not string!)
315
  audio_data = []
316
  for result in results:
317
  if isinstance(result, tuple) and result[0] and os.path.exists(result[0]):
 
321
  print("Error: No audio was successfully generated")
322
  return None
323
 
324
+ # Sort by chunk index (integer)
325
  audio_data.sort(key=lambda x: x[1])
326
 
327
  print(f"Successfully generated {len(audio_data)} audio segments")
 
330
  with ThreadPoolExecutor(max_workers=min(len(audio_data), 8)) as executor:
331
  processed = list(executor.map(process_audio_segment_fast, audio_data))
332
 
333
+ # Filter and sort by index
334
  processed = [(seg, idx) for seg, idx in processed if seg is not None]
335
  processed.sort(key=lambda x: x[1])
336
 
 
340
  print("Error: No audio segments were successfully processed")
341
  return None
342
 
343
+ print(f"Merging {len(audio_segments)} audio segments...")
344
 
345
+ # Merge segments in correct order
346
  merged_audio = audio_segments[0]
347
 
348
+ for i in range(1, len(audio_segments)):
349
+ # Add a small pause between segments
350
+ pause = AudioSegment.silent(duration=100)
351
+ merged_audio = merged_audio + pause + audio_segments[i]
352
 
353
  # Apply compression for consistent volume
354
  try:
355
  merged_audio = merged_audio.compress_dynamic_range(
356
  threshold=-20.0,
357
+ ratio=2.5,
358
  attack=5.0,
359
  release=50.0
360
  )
361
  except:
362
+ pass
363
 
364
  merged_audio = normalize(merged_audio)
365
 
 
370
  print(f"✅ Audio successfully generated: {output_file}")
371
  return output_file
372
  else:
373
+ print("Error: Generated file is empty or missing")
374
  return None
375
 
376
  except Exception as main_error:
 
454
  print(f"Error in audio_func: {e}")
455
  traceback.print_exc()
456
  return None, None
457
+ def create_manim_script(problem_data, script_path, audio_path, audio_length):
458
+ """Generate Manim script with selective timing adjustment - only equations scale to audio."""
 
459
 
460
  settings = problem_data.get("video_settings", {
461
  "background_color": "#0f0f23",
 
472
  if not slides:
473
  raise ValueError("No slides provided in input data")
474
 
475
+ # Calculate separate durations for different slide types
476
+ equation_duration = 0.0
477
+ text_title_duration = 0.0
478
+
479
+ for slide in slides:
480
+ slide_duration = float(slide.get("duration", 1.0))
481
+ if slide.get("type") == "equation":
482
+ equation_duration += slide_duration
483
+ else: # text or title
484
+ text_title_duration += slide_duration
485
+
486
+ # Calculate equation scale factor to fill remaining audio time
487
+ target_equation_time = audio_length - text_title_duration
488
+
489
+ if equation_duration > 0 and target_equation_time > 0:
490
+ equation_scale = target_equation_time / equation_duration
491
+ # Prevent extreme scaling (between 0.5x and 2.5x)
492
+ equation_scale = max(0.5, min(2.5, equation_scale))
493
+ else:
494
+ equation_scale = 1.0
495
+
496
  slides_repr = repr(slides)
497
  audio_path_repr = repr(audio_path)
498
 
 
506
  title_size = settings.get("title_size", 48)
507
 
508
  manim_code = f"""from manim import *
509
+
510
  class GeneratedMathScene(Scene):
511
  def construct(self):
512
  # Scene settings
 
519
  equation_size = {equation_size}
520
  title_size = {title_size}
521
  wrap_width = {wrap_width}
522
+ equation_scale = {equation_scale} # Only equations scale to audio
523
+
524
  def make_inline_segments(content, color, font, text_size, equation_size):
525
  if not content:
526
  return VGroup()
527
+
 
528
  segments = content.split("#")
529
+ all_lines = []
530
+ current_line = []
531
+
 
532
  for segment in segments:
533
  segment = segment.strip()
534
  if not segment:
535
  continue
536
+
 
537
  if segment.startswith("%"):
538
  latex_content = segment[1:]
539
  mob = MathTex(latex_content, color=color, font_size=equation_size)
540
  else:
541
  mob = Text(segment, color=color, font=font, font_size=text_size)
542
+
 
543
  test_line = current_line + [mob]
544
  test_group = VGroup(*test_line).arrange(RIGHT, buff=0.05)
545
+
 
546
  if test_group.width > wrap_width and len(current_line) > 0:
 
547
  line_group = VGroup(*current_line).arrange(RIGHT, buff=0.05)
548
  all_lines.append(line_group)
549
+ current_line = [mob]
550
  else:
 
551
  current_line.append(mob)
552
+
 
553
  if current_line:
554
  line_group = VGroup(*current_line).arrange(RIGHT, buff=0.05)
555
  all_lines.append(line_group)
556
+
557
  if not all_lines:
558
  return VGroup()
559
+
 
560
  final_group = VGroup(*all_lines).arrange(DOWN, aligned_edge=LEFT, buff=0.2)
561
  return final_group
562
+
563
  def make_wrapped_paragraph(content, color, font, font_size, line_spacing=0.2):
564
  lines = []
565
  words = content.split()
 
583
  ln.align_to(first_line, LEFT)
584
  para = VGroup(*lines).arrange(DOWN, aligned_edge=LEFT, buff=line_spacing)
585
  return para
586
+
587
  content_group = VGroup()
588
  current_y = 3.0
589
  line_spacing = 0.8
590
  slides = {slides_repr}
591
+
592
  for idx, slide in enumerate(slides):
593
  obj = None
594
  content = slide.get("content", "")
595
  animation = slide.get("animation", "write_left")
596
+ base_duration = slide.get("duration", 1.0)
 
597
  slide_type = slide.get("type", "text")
598
+
599
+ # Apply scale ONLY to equations, not text or title
600
+ if slide_type == "equation":
601
+ duration = base_duration * equation_scale
602
+ else:
603
+ duration = base_duration # Keep original timing for text/title
604
+
605
  if slide_type == "title":
 
606
  obj = make_inline_segments(content, highlight_color, default_font, title_size, equation_size)
607
+
 
608
  if len(obj) == 0:
609
  obj = Text(content, color=highlight_color, font=default_font, font_size=title_size)
610
+
611
  if obj.width > wrap_width:
612
  obj.scale_to_fit_width(wrap_width)
613
  obj.move_to(ORIGIN)
 
615
  self.wait(duration * 0.3)
616
  self.play(FadeOut(obj), run_time=duration * 0.3)
617
  continue
618
+
619
  elif slide_type == "text":
 
620
  obj = make_inline_segments(content, default_color, default_font, text_size, equation_size)
621
+
 
622
  if len(obj) == 0:
623
  obj = make_wrapped_paragraph(content, default_color, default_font, text_size, line_spacing=0.25)
624
+
 
625
  if obj.width > wrap_width:
626
  obj.scale_to_fit_width(wrap_width)
627
+
628
  elif slide_type == "equation":
629
  eq_content = content
630
  test = MathTex(eq_content, color=default_color, font_size=equation_size)
 
633
  mid = len(parts) // 2
634
  line1 = " ".join(parts[:mid])
635
  line2 = " ".join(parts[mid:])
636
+ wrapped_eq = f"{{{{line1}}}} \\\\ {{{{line2}}}}"
637
  obj = MathTex(wrapped_eq, color=default_color, font_size=equation_size)
638
  else:
639
  obj = MathTex(eq_content, color=default_color, font_size=equation_size)
640
  if obj.width > wrap_width:
641
  obj.scale_to_fit_width(wrap_width)
642
+
643
  if obj:
644
  obj.to_edge(LEFT, buff=0.3)
645
  obj.shift(UP * (current_y - obj.height / 2))
646
  obj_bottom = obj.get_bottom()[1]
647
+
648
  if obj_bottom < -3.5:
649
  scroll_amount = abs(obj_bottom - (-3.5)) + 0.3
650
  self.play(content_group.animate.shift(UP * scroll_amount), run_time=0.5)
651
  current_y += scroll_amount
652
  obj.shift(UP * scroll_amount)
653
  obj.to_edge(LEFT, buff=0.3)
654
+
655
  if animation == "write_left":
656
  self.play(Write(obj), run_time=duration)
657
  elif animation == "fade_in":
 
661
  self.play(obj.animate.set_color(highlight_color), run_time=duration * 0.4)
662
  else:
663
  self.play(Write(obj), run_time=duration)
664
+
665
  content_group.add(obj)
666
  current_y -= (getattr(obj, "height", 0) + line_spacing)
667
  self.wait(0.3)
668
+
669
  if len(content_group) > 0:
670
  final_box = SurroundingRectangle(content_group[-1], color=highlight_color, buff=0.2)
671
  self.play(Create(final_box), run_time=0.8)
 
676
  with open(script_path, 'w', encoding='utf-8') as f:
677
  f.write(manim_code)
678
  print(f"Generated script at {script_path}")
679
+ print(f"Equation scale factor: {equation_scale:.2f}x")
680
+ print(f"Text/Title duration: {text_title_duration:.2f}s (unchanged)")
681
+ print(f"Equation duration: {equation_duration:.2f}s -> {equation_duration * equation_scale:.2f}s")
682
  except Exception as e:
683
  print(f"Error writing script: {e}")
684
  raise
685
 
686
 
 
 
 
 
 
687
  @app.route("/generate", methods=["POST"])
688
  def generate_video():
689
  temp_work_dir = None
 
708
  return jsonify({"error": f"Failed to parse slide data: {str(e)}"}), 400
709
 
710
  datalst = []
 
711
 
712
  for line in range(len(nlist)):
713
  try:
 
714
  datalst.append({
715
  "type": nlist[line][0].strip(),
716
  "content": nlist[line][1].strip(),
 
720
  except (IndexError, ValueError) as e:
721
  return jsonify({"error": f"Invalid slide data at index {line}: {str(e)}"}), 400
722
 
 
 
 
723
  data = {
724
  "video_settings": {
725
  "background_color": "#0f0f23",
 
740
  except:
741
  lang = "English"
742
 
743
+ audio_length, audio_path = audio_func(0, lines, lang)
744
 
745
+ if not audio_length or not audio_path or not os.path.exists(audio_path):
746
  return jsonify({"error": "Failed to generate audio"}), 500
747
 
 
 
748
  if "slides" not in data or not data["slides"]:
749
  return jsonify({"error": "No slides provided in request"}), 400
750
 
751
  print(f"Received request with {len(data['slides'])} slides")
752
+ print(f"Audio length: {audio_length}s")
753
 
754
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
755
  temp_work_dir = os.path.join(TEMP_DIR, f"manim_{timestamp}")
756
  os.makedirs(temp_work_dir, exist_ok=True)
757
 
758
  script_path = os.path.join(temp_work_dir, "scene.py")
759
+
760
+ # Pass audio_length instead of scale
761
+ create_manim_script(data, script_path, audio_path, audio_length)
762
  print(f"Created Manim script at {script_path}")
763
 
764
  quality = 'l'