Spaces:
Configuration error
Configuration error
Commit ·
ee6d55e
1
Parent(s): 83cbaf8
Fix visual clutter and audio sync: enhanced layout zones, audio concatenation, proper timing
Browse files
- backend/compiler.py +23 -8
- backend/main.py +22 -5
- backend/narrator.py +56 -1
- backend/runner.py +10 -1
backend/compiler.py
CHANGED
|
@@ -79,15 +79,21 @@ VISUAL RULES (CRITICAL)
|
|
| 79 |
- Do not let squares, shapes, or arrows clip off-screen.
|
| 80 |
- Standard frame is [-7, 7] horizontally and [-4, 4] vertically. Keep well within this.
|
| 81 |
|
| 82 |
-
2. No overlapping elements.
|
| 83 |
-
-
|
| 84 |
-
-
|
| 85 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
3. Visual accuracy FIRST.
|
| 88 |
- Show geometry clearly.
|
| 89 |
- Avoid rotating or stretching objects unnecessarily.
|
| 90 |
- Avoid random effects.
|
|
|
|
| 91 |
|
| 92 |
────────────────────────────
|
| 93 |
|
|
@@ -95,10 +101,12 @@ ANIMATION RULES
|
|
| 95 |
|
| 96 |
────────────────────────────
|
| 97 |
|
| 98 |
-
1.
|
| 99 |
-
-
|
| 100 |
-
-
|
| 101 |
-
-
|
|
|
|
|
|
|
| 102 |
|
| 103 |
2. Only use these animations:
|
| 104 |
Create
|
|
@@ -112,6 +120,13 @@ ANIMATION RULES
|
|
| 112 |
|
| 113 |
3. No 3D, no camera zoom, no cinematic effects, no physics.
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
────────────────────────────
|
| 116 |
|
| 117 |
STRUCTURE & PACING
|
|
|
|
| 79 |
- Do not let squares, shapes, or arrows clip off-screen.
|
| 80 |
- Standard frame is [-7, 7] horizontally and [-4, 4] vertically. Keep well within this.
|
| 81 |
|
| 82 |
+
2. MANDATORY SPACING - No overlapping elements.
|
| 83 |
+
- Use FadeOut() to CLEAR previous content before showing new content
|
| 84 |
+
- Minimum buffer between objects: buff=0.5 (NEVER less than 0.4)
|
| 85 |
+
- Use screen zones:
|
| 86 |
+
* TOP zone (y=3 to y=2): For titles/headers
|
| 87 |
+
* MIDDLE zone (y=1 to y=-1): For main content
|
| 88 |
+
* BOTTOM zone (y=-2 to y=-3): For explanations/labels
|
| 89 |
+
- ALWAYS clear screen between major steps using self.play(FadeOut(VGroup(*self.mobjects)))
|
| 90 |
+
- Position text with .to_edge(UP/DOWN) or .shift(UP*2 / DOWN*2) to avoid center crowding
|
| 91 |
|
| 92 |
3. Visual accuracy FIRST.
|
| 93 |
- Show geometry clearly.
|
| 94 |
- Avoid rotating or stretching objects unnecessarily.
|
| 95 |
- Avoid random effects.
|
| 96 |
+
- Use scale(0.7) for text if needed to prevent overflow
|
| 97 |
|
| 98 |
────────────────────────────
|
| 99 |
|
|
|
|
| 101 |
|
| 102 |
────────────────────────────
|
| 103 |
|
| 104 |
+
1. Timing for audio synchronization:
|
| 105 |
+
- Each narration step gets approximately 3-4 seconds of animation
|
| 106 |
+
- Use run_time=1.0 for Write() and Create()
|
| 107 |
+
- Use run_time=0.8 for FadeIn/FadeOut
|
| 108 |
+
- Add self.wait(1.5) between major steps for narration
|
| 109 |
+
- TOTAL scene duration should be 12-20 seconds
|
| 110 |
|
| 111 |
2. Only use these animations:
|
| 112 |
Create
|
|
|
|
| 120 |
|
| 121 |
3. No 3D, no camera zoom, no cinematic effects, no physics.
|
| 122 |
|
| 123 |
+
4. STRUCTURE each step clearly:
|
| 124 |
+
- Clear previous content with FadeOut
|
| 125 |
+
- Show new title/concept
|
| 126 |
+
- Display visual elements one by one
|
| 127 |
+
- Add wait time for narration
|
| 128 |
+
- Transition to next step
|
| 129 |
+
|
| 130 |
────────────────────────────
|
| 131 |
|
| 132 |
STRUCTURE & PACING
|
backend/main.py
CHANGED
|
@@ -74,15 +74,30 @@ async def process_video_generation(prompt: str):
|
|
| 74 |
# 2. Narrator: Generate per-step narration audio files
|
| 75 |
steps = outline.get("steps", [])
|
| 76 |
step_audio_paths = []
|
| 77 |
-
from narrator import generate_narration_audio
|
|
|
|
|
|
|
| 78 |
for idx, step in enumerate(steps):
|
| 79 |
narration = step.get("narration", "")
|
| 80 |
if narration:
|
| 81 |
audio_filename = f"step_{idx+1}_narration.mp3"
|
| 82 |
audio_path = generate_narration_audio(narration, filename=audio_filename)
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
else:
|
| 85 |
step_audio_paths.append(None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# 3. Coding
|
| 88 |
job_status["stage"] = "coding"
|
|
@@ -94,10 +109,12 @@ async def process_video_generation(prompt: str):
|
|
| 94 |
job_status["message"] = "Rendering Animation Frames..."
|
| 95 |
video_path = await render_scene(code)
|
| 96 |
|
| 97 |
-
# 5. Merge audio if available
|
| 98 |
-
if
|
| 99 |
from narrator import merge_audio_video
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
|
| 102 |
# Success
|
| 103 |
relative_path = os.path.relpath(video_path, start=MEDIA_DIR).replace("\\", "/")
|
|
|
|
| 74 |
# 2. Narrator: Generate per-step narration audio files
|
| 75 |
steps = outline.get("steps", [])
|
| 76 |
step_audio_paths = []
|
| 77 |
+
from narrator import generate_narration_audio, concatenate_audio_files, get_audio_duration
|
| 78 |
+
|
| 79 |
+
total_audio_duration = 0.0
|
| 80 |
for idx, step in enumerate(steps):
|
| 81 |
narration = step.get("narration", "")
|
| 82 |
if narration:
|
| 83 |
audio_filename = f"step_{idx+1}_narration.mp3"
|
| 84 |
audio_path = generate_narration_audio(narration, filename=audio_filename)
|
| 85 |
+
if audio_path:
|
| 86 |
+
step_audio_paths.append(audio_path)
|
| 87 |
+
duration = get_audio_duration(audio_path)
|
| 88 |
+
total_audio_duration += duration
|
| 89 |
+
print(f"✓ Step {idx+1} audio: {duration:.2f}s")
|
| 90 |
+
else:
|
| 91 |
+
step_audio_paths.append(None)
|
| 92 |
else:
|
| 93 |
step_audio_paths.append(None)
|
| 94 |
+
|
| 95 |
+
# Combine all step audios into one file
|
| 96 |
+
combined_audio_path = None
|
| 97 |
+
if step_audio_paths and any(step_audio_paths):
|
| 98 |
+
combined_audio_path = concatenate_audio_files(step_audio_paths, output_filename="combined_narration.mp3")
|
| 99 |
+
if combined_audio_path:
|
| 100 |
+
print(f"✓ Combined audio duration: {total_audio_duration:.2f}s")
|
| 101 |
|
| 102 |
# 3. Coding
|
| 103 |
job_status["stage"] = "coding"
|
|
|
|
| 109 |
job_status["message"] = "Rendering Animation Frames..."
|
| 110 |
video_path = await render_scene(code)
|
| 111 |
|
| 112 |
+
# 5. Merge audio with video if available
|
| 113 |
+
if combined_audio_path:
|
| 114 |
from narrator import merge_audio_video
|
| 115 |
+
job_status["message"] = "Merging Audio with Video..."
|
| 116 |
+
video_path = merge_audio_video(video_path, combined_audio_path)
|
| 117 |
+
print(f"✓ Final video with audio: {video_path}")
|
| 118 |
|
| 119 |
# Success
|
| 120 |
relative_path = os.path.relpath(video_path, start=MEDIA_DIR).replace("\\", "/")
|
backend/narrator.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
from gtts import gTTS
|
| 3 |
-
from moviepy import VideoFileClip, AudioFileClip, CompositeAudioClip
|
| 4 |
import uuid
|
| 5 |
|
| 6 |
# Get the directory of the current script (backend/)
|
|
@@ -11,6 +11,61 @@ AUDIO_DIR = os.path.join(MEDIA_DIR, "audio")
|
|
| 11 |
# Ensure audio directory exists
|
| 12 |
os.makedirs(AUDIO_DIR, exist_ok=True)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def generate_narration_audio(text: str, filename: str = None) -> str:
|
| 15 |
"""
|
| 16 |
Generates an MP3 audio file from the given text using gTTS.
|
|
|
|
| 1 |
import os
|
| 2 |
from gtts import gTTS
|
| 3 |
+
from moviepy import VideoFileClip, AudioFileClip, CompositeAudioClip, concatenate_audioclips
|
| 4 |
import uuid
|
| 5 |
|
| 6 |
# Get the directory of the current script (backend/)
|
|
|
|
| 11 |
# Ensure audio directory exists
|
| 12 |
os.makedirs(AUDIO_DIR, exist_ok=True)
|
| 13 |
|
| 14 |
+
def get_audio_duration(audio_path: str) -> float:
    """Get the duration of an audio file in seconds.

    Args:
        audio_path: Path to the audio file. May be None or empty.

    Returns:
        The clip duration as a float. Returns 0.0 when the path is
        empty, does not exist, the clip reports no duration, or the
        file cannot be read (the error is printed, not raised).
    """
    try:
        if not audio_path or not os.path.exists(audio_path):
            return 0.0

        audio = AudioFileClip(audio_path)
        try:
            duration = audio.duration
        finally:
            # Release the underlying reader even if reading the duration fails.
            audio.close()

        # Callers add this value to a running total, so never hand back None.
        return float(duration) if duration is not None else 0.0
    except Exception as e:
        print(f"Error getting audio duration: {e}")
        return 0.0
|
| 26 |
+
|
| 27 |
+
def concatenate_audio_files(audio_paths: list, output_filename: str = None) -> str:
    """
    Concatenates multiple audio files into one.

    Args:
        audio_paths: Audio file paths in playback order. Entries that are
            None/empty or that do not exist on disk are skipped.
        output_filename: File name for the combined audio, written under
            BASE_DIR. When omitted, a unique
            "combined_narration_<uuid>.mp3" name is generated.

    Returns:
        The path to the combined audio file. When exactly one valid input
        exists, that input path is returned unchanged and nothing is
        written. Returns None when there is nothing to concatenate or when
        an error occurs (the error is printed, not raised).
    """
    clips = []
    final_audio = None
    try:
        # Filter out None values and non-existent files
        valid_paths = [p for p in audio_paths if p and os.path.exists(p)]

        if not valid_paths:
            print("No valid audio files to concatenate")
            return None

        if len(valid_paths) == 1:
            # Only one file, just return it
            return valid_paths[0]

        # Open clips one at a time so that, if a later file fails to load,
        # the ones already opened are still tracked for cleanup below.
        for path in valid_paths:
            clips.append(AudioFileClip(path))

        final_audio = concatenate_audioclips(clips)

        # Generate output path
        if not output_filename:
            run_id = str(uuid.uuid4())
            output_filename = f"combined_narration_{run_id}.mp3"

        output_path = os.path.join(BASE_DIR, output_filename)
        final_audio.write_audiofile(output_path, logger=None)

        return output_path

    except Exception as e:
        print(f"Error concatenating audio files: {e}")
        return None

    finally:
        # Release file handles on every exit path, not only on success.
        # A failing close() is logged and must not mask the result above.
        for handle in clips + ([final_audio] if final_audio is not None else []):
            try:
                handle.close()
            except Exception as close_error:
                print(f"Error closing audio clip: {close_error}")
|
| 68 |
+
|
| 69 |
def generate_narration_audio(text: str, filename: str = None) -> str:
|
| 70 |
"""
|
| 71 |
Generates an MP3 audio file from the given text using gTTS.
|
backend/runner.py
CHANGED
|
@@ -35,7 +35,7 @@ def cleanup_old_files():
|
|
| 35 |
except Exception as e:
|
| 36 |
print(f"⚠️ Could not remove {old_folder}: {e}")
|
| 37 |
|
| 38 |
-
# Remove old audio files (keep only last 5)
|
| 39 |
audio_files = sorted(glob.glob(os.path.join(BASE_DIR, "step_*_narration.mp3")), key=os.path.getmtime)
|
| 40 |
for old_audio in audio_files[:-5]:
|
| 41 |
try:
|
|
@@ -43,6 +43,15 @@ def cleanup_old_files():
|
|
| 43 |
print(f"🗑️ Cleaned up old audio file: {old_audio}")
|
| 44 |
except Exception as e:
|
| 45 |
print(f"⚠️ Could not remove {old_audio}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
except Exception as e:
|
| 48 |
print(f"⚠️ Cleanup error: {e}")
|
|
|
|
| 35 |
except Exception as e:
|
| 36 |
print(f"⚠️ Could not remove {old_folder}: {e}")
|
| 37 |
|
| 38 |
+
# Remove old step audio files (keep only last 5)
|
| 39 |
audio_files = sorted(glob.glob(os.path.join(BASE_DIR, "step_*_narration.mp3")), key=os.path.getmtime)
|
| 40 |
for old_audio in audio_files[:-5]:
|
| 41 |
try:
|
|
|
|
| 43 |
print(f"🗑️ Cleaned up old audio file: {old_audio}")
|
| 44 |
except Exception as e:
|
| 45 |
print(f"⚠️ Could not remove {old_audio}: {e}")
|
| 46 |
+
|
| 47 |
+
# Remove old combined audio files (keep only last 2)
|
| 48 |
+
combined_files = sorted(glob.glob(os.path.join(BASE_DIR, "combined_narration*.mp3")), key=os.path.getmtime)
|
| 49 |
+
for old_combined in combined_files[:-2]:
|
| 50 |
+
try:
|
| 51 |
+
os.remove(old_combined)
|
| 52 |
+
print(f"🗑️ Cleaned up old combined audio: {old_combined}")
|
| 53 |
+
except Exception as e:
|
| 54 |
+
print(f"⚠️ Could not remove {old_combined}: {e}")
|
| 55 |
|
| 56 |
except Exception as e:
|
| 57 |
print(f"⚠️ Cleanup error: {e}")
|