Spaces:

vclmax
/

Element-8-Video

Running on Zero

App Files Files Community

Vicente Alvarez committed 21 days ago

Commit

75c4c3f

1 Parent(s): 47cb0be

Move Whisper inside main GPU session (90s) - no separate GPU call, all in one reservation

Browse files

Files changed (1) hide show

app.py +20 -14

app.py CHANGED Viewed

@@ -396,9 +396,8 @@ def loop_clips_with_audio_track(clip_paths: list[str], audio_path: str) -> str:
         return clip_paths[0] if clip_paths else None
-@spaces.GPU(duration=30)
 def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
-    """Transcribe video audio with Whisper on GPU. Returns segments with timestamps."""
     import whisper
     try:
@@ -537,6 +536,7 @@ def generate_video(
     negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
     blur_amount: int = 0,
     remove_music: bool = False,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
@@ -632,14 +632,22 @@ def generate_video(
             generated_clips.append(str(output_path))
-        # Return all generated clips
-        return generated_clips, base_seed
     except Exception as e:
         import traceback
         log_memory("on error")
         print(f"Error: {str(e)}\n{traceback.format_exc()}")
-        return [], base_seed
 def full_generation_process(
@@ -671,11 +679,11 @@ def full_generation_process(
     print(f"Generating {len(prompts)} clip(s)")
-    # Phase 1: Generate clips (GPU time counted)
-    clips, final_seed = generate_video(
         first_image, last_image, prompts, duration, enhance_prompt,
         seed, randomize_seed, height, width, negative_prompt,
-        blur_amount, remove_music, progress
     )
     if not clips:
@@ -694,13 +702,11 @@ def full_generation_process(
     if add_subtitles or watermark:
         print("[CPU] Adding subtitles/watermark...")
-        # Transcribe if subtitles requested (GPU work - fast!)
         subtitle_file = None
-        if add_subtitles:
-            segments = transcribe_with_whisper_gpu(final_video, model_size="small")
-            if segments:
-                subtitle_file = tempfile.mktemp(suffix=".ass")
-                create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))
         # Burn subtitles and/or watermark
         output_with_extras = tempfile.mktemp(suffix=".mp4")

         return clip_paths[0] if clip_paths else None
 def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
+    """Transcribe video audio with Whisper on GPU (already inside GPU context). Returns segments with timestamps."""
     import whisper
     try:
     negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
     blur_amount: int = 0,
     remove_music: bool = False,
+    add_subtitles: bool = False,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
             generated_clips.append(str(output_path))
+        # Transcribe with Whisper if requested (still within GPU context)
+        subtitle_segments = []
+        if add_subtitles and generated_clips:
+            print("[GPU] Transcribing with Whisper...")
+            # Transcribe the first clip (or you could transcribe all clips)
+            subtitle_segments = transcribe_with_whisper_gpu(generated_clips[0], model_size="small")
+            log_memory("after whisper")
+        # Return all generated clips and subtitle segments
+        return generated_clips, subtitle_segments, base_seed
     except Exception as e:
         import traceback
         log_memory("on error")
         print(f"Error: {str(e)}\n{traceback.format_exc()}")
+        return [], [], base_seed
 def full_generation_process(
     print(f"Generating {len(prompts)} clip(s)")
+    # Phase 1: Generate clips + transcribe (GPU time counted)
+    clips, subtitle_segments, final_seed = generate_video(
         first_image, last_image, prompts, duration, enhance_prompt,
         seed, randomize_seed, height, width, negative_prompt,
+        blur_amount, remove_music, add_subtitles, progress
     )
     if not clips:
     if add_subtitles or watermark:
         print("[CPU] Adding subtitles/watermark...")
+        # Use subtitle segments from GPU transcription
         subtitle_file = None
+        if add_subtitles and subtitle_segments:
+            subtitle_file = tempfile.mktemp(suffix=".ass")
+            create_beautiful_ass_subtitles(subtitle_segments, subtitle_file, int(width), int(height))
         # Burn subtitles and/or watermark
         output_with_extras = tempfile.mktemp(suffix=".mp4")