Vicente Alvarez committed on
Commit
75c4c3f
·
1 Parent(s): 47cb0be

Move Whisper inside main GPU session (90s) - no separate GPU call, all in one reservation

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -396,9 +396,8 @@ def loop_clips_with_audio_track(clip_paths: list[str], audio_path: str) -> str:
396
  return clip_paths[0] if clip_paths else None
397
 
398
 
399
- @spaces.GPU(duration=30)
400
  def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
401
- """Transcribe video audio with Whisper on GPU. Returns segments with timestamps."""
402
  import whisper
403
 
404
  try:
@@ -537,6 +536,7 @@ def generate_video(
537
  negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
538
  blur_amount: int = 0,
539
  remove_music: bool = False,
 
540
  progress=gr.Progress(track_tqdm=True),
541
  ):
542
  try:
@@ -632,14 +632,22 @@ def generate_video(
632
 
633
  generated_clips.append(str(output_path))
634
 
635
- # Return all generated clips
636
- return generated_clips, base_seed
 
 
 
 
 
 
 
 
637
 
638
  except Exception as e:
639
  import traceback
640
  log_memory("on error")
641
  print(f"Error: {str(e)}\n{traceback.format_exc()}")
642
- return [], base_seed
643
 
644
 
645
  def full_generation_process(
@@ -671,11 +679,11 @@ def full_generation_process(
671
 
672
  print(f"Generating {len(prompts)} clip(s)")
673
 
674
- # Phase 1: Generate clips (GPU time counted)
675
- clips, final_seed = generate_video(
676
  first_image, last_image, prompts, duration, enhance_prompt,
677
  seed, randomize_seed, height, width, negative_prompt,
678
- blur_amount, remove_music, progress
679
  )
680
 
681
  if not clips:
@@ -694,13 +702,11 @@ def full_generation_process(
694
  if add_subtitles or watermark:
695
  print("[CPU] Adding subtitles/watermark...")
696
 
697
- # Transcribe if subtitles requested (GPU work - fast!)
698
  subtitle_file = None
699
- if add_subtitles:
700
- segments = transcribe_with_whisper_gpu(final_video, model_size="small")
701
- if segments:
702
- subtitle_file = tempfile.mktemp(suffix=".ass")
703
- create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))
704
 
705
  # Burn subtitles and/or watermark
706
  output_with_extras = tempfile.mktemp(suffix=".mp4")
 
396
  return clip_paths[0] if clip_paths else None
397
 
398
 
 
399
  def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
400
+ """Transcribe video audio with Whisper on GPU (already inside GPU context). Returns segments with timestamps."""
401
  import whisper
402
 
403
  try:
 
536
  negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
537
  blur_amount: int = 0,
538
  remove_music: bool = False,
539
+ add_subtitles: bool = False,
540
  progress=gr.Progress(track_tqdm=True),
541
  ):
542
  try:
 
632
 
633
  generated_clips.append(str(output_path))
634
 
635
+ # Transcribe with Whisper if requested (still within GPU context)
636
+ subtitle_segments = []
637
+ if add_subtitles and generated_clips:
638
+ print("[GPU] Transcribing with Whisper...")
639
+ # Transcribe the first clip (or you could transcribe all clips)
640
+ subtitle_segments = transcribe_with_whisper_gpu(generated_clips[0], model_size="small")
641
+ log_memory("after whisper")
642
+
643
+ # Return all generated clips and subtitle segments
644
+ return generated_clips, subtitle_segments, base_seed
645
 
646
  except Exception as e:
647
  import traceback
648
  log_memory("on error")
649
  print(f"Error: {str(e)}\n{traceback.format_exc()}")
650
+ return [], [], base_seed
651
 
652
 
653
  def full_generation_process(
 
679
 
680
  print(f"Generating {len(prompts)} clip(s)")
681
 
682
+ # Phase 1: Generate clips + transcribe (GPU time counted)
683
+ clips, subtitle_segments, final_seed = generate_video(
684
  first_image, last_image, prompts, duration, enhance_prompt,
685
  seed, randomize_seed, height, width, negative_prompt,
686
+ blur_amount, remove_music, add_subtitles, progress
687
  )
688
 
689
  if not clips:
 
702
  if add_subtitles or watermark:
703
  print("[CPU] Adding subtitles/watermark...")
704
 
705
+ # Use subtitle segments from GPU transcription
706
  subtitle_file = None
707
+ if add_subtitles and subtitle_segments:
708
+ subtitle_file = tempfile.mktemp(suffix=".ass")
709
+ create_beautiful_ass_subtitles(subtitle_segments, subtitle_file, int(width), int(height))
 
 
710
 
711
  # Burn subtitles and/or watermark
712
  output_with_extras = tempfile.mktemp(suffix=".mp4")