Vicente Alvarez committed on
Commit
9d8a71e
·
1 Parent(s): acfd94b

Move Whisper transcription to a GPU session (30 s reservation): roughly 20-30 s for 10 min of audio on GPU vs. 30-40 min on CPU

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -396,16 +396,17 @@ def loop_clips_with_audio_track(clip_paths: list[str], audio_path: str) -> str:
396
  return clip_paths[0] if clip_paths else None
397
 
398
 
399
- def transcribe_with_whisper(video_path: str, model_size: str = "small") -> list[dict]:
400
- """Transcribe video audio with Whisper. Returns segments with timestamps."""
 
401
  import whisper
402
 
403
  try:
404
- print(f"[whisper] Loading {model_size} model...")
405
- model = whisper.load_model(model_size)
406
 
407
- print(f"[whisper] Transcribing audio...")
408
- result = model.transcribe(video_path, word_timestamps=True)
409
 
410
  print(f"[whisper] Transcription complete: {len(result['segments'])} segments")
411
  return result['segments']
@@ -689,10 +690,10 @@ def full_generation_process(
689
  if add_subtitles or watermark:
690
  print("[CPU] Adding subtitles/watermark...")
691
 
692
- # Transcribe if subtitles requested
693
  subtitle_file = None
694
  if add_subtitles:
695
- segments = transcribe_with_whisper(final_video, model_size="small")
696
  if segments:
697
  subtitle_file = tempfile.mktemp(suffix=".ass")
698
  create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))
 
396
  return clip_paths[0] if clip_paths else None
397
 
398
 
399
+ @spaces.GPU(duration=30)
400
+ def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
401
+ """Transcribe video audio with Whisper on GPU. Returns segments with timestamps."""
402
  import whisper
403
 
404
  try:
405
+ print(f"[whisper] Loading {model_size} model on GPU...")
406
+ model = whisper.load_model(model_size).to('cuda')
407
 
408
+ print(f"[whisper] Transcribing audio on GPU...")
409
+ result = model.transcribe(video_path, word_timestamps=True, fp16=True)
410
 
411
  print(f"[whisper] Transcription complete: {len(result['segments'])} segments")
412
  return result['segments']
 
690
  if add_subtitles or watermark:
691
  print("[CPU] Adding subtitles/watermark...")
692
 
693
+ # Transcribe if subtitles requested (GPU work - fast!)
694
  subtitle_file = None
695
  if add_subtitles:
696
+ segments = transcribe_with_whisper_gpu(final_video, model_size="small")
697
  if segments:
698
  subtitle_file = tempfile.mktemp(suffix=".ass")
699
  create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))