Spaces:
Running on Zero
Running on Zero
Vicente Alvarez committed on
Commit ·
9d8a71e
1
Parent(s): acfd94b
Move Whisper to GPU session (30s reservation): 20-30 s for 10 min of audio vs. 30-40 min on CPU
Browse files
app.py
CHANGED
|
@@ -396,16 +396,17 @@ def loop_clips_with_audio_track(clip_paths: list[str], audio_path: str) -> str:
|
|
| 396 |
return clip_paths[0] if clip_paths else None
|
| 397 |
|
| 398 |
|
| 399 |
-
|
| 400 |
-
|
|
|
|
| 401 |
import whisper
|
| 402 |
|
| 403 |
try:
|
| 404 |
-
print(f"[whisper] Loading {model_size} model...")
|
| 405 |
-
model = whisper.load_model(model_size)
|
| 406 |
|
| 407 |
-
print(f"[whisper] Transcribing audio...")
|
| 408 |
-
result = model.transcribe(video_path, word_timestamps=True)
|
| 409 |
|
| 410 |
print(f"[whisper] Transcription complete: {len(result['segments'])} segments")
|
| 411 |
return result['segments']
|
|
@@ -689,10 +690,10 @@ def full_generation_process(
|
|
| 689 |
if add_subtitles or watermark:
|
| 690 |
print("[CPU] Adding subtitles/watermark...")
|
| 691 |
|
| 692 |
-
# Transcribe if subtitles requested
|
| 693 |
subtitle_file = None
|
| 694 |
if add_subtitles:
|
| 695 |
-
segments =
|
| 696 |
if segments:
|
| 697 |
subtitle_file = tempfile.mktemp(suffix=".ass")
|
| 698 |
create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))
|
|
|
|
| 396 |
return clip_paths[0] if clip_paths else None
|
| 397 |
|
| 398 |
|
| 399 |
+
@spaces.GPU(duration=30)
|
| 400 |
+
def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
|
| 401 |
+
"""Transcribe video audio with Whisper on GPU. Returns segments with timestamps."""
|
| 402 |
import whisper
|
| 403 |
|
| 404 |
try:
|
| 405 |
+
print(f"[whisper] Loading {model_size} model on GPU...")
|
| 406 |
+
model = whisper.load_model(model_size).to('cuda')
|
| 407 |
|
| 408 |
+
print(f"[whisper] Transcribing audio on GPU...")
|
| 409 |
+
result = model.transcribe(video_path, word_timestamps=True, fp16=True)
|
| 410 |
|
| 411 |
print(f"[whisper] Transcription complete: {len(result['segments'])} segments")
|
| 412 |
return result['segments']
|
|
|
|
| 690 |
if add_subtitles or watermark:
|
| 691 |
print("[CPU] Adding subtitles/watermark...")
|
| 692 |
|
| 693 |
+
# Transcribe if subtitles requested (GPU work - fast!)
|
| 694 |
subtitle_file = None
|
| 695 |
if add_subtitles:
|
| 696 |
+
segments = transcribe_with_whisper_gpu(final_video, model_size="small")
|
| 697 |
if segments:
|
| 698 |
subtitle_file = tempfile.mktemp(suffix=".ass")
|
| 699 |
create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))
|