Spaces:
Running on Zero
Running on Zero
Vicente Alvarez committed on
Commit ·
75c4c3f
1
Parent(s): 47cb0be
Move Whisper inside main GPU session (90s) - no separate GPU call, all in one reservation
Browse files
app.py
CHANGED
|
@@ -396,9 +396,8 @@ def loop_clips_with_audio_track(clip_paths: list[str], audio_path: str) -> str:
|
|
| 396 |
return clip_paths[0] if clip_paths else None
|
| 397 |
|
| 398 |
|
| 399 |
-
@spaces.GPU(duration=30)
|
| 400 |
def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
|
| 401 |
-
"""Transcribe video audio with Whisper on GPU. Returns segments with timestamps."""
|
| 402 |
import whisper
|
| 403 |
|
| 404 |
try:
|
|
@@ -537,6 +536,7 @@ def generate_video(
|
|
| 537 |
negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
|
| 538 |
blur_amount: int = 0,
|
| 539 |
remove_music: bool = False,
|
|
|
|
| 540 |
progress=gr.Progress(track_tqdm=True),
|
| 541 |
):
|
| 542 |
try:
|
|
@@ -632,14 +632,22 @@ def generate_video(
|
|
| 632 |
|
| 633 |
generated_clips.append(str(output_path))
|
| 634 |
|
| 635 |
-
#
|
| 636 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 637 |
|
| 638 |
except Exception as e:
|
| 639 |
import traceback
|
| 640 |
log_memory("on error")
|
| 641 |
print(f"Error: {str(e)}\n{traceback.format_exc()}")
|
| 642 |
-
return [], base_seed
|
| 643 |
|
| 644 |
|
| 645 |
def full_generation_process(
|
|
@@ -671,11 +679,11 @@ def full_generation_process(
|
|
| 671 |
|
| 672 |
print(f"Generating {len(prompts)} clip(s)")
|
| 673 |
|
| 674 |
-
# Phase 1: Generate clips (GPU time counted)
|
| 675 |
-
clips, final_seed = generate_video(
|
| 676 |
first_image, last_image, prompts, duration, enhance_prompt,
|
| 677 |
seed, randomize_seed, height, width, negative_prompt,
|
| 678 |
-
blur_amount, remove_music, progress
|
| 679 |
)
|
| 680 |
|
| 681 |
if not clips:
|
|
@@ -694,13 +702,11 @@ def full_generation_process(
|
|
| 694 |
if add_subtitles or watermark:
|
| 695 |
print("[CPU] Adding subtitles/watermark...")
|
| 696 |
|
| 697 |
-
#
|
| 698 |
subtitle_file = None
|
| 699 |
-
if add_subtitles:
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
subtitle_file = tempfile.mktemp(suffix=".ass")
|
| 703 |
-
create_beautiful_ass_subtitles(segments, subtitle_file, int(width), int(height))
|
| 704 |
|
| 705 |
# Burn subtitles and/or watermark
|
| 706 |
output_with_extras = tempfile.mktemp(suffix=".mp4")
|
|
|
|
| 396 |
return clip_paths[0] if clip_paths else None
|
| 397 |
|
| 398 |
|
|
|
|
| 399 |
def transcribe_with_whisper_gpu(video_path: str, model_size: str = "small") -> list[dict]:
|
| 400 |
+
"""Transcribe video audio with Whisper on GPU (already inside GPU context). Returns segments with timestamps."""
|
| 401 |
import whisper
|
| 402 |
|
| 403 |
try:
|
|
|
|
| 536 |
negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
|
| 537 |
blur_amount: int = 0,
|
| 538 |
remove_music: bool = False,
|
| 539 |
+
add_subtitles: bool = False,
|
| 540 |
progress=gr.Progress(track_tqdm=True),
|
| 541 |
):
|
| 542 |
try:
|
|
|
|
| 632 |
|
| 633 |
generated_clips.append(str(output_path))
|
| 634 |
|
| 635 |
+
# Transcribe with Whisper if requested (still within GPU context)
|
| 636 |
+
subtitle_segments = []
|
| 637 |
+
if add_subtitles and generated_clips:
|
| 638 |
+
print("[GPU] Transcribing with Whisper...")
|
| 639 |
+
# Transcribe the first clip (or you could transcribe all clips)
|
| 640 |
+
subtitle_segments = transcribe_with_whisper_gpu(generated_clips[0], model_size="small")
|
| 641 |
+
log_memory("after whisper")
|
| 642 |
+
|
| 643 |
+
# Return all generated clips and subtitle segments
|
| 644 |
+
return generated_clips, subtitle_segments, base_seed
|
| 645 |
|
| 646 |
except Exception as e:
|
| 647 |
import traceback
|
| 648 |
log_memory("on error")
|
| 649 |
print(f"Error: {str(e)}\n{traceback.format_exc()}")
|
| 650 |
+
return [], [], base_seed
|
| 651 |
|
| 652 |
|
| 653 |
def full_generation_process(
|
|
|
|
| 679 |
|
| 680 |
print(f"Generating {len(prompts)} clip(s)")
|
| 681 |
|
| 682 |
+
# Phase 1: Generate clips + transcribe (GPU time counted)
|
| 683 |
+
clips, subtitle_segments, final_seed = generate_video(
|
| 684 |
first_image, last_image, prompts, duration, enhance_prompt,
|
| 685 |
seed, randomize_seed, height, width, negative_prompt,
|
| 686 |
+
blur_amount, remove_music, add_subtitles, progress
|
| 687 |
)
|
| 688 |
|
| 689 |
if not clips:
|
|
|
|
| 702 |
if add_subtitles or watermark:
|
| 703 |
print("[CPU] Adding subtitles/watermark...")
|
| 704 |
|
| 705 |
+
# Use subtitle segments from GPU transcription
|
| 706 |
subtitle_file = None
|
| 707 |
+
if add_subtitles and subtitle_segments:
|
| 708 |
+
subtitle_file = tempfile.mktemp(suffix=".ass")
|
| 709 |
+
create_beautiful_ass_subtitles(subtitle_segments, subtitle_file, int(width), int(height))
|
|
|
|
|
|
|
| 710 |
|
| 711 |
# Burn subtitles and/or watermark
|
| 712 |
output_with_extras = tempfile.mktemp(suffix=".mp4")
|