Update app.py
Browse files
Attempt to split GPU token acquisition across the individual video clips so that each clip gets its own token.
app.py
CHANGED
|
@@ -39,36 +39,49 @@ INPUT_DIR.mkdir(exist_ok=True)
|
|
| 39 |
# GPU-accelerated steps (decorated only on Spaces)
|
| 40 |
# ---------------------------------------------------------------------------
|
| 41 |
|
| 42 |
-
def
|
| 43 |
-
"""GPU phase: generate all images
|
| 44 |
-
|
| 45 |
-
Combined into one @spaces.GPU call so the GPU token stays valid
|
| 46 |
-
for both steps (acquiring a GPU after long CPU work expires the token).
|
| 47 |
-
"""
|
| 48 |
-
print(f"[GPU] Entered _generate_images_and_videos(run_dir={run_dir}, style={style_name})")
|
| 49 |
-
# --- Images ---
|
| 50 |
if IS_SPACES:
|
| 51 |
from src.image_generator_hf import run as gen_images
|
| 52 |
else:
|
| 53 |
from src.image_generator_api import run as gen_images
|
| 54 |
-
print("[GPU] Starting image generation...")
|
| 55 |
gen_images(run_dir, style_name=style_name)
|
| 56 |
torch.cuda.empty_cache()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
# --- Videos ---
|
| 59 |
if IS_SPACES:
|
| 60 |
-
from src.video_generator_hf import
|
| 61 |
-
gen_videos(run_dir)
|
| 62 |
-
from src.video_generator_hf import unload
|
| 63 |
-
unload()
|
| 64 |
else:
|
| 65 |
-
from src.video_generator_api import
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
-
# Apply @spaces.GPU decorator on Spaces
|
| 70 |
if IS_SPACES:
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
# ---------------------------------------------------------------------------
|
|
@@ -216,16 +229,13 @@ _COLOR_PRESETS = {
|
|
| 216 |
}
|
| 217 |
|
| 218 |
|
| 219 |
-
def
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
"""
|
| 223 |
|
| 224 |
-
Returns
|
| 225 |
-
Path to the final video.
|
| 226 |
"""
|
| 227 |
-
font_name = DEFAULT_FONT
|
| 228 |
-
font_color = DEFAULT_FONT_COLOR
|
| 229 |
style = get_style(style_name)
|
| 230 |
is_resume = run_mode == "Resume Existing"
|
| 231 |
|
|
@@ -241,17 +251,14 @@ def generate(audio_file: str, style_name: str, cover_art: str | None,
|
|
| 241 |
print(f"Resuming {existing_run} from step {step_num}")
|
| 242 |
|
| 243 |
# Always clear assembly output (cheap to redo)
|
| 244 |
-
import shutil
|
| 245 |
out_dir = run_dir / "output"
|
| 246 |
if out_dir.exists():
|
| 247 |
shutil.rmtree(out_dir)
|
| 248 |
-
# Also clear intermediate assembly artifacts
|
| 249 |
for d in ["clips_split", "clips_trimmed"]:
|
| 250 |
p = run_dir / d
|
| 251 |
if p.exists():
|
| 252 |
shutil.rmtree(p)
|
| 253 |
|
| 254 |
-
# If not reusing files, also clear images and video clips
|
| 255 |
if not reuse_files:
|
| 256 |
if step_num <= 6:
|
| 257 |
img_dir = run_dir / "images"
|
|
@@ -269,7 +276,6 @@ def generate(audio_file: str, style_name: str, cover_art: str | None,
|
|
| 269 |
import gc
|
| 270 |
|
| 271 |
def _flush_memory():
|
| 272 |
-
"""Aggressively free memory between heavy ML steps."""
|
| 273 |
gc.collect()
|
| 274 |
if hasattr(torch, "mps") and torch.backends.mps.is_available():
|
| 275 |
torch.mps.empty_cache()
|
|
@@ -280,7 +286,6 @@ def generate(audio_file: str, style_name: str, cover_art: str | None,
|
|
| 280 |
if step_num <= 1:
|
| 281 |
progress(0.0, desc="Separating stems...")
|
| 282 |
from src.stem_separator import separate_stems
|
| 283 |
-
# For resume: find original audio in song dir; for new run: use uploaded file
|
| 284 |
if is_resume:
|
| 285 |
song_dir = run_dir.parent
|
| 286 |
audio_candidates = list(song_dir.glob("*.wav")) + list(song_dir.glob("*.mp3")) + \
|
|
@@ -326,26 +331,63 @@ def generate(audio_file: str, style_name: str, cover_art: str | None,
|
|
| 326 |
quality_suffix=style.get("quality_suffix", ""))
|
| 327 |
print("Prompt generation complete.")
|
| 328 |
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
if step_num <= 7:
|
| 331 |
-
progress(0.50, desc="Generating images
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
progress(0.90, desc="Assembling final video...")
|
| 343 |
from src.assembler import run as assemble_video
|
| 344 |
-
final_path = assemble_video(run_dir, font_name=
|
| 345 |
-
cover_art=
|
| 346 |
|
| 347 |
progress(1.0, desc="Done!")
|
| 348 |
-
return str(final_path),
|
| 349 |
|
| 350 |
|
| 351 |
def reshuffle(run_dir_str: str, cover_art: str | None, progress=gr.Progress()):
|
|
@@ -665,6 +707,12 @@ with gr.Blocks(
|
|
| 665 |
reshuffle_btn = gr.Button("Reshuffle", variant="secondary", visible=False)
|
| 666 |
last_run_dir = gr.State(value="")
|
| 667 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
# --- Event handlers ---
|
| 669 |
example_song.change(
|
| 670 |
fn=_on_example_song,
|
|
@@ -689,9 +737,21 @@ with gr.Blocks(
|
|
| 689 |
)
|
| 690 |
|
| 691 |
generate_btn.click(
|
| 692 |
-
fn=
|
| 693 |
inputs=[audio_input, style_dropdown,
|
| 694 |
cover_art_input, run_mode, existing_run, start_step, reuse_files],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
outputs=[video_output, last_run_dir, reshuffle_btn],
|
| 696 |
)
|
| 697 |
reshuffle_btn.click(
|
|
|
|
| 39 |
# GPU-accelerated steps (decorated only on Spaces)
|
| 40 |
# ---------------------------------------------------------------------------
|
| 41 |
|
| 42 |
+
def _gpu_generate_images(run_dir, style_name):
    """GPU phase: render every image for a run.

    The backend is chosen at call time: the ``src.image_generator_hf``
    runner when ``IS_SPACES`` is truthy, otherwise the
    ``src.image_generator_api`` runner.  After the runner returns, the
    CUDA caching allocator is asked to release its unused blocks.

    Args:
        run_dir: Run directory handed straight to the backend runner.
        style_name: Style identifier forwarded as ``style_name=``.
    """
    print(f"[GPU] Generating images (run_dir={run_dir}, style={style_name})")

    # Import lazily so only the backend that is actually used gets loaded.
    if not IS_SPACES:
        from src.image_generator_api import run as render_images
    else:
        from src.image_generator_hf import run as render_images

    render_images(run_dir, style_name=style_name)
    torch.cuda.empty_cache()
    print("[GPU] Image generation complete.")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _gpu_generate_one_video(run_dir, segment_idx, prompt, negative_prompt, seed):
    """GPU phase: generate a single video clip for one segment.

    The clip is rendered from ``<run_dir>/images/segment_NNN.png`` into
    ``<run_dir>/clips/clip_NNN.mp4`` (``NNN`` is ``segment_idx`` zero-padded
    to three digits).  The call is a no-op when the clip already exists or
    when the source image is missing, which makes re-runs resumable.

    Keeping the work to one clip per call is intended to let each call
    acquire its own ZeroGPU token when the function is wrapped with
    ``spaces.GPU`` on Spaces.

    Args:
        run_dir: Run directory (``str`` or ``Path``).
        segment_idx: Integer segment number used to build the file names.
        prompt: Positional prompt argument forwarded to ``generate_clip``.
        negative_prompt: Negative prompt forwarded to ``generate_clip``.
        seed: Seed forwarded to ``generate_clip`` as ``seed=``.

    Returns:
        None.
    """
    import time

    run_dir = Path(run_dir)
    image_path = run_dir / "images" / f"segment_{segment_idx:03d}.png"
    clip_path = run_dir / "clips" / f"clip_{segment_idx:03d}.mp4"
    clip_path.parent.mkdir(parents=True, exist_ok=True)

    if clip_path.exists():
        print(f"  [GPU] Clip {segment_idx}: already exists, skipping")
        return

    if not image_path.exists():
        print(f"  [GPU] Clip {segment_idx}: image not found, skipping")
        return

    # Import lazily so only the backend that is actually used gets loaded.
    if IS_SPACES:
        from src.video_generator_hf import generate_clip
    else:
        from src.video_generator_api import generate_clip

    print(f"  [GPU] Generating clip {segment_idx}...")
    # perf_counter() is monotonic, so the reported duration cannot go
    # negative or jump if the system clock is adjusted mid-generation.
    started = time.perf_counter()
    generate_clip(image_path, prompt, clip_path, negative_prompt, seed=seed)
    print(f"  [GPU] Clip {segment_idx} done ({time.perf_counter() - started:.1f}s)")
|
| 79 |
|
| 80 |
|
| 81 |
+
# Apply the @spaces.GPU decorator on Spaces so that each decorated call acquires its own fresh token
|
| 82 |
if IS_SPACES:
|
| 83 |
+
_gpu_generate_images = spaces.GPU(duration=300)(_gpu_generate_images)
|
| 84 |
+
_gpu_generate_one_video = spaces.GPU(duration=300)(_gpu_generate_one_video)
|
| 85 |
|
| 86 |
|
| 87 |
# ---------------------------------------------------------------------------
|
|
|
|
| 229 |
}
|
| 230 |
|
| 231 |
|
| 232 |
+
def generate_cpu(audio_file: str, style_name: str, cover_art: str | None,
|
| 233 |
+
run_mode: str, existing_run: str | None, start_step: str | None,
|
| 234 |
+
reuse_files: bool, progress=gr.Progress()):
|
| 235 |
+
"""CPU phase: steps 1-5 (stems, lyrics, beats, segmentation, prompts).
|
| 236 |
|
| 237 |
+
Returns state dict for the GPU phases.
|
|
|
|
| 238 |
"""
|
|
|
|
|
|
|
| 239 |
style = get_style(style_name)
|
| 240 |
is_resume = run_mode == "Resume Existing"
|
| 241 |
|
|
|
|
| 251 |
print(f"Resuming {existing_run} from step {step_num}")
|
| 252 |
|
| 253 |
# Always clear assembly output (cheap to redo)
|
|
|
|
| 254 |
out_dir = run_dir / "output"
|
| 255 |
if out_dir.exists():
|
| 256 |
shutil.rmtree(out_dir)
|
|
|
|
| 257 |
for d in ["clips_split", "clips_trimmed"]:
|
| 258 |
p = run_dir / d
|
| 259 |
if p.exists():
|
| 260 |
shutil.rmtree(p)
|
| 261 |
|
|
|
|
| 262 |
if not reuse_files:
|
| 263 |
if step_num <= 6:
|
| 264 |
img_dir = run_dir / "images"
|
|
|
|
| 276 |
import gc
|
| 277 |
|
| 278 |
def _flush_memory():
|
|
|
|
| 279 |
gc.collect()
|
| 280 |
if hasattr(torch, "mps") and torch.backends.mps.is_available():
|
| 281 |
torch.mps.empty_cache()
|
|
|
|
| 286 |
if step_num <= 1:
|
| 287 |
progress(0.0, desc="Separating stems...")
|
| 288 |
from src.stem_separator import separate_stems
|
|
|
|
| 289 |
if is_resume:
|
| 290 |
song_dir = run_dir.parent
|
| 291 |
audio_candidates = list(song_dir.glob("*.wav")) + list(song_dir.glob("*.mp3")) + \
|
|
|
|
| 331 |
quality_suffix=style.get("quality_suffix", ""))
|
| 332 |
print("Prompt generation complete.")
|
| 333 |
|
| 334 |
+
progress(0.45, desc="CPU steps done, requesting GPU...")
|
| 335 |
+
# Return state for GPU phases (all values must be picklable strings)
|
| 336 |
+
return str(run_dir), style_name, str(step_num), cover_art or ""
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def generate_images(run_dir_str: str, style_name: str, step_num_str: str,
                    cover_art: str, progress=gr.Progress()):
    """GPU phase: step 6 — generate images.

    Image generation is delegated to ``_gpu_generate_images`` and only runs
    when the starting step is 7 or lower.  The four inputs are returned
    unchanged so they can be fed to the next chained pipeline phase.

    Args:
        run_dir_str: Run directory as a string.
        style_name: Style identifier passed to the image generator.
        step_num_str: Starting step number encoded as a string.
        cover_art: Cover-art value, passed through untouched.
        progress: Gradio progress callback.

    Returns:
        Tuple ``(run_dir_str, style_name, step_num_str, cover_art)``.
    """
    passthrough = (run_dir_str, style_name, step_num_str, cover_art)

    # Starting beyond step 7: nothing to generate here.
    if int(step_num_str) > 7:
        return passthrough

    progress(0.50, desc="Generating images...")
    _gpu_generate_images(run_dir_str, style_name)
    return passthrough
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
def generate_videos(run_dir_str: str, style_name: str, step_num_str: str,
                    cover_art: str, progress=gr.Progress()):
    """GPU phase: step 7 — generate video clips, one GPU call per clip.

    Reads ``segments.json`` from the run directory and calls
    ``_gpu_generate_one_video`` once per segment, so each clip is produced
    in its own GPU call.  Nothing is generated when the starting step is
    greater than 7.

    Args:
        run_dir_str: Run directory as a string.
        style_name: Unused here; accepted so the signature matches the
            state passed along the chained pipeline phases.
        step_num_str: Starting step number encoded as a string.
        cover_art: Cover-art value, passed through untouched.
        progress: Gradio progress callback.

    Returns:
        Tuple ``(run_dir_str, cover_art)`` for the assembly phase.
    """
    # Guard clause: when this phase is skipped, no segment data is loaded,
    # so nothing below may reference it.
    if int(step_num_str) > 7:
        return run_dir_str, cover_art

    run_dir = Path(run_dir_str)
    with open(run_dir / "segments.json", encoding="utf-8") as f:
        segments = json.load(f)

    total = len(segments)
    base_seed = 42
    for i, seg in enumerate(segments):
        idx = seg["segment"]
        # Prefer the dedicated video prompt, then fall back to the scene
        # description, then to the generic prompt.
        prompt = seg.get("video_prompt", seg.get("scene", seg.get("prompt", "")))
        neg = seg.get("negative_prompt", "")
        progress(0.50 + 0.35 * (i / total),
                 desc=f"Generating video clip {i+1}/{total}...")
        # Offset the seed by the segment number so every clip gets a
        # distinct but reproducible seed.
        _gpu_generate_one_video(run_dir_str, idx, prompt, neg, base_seed + idx)

    # Unload video model after all clips are done.  This stays best-effort
    # (a failed unload must not fail the pipeline), but the failure is
    # reported instead of being silently discarded.
    if IS_SPACES:
        try:
            from src.video_generator_hf import unload
            unload()
        except Exception as exc:
            print(f"[GPU] Warning: could not unload video model: {exc}")

    print(f"All {total} video clips generated.")
    return run_dir_str, cover_art
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def generate_assembly(run_dir_str: str, cover_art: str, progress=gr.Progress()):
    """CPU phase: step 8 — assemble the final video.

    Calls the ``src.assembler`` runner with the default font settings and
    reports progress before and after.

    Args:
        run_dir_str: Run directory as a string.
        cover_art: Cover-art value; an empty value is passed on as ``None``.
        progress: Gradio progress callback.

    Returns:
        Tuple of the final video path as a string, the run directory
        string, and a ``gr.update(visible=True)`` for the third output.
    """
    run_path = Path(run_dir_str)
    # Earlier phases carry "no cover art" as an empty string.
    cover_or_none = cover_art or None

    progress(0.90, desc="Assembling final video...")
    from src.assembler import run as assemble_video
    output_video = assemble_video(
        run_path,
        font_name=DEFAULT_FONT,
        font_color=DEFAULT_FONT_COLOR,
        cover_art=cover_or_none,
    )

    progress(1.0, desc="Done!")
    return str(output_video), run_dir_str, gr.update(visible=True)
|
| 391 |
|
| 392 |
|
| 393 |
def reshuffle(run_dir_str: str, cover_art: str | None, progress=gr.Progress()):
|
|
|
|
| 707 |
reshuffle_btn = gr.Button("Reshuffle", variant="secondary", visible=False)
|
| 708 |
last_run_dir = gr.State(value="")
|
| 709 |
|
| 710 |
+
# Hidden state for passing data between chained pipeline phases
|
| 711 |
+
_st_run_dir = gr.State(value="")
|
| 712 |
+
_st_style = gr.State(value="")
|
| 713 |
+
_st_step = gr.State(value="1")
|
| 714 |
+
_st_cover = gr.State(value="")
|
| 715 |
+
|
| 716 |
# --- Event handlers ---
|
| 717 |
example_song.change(
|
| 718 |
fn=_on_example_song,
|
|
|
|
| 737 |
)
|
| 738 |
|
| 739 |
generate_btn.click(
|
| 740 |
+
fn=generate_cpu,
|
| 741 |
inputs=[audio_input, style_dropdown,
|
| 742 |
cover_art_input, run_mode, existing_run, start_step, reuse_files],
|
| 743 |
+
outputs=[_st_run_dir, _st_style, _st_step, _st_cover],
|
| 744 |
+
).then(
|
| 745 |
+
fn=generate_images,
|
| 746 |
+
inputs=[_st_run_dir, _st_style, _st_step, _st_cover],
|
| 747 |
+
outputs=[_st_run_dir, _st_style, _st_step, _st_cover],
|
| 748 |
+
).then(
|
| 749 |
+
fn=generate_videos,
|
| 750 |
+
inputs=[_st_run_dir, _st_style, _st_step, _st_cover],
|
| 751 |
+
outputs=[_st_run_dir, _st_cover],
|
| 752 |
+
).then(
|
| 753 |
+
fn=generate_assembly,
|
| 754 |
+
inputs=[_st_run_dir, _st_cover],
|
| 755 |
outputs=[video_output, last_run_dir, reshuffle_btn],
|
| 756 |
)
|
| 757 |
reshuffle_btn.click(
|