fix: cap working fps and surface prewarm failures (critique of b859009)
Browse files
Critical re-review of the previous round caught two real issues:
1. The 180→240s LaMa duration bump was insufficient and missed VACE
The original gap I tried to address (60fps × 15s exceeds @spaces.GPU
budget) wasn't actually closed:
- LaMa worst case ≈ 270s vs new 240s budget → still overruns.
- VACE has the same problem and I missed it: 60fps × 15s = 900
frames → range(0, 900, 73) = 13 chunks × ~25s = ~325s vs the
300s VACE budget.
Real fix: PROCESS_FPS_MAX = 30. Both extraction and encoding use the
capped value via a working_meta clone (dataclasses.replace), so the
output mp4 duration matches the trimmed input. At 30fps both modes'
worst case fits comfortably:
- LaMa: 30 × 15 × 0.3 = 135s ≪ 240s
- VACE: 7 chunks × 25s = 175s ≪ 300s
on_video_upload now surfaces a "fps will be capped to 30" notice so
the user knows why a 60fps source is processed at 30fps.
2. wait_for_prewarm didn't track failure
If the prewarm thread crashed (network blip, disk full, mirror
permission error), is_prewarm_done() would return True (the thread
is dead), the user would click Quality, and _get_pipe's
from_pretrained(local_files_only=True) calls would fail with a
cryptic cache-miss error.
New module-level _prewarm_error captures whatever the thread raised
(using BaseException so KeyboardInterrupt etc. surface too). New
get_prewarm_error() accessor lets run_pipeline check before
acquiring the GPU lease and raise a friendly gr.Error pointing the
user to Fast/LaMa mode and a Space restart for retry.
Both LaMa duration (240s) and VACE duration (300s) stay where they
are — the worst case is now bounded by PROCESS_FPS_MAX, not by the
nominal upload limits.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- app.py +44 -11
- pipeline/vace.py +16 -2
|
@@ -16,7 +16,7 @@ from __future__ import annotations
|
|
| 16 |
import os
|
| 17 |
import shutil
|
| 18 |
import tempfile
|
| 19 |
-
from dataclasses import asdict, fields
|
| 20 |
from pathlib import Path
|
| 21 |
|
| 22 |
import gradio as gr
|
|
@@ -46,8 +46,8 @@ from pipeline.crop import (
|
|
| 46 |
)
|
| 47 |
from pipeline.lama import inpaint_frames_lama_stream
|
| 48 |
from pipeline.vace import (
|
| 49 |
-
inpaint_frames_vace_stream, is_prewarm_done,
|
| 50 |
-
wait_for_prewarm,
|
| 51 |
)
|
| 52 |
from pipeline.video import (
|
| 53 |
VideoMeta, VideoWorkspace,
|
|
@@ -67,6 +67,13 @@ UPLOAD_DURATION_S = 60.0
|
|
| 67 |
# Maximum accepted source resolution. Larger uploads are rejected outright.
|
| 68 |
MAX_UPLOAD_W = 1920
|
| 69 |
MAX_UPLOAD_H = 1080
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Mode labels used in both the UI radio choices and the dispatch logic.
|
| 72 |
# Defining them once prevents drift between the two sites.
|
|
@@ -265,6 +272,7 @@ def on_video_upload(video_path: str | None):
|
|
| 265 |
f"Max {MAX_UPLOAD_W}Γ{MAX_UPLOAD_H}."
|
| 266 |
)
|
| 267 |
will_trim = meta.duration_s > PROCESS_DURATION_S
|
|
|
|
| 268 |
|
| 269 |
first_frame = extract_first_frame_array(video_path)
|
| 270 |
|
|
@@ -280,16 +288,23 @@ def on_video_upload(video_path: str | None):
|
|
| 280 |
"layers": [],
|
| 281 |
"composite": None,
|
| 282 |
}
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
return (
|
| 289 |
gr.update(value=editor_val),
|
| 290 |
gr.update(value=None),
|
| 291 |
asdict(meta),
|
| 292 |
-
f"β Loaded β {meta_str}{
|
| 293 |
f"\n\nNow draw over the watermark with the brush tool.",
|
| 294 |
)
|
| 295 |
except Exception as e:
|
|
@@ -487,6 +502,14 @@ def run_pipeline(
|
|
| 487 |
raw_mask, meta_state, context_px,
|
| 488 |
)
|
| 489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
with VideoWorkspace() as ws:
|
| 491 |
# Preserve the original file extension so FFmpeg can detect the
|
| 492 |
# container format. Gradio always adds an extension for video
|
|
@@ -498,7 +521,7 @@ def run_pipeline(
|
|
| 498 |
# ── Extract frames (CFR-forced for VFR safety) ─────────────────
|
| 499 |
progress(0.10, desc="Extracting framesβ¦")
|
| 500 |
frame_paths = extract_frames(
|
| 501 |
-
safe_video, ws.frames_dir, fps=
|
| 502 |
max_duration_s=PROCESS_DURATION_S,
|
| 503 |
)
|
| 504 |
total = len(frame_paths)
|
|
@@ -519,6 +542,16 @@ def run_pipeline(
|
|
| 519 |
if not is_prewarm_done():
|
| 520 |
progress(0.16, desc="Waiting for VACE checkpoint cache to finish prewarmingβ¦")
|
| 521 |
wait_for_prewarm()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
_gpu_inpaint_vace(
|
| 523 |
frame_paths, crop_region, inpaint_mask,
|
| 524 |
ws.out_frames_dir, progress,
|
|
@@ -527,7 +560,7 @@ def run_pipeline(
|
|
| 527 |
# ── CPU: encode + mux ───────────────────────────────────────────
|
| 528 |
progress(0.95, desc="Encoding videoβ¦")
|
| 529 |
silent_path = ws.path("silent.mp4")
|
| 530 |
-
frames_to_video(ws.out_frames_dir, silent_path,
|
| 531 |
|
| 532 |
# The final mp4 outlives the VideoWorkspace (returned to Gradio
|
| 533 |
# for download), so it goes to the system tempdir, not ``ws``.
|
|
|
|
| 16 |
import os
|
| 17 |
import shutil
|
| 18 |
import tempfile
|
| 19 |
+
from dataclasses import asdict, fields, replace
|
| 20 |
from pathlib import Path
|
| 21 |
|
| 22 |
import gradio as gr
|
|
|
|
| 46 |
)
|
| 47 |
from pipeline.lama import inpaint_frames_lama_stream
|
| 48 |
from pipeline.vace import (
|
| 49 |
+
get_prewarm_error, inpaint_frames_vace_stream, is_prewarm_done,
|
| 50 |
+
prewarm_vace_cache, wait_for_prewarm,
|
| 51 |
)
|
| 52 |
from pipeline.video import (
|
| 53 |
VideoMeta, VideoWorkspace,
|
|
|
|
| 67 |
# Maximum accepted source resolution. Larger uploads are rejected outright.
|
| 68 |
MAX_UPLOAD_W = 1920
|
| 69 |
MAX_UPLOAD_H = 1080
|
| 70 |
+
# Cap working fps for both LaMa per-frame and VACE chunked inference. At
|
| 71 |
+
# 60 fps × 15 s the per-frame LaMa budget overflows the 240 s @spaces.GPU
|
| 72 |
+
# lease (~270 s estimated), and VACE chunking produces ~13 chunks worth
|
| 73 |
+
# >300 s. At 30 fps both fit comfortably. Sources above this rate are
|
| 74 |
+
# extracted *and encoded* at PROCESS_FPS_MAX so the output mp4 duration
|
| 75 |
+
# matches the trimmed input.
|
| 76 |
+
PROCESS_FPS_MAX = 30.0
|
| 77 |
|
| 78 |
# Mode labels used in both the UI radio choices and the dispatch logic.
|
| 79 |
# Defining them once prevents drift between the two sites.
|
|
|
|
| 272 |
f"Max {MAX_UPLOAD_W}Γ{MAX_UPLOAD_H}."
|
| 273 |
)
|
| 274 |
will_trim = meta.duration_s > PROCESS_DURATION_S
|
| 275 |
+
will_cap_fps = meta.fps > PROCESS_FPS_MAX
|
| 276 |
|
| 277 |
first_frame = extract_first_frame_array(video_path)
|
| 278 |
|
|
|
|
| 288 |
"layers": [],
|
| 289 |
"composite": None,
|
| 290 |
}
|
| 291 |
+
notes = []
|
| 292 |
+
if will_trim:
|
| 293 |
+
notes.append(
|
| 294 |
+
f"β οΈ Clip is {meta.duration_s:.1f}s β only the first "
|
| 295 |
+
f"{PROCESS_DURATION_S:.0f}s will be processed."
|
| 296 |
+
)
|
| 297 |
+
if will_cap_fps:
|
| 298 |
+
notes.append(
|
| 299 |
+
f"β οΈ Source is {meta.fps:.0f} fps β output will be "
|
| 300 |
+
f"{PROCESS_FPS_MAX:.0f} fps to fit GPU budget."
|
| 301 |
+
)
|
| 302 |
+
notes_str = ("\n\n" + "\n".join(notes)) if notes else ""
|
| 303 |
return (
|
| 304 |
gr.update(value=editor_val),
|
| 305 |
gr.update(value=None),
|
| 306 |
asdict(meta),
|
| 307 |
+
f"β Loaded β {meta_str}{notes_str}"
|
| 308 |
f"\n\nNow draw over the watermark with the brush tool.",
|
| 309 |
)
|
| 310 |
except Exception as e:
|
|
|
|
| 502 |
raw_mask, meta_state, context_px,
|
| 503 |
)
|
| 504 |
|
| 505 |
+
# Cap working fps so per-frame LaMa and per-chunk VACE both fit within
|
| 506 |
+
# their @spaces.GPU duration budgets at the worst-case input rate.
|
| 507 |
+
# ``working_meta`` is what frames_to_video uses to set the output's
|
| 508 |
+
# encode framerate — must match what extract_frames was given so the
|
| 509 |
+
# output mp4's duration equals the trimmed input duration.
|
| 510 |
+
working_fps = min(meta.fps, PROCESS_FPS_MAX)
|
| 511 |
+
working_meta = replace(meta, fps=working_fps)
|
| 512 |
+
|
| 513 |
with VideoWorkspace() as ws:
|
| 514 |
# Preserve the original file extension so FFmpeg can detect the
|
| 515 |
# container format. Gradio always adds an extension for video
|
|
|
|
| 521 |
# ── Extract frames (CFR-forced for VFR safety) ─────────────────
|
| 522 |
progress(0.10, desc="Extracting framesβ¦")
|
| 523 |
frame_paths = extract_frames(
|
| 524 |
+
safe_video, ws.frames_dir, fps=working_fps,
|
| 525 |
max_duration_s=PROCESS_DURATION_S,
|
| 526 |
)
|
| 527 |
total = len(frame_paths)
|
|
|
|
| 542 |
if not is_prewarm_done():
|
| 543 |
progress(0.16, desc="Waiting for VACE checkpoint cache to finish prewarmingβ¦")
|
| 544 |
wait_for_prewarm()
|
| 545 |
+
# If prewarm raised, the cache is incomplete and the
|
| 546 |
+
# local_files_only=True from_pretrained calls inside
|
| 547 |
+
# _get_pipe would fail with a confusing cache-miss error.
|
| 548 |
+
# Surface the real cause and route the user to Fast mode.
|
| 549 |
+
err = get_prewarm_error()
|
| 550 |
+
if err is not None:
|
| 551 |
+
raise gr.Error(
|
| 552 |
+
f"VACE checkpoint download failed: {err}. "
|
| 553 |
+
f"Use Fast (LaMa) mode, or restart the Space to retry the download."
|
| 554 |
+
)
|
| 555 |
_gpu_inpaint_vace(
|
| 556 |
frame_paths, crop_region, inpaint_mask,
|
| 557 |
ws.out_frames_dir, progress,
|
|
|
|
| 560 |
# ── CPU: encode + mux ───────────────────────────────────────────
|
| 561 |
progress(0.95, desc="Encoding videoβ¦")
|
| 562 |
silent_path = ws.path("silent.mp4")
|
| 563 |
+
frames_to_video(ws.out_frames_dir, silent_path, working_meta)
|
| 564 |
|
| 565 |
# The final mp4 outlives the VideoWorkspace (returned to Gradio
|
| 566 |
# for download), so it goes to the system tempdir, not ``ws``.
|
|
@@ -127,9 +127,11 @@ NEGATIVE_PROMPT = (
|
|
| 127 |
# populated before any user clicks "Quality" mode.
|
| 128 |
|
| 129 |
_prewarm_thread: Optional[threading.Thread] = None
|
|
|
|
| 130 |
|
| 131 |
|
| 132 |
def _prewarm_blocking() -> None:
|
|
|
|
| 133 |
try:
|
| 134 |
from huggingface_hub import snapshot_download
|
| 135 |
|
|
@@ -143,8 +145,9 @@ def _prewarm_blocking() -> None:
|
|
| 143 |
allow_patterns=[VACE_LORA_FILE],
|
| 144 |
)
|
| 145 |
print("[VACE] Checkpoint cache pre-warmed.")
|
| 146 |
-
except
|
| 147 |
-
|
|
|
|
| 148 |
|
| 149 |
|
| 150 |
def prewarm_vace_cache() -> None:
|
|
@@ -183,6 +186,17 @@ def is_prewarm_done() -> bool:
|
|
| 183 |
return _prewarm_thread is None or not _prewarm_thread.is_alive()
|
| 184 |
|
| 185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
# ---------------------------------------------------------------------------
|
| 187 |
# Pipeline singleton (cold load is expensive — keep it warm across calls)
|
| 188 |
# ---------------------------------------------------------------------------
|
|
|
|
| 127 |
# populated before any user clicks "Quality" mode.
|
| 128 |
|
| 129 |
_prewarm_thread: Optional[threading.Thread] = None
|
| 130 |
+
_prewarm_error: Optional[BaseException] = None
|
| 131 |
|
| 132 |
|
| 133 |
def _prewarm_blocking() -> None:
|
| 134 |
+
global _prewarm_error
|
| 135 |
try:
|
| 136 |
from huggingface_hub import snapshot_download
|
| 137 |
|
|
|
|
| 145 |
allow_patterns=[VACE_LORA_FILE],
|
| 146 |
)
|
| 147 |
print("[VACE] Checkpoint cache pre-warmed.")
|
| 148 |
+
except BaseException as exc:
|
| 149 |
+
_prewarm_error = exc
|
| 150 |
+
print(f"[VACE] Pre-warm failed ({exc}).")
|
| 151 |
|
| 152 |
|
| 153 |
def prewarm_vace_cache() -> None:
|
|
|
|
| 186 |
return _prewarm_thread is None or not _prewarm_thread.is_alive()
|
| 187 |
|
| 188 |
|
| 189 |
+
def get_prewarm_error() -> Optional[BaseException]:
|
| 190 |
+
"""Return the exception raised by the prewarm thread, if any.
|
| 191 |
+
|
| 192 |
+
A non-None return means the cache is **incomplete** and any
|
| 193 |
+
``local_files_only=True`` from_pretrained call inside _get_pipe()
|
| 194 |
+
will fail. Callers should surface a friendly message and route the
|
| 195 |
+
user to Fast/LaMa mode instead of the cryptic cache-miss error.
|
| 196 |
+
"""
|
| 197 |
+
return _prewarm_error
|
| 198 |
+
|
| 199 |
+
|
| 200 |
# ---------------------------------------------------------------------------
|
| 201 |
# Pipeline singleton (cold load is expensive — keep it warm across calls)
|
| 202 |
# ---------------------------------------------------------------------------
|