BoxOfColors Claude Opus 4.7 (1M context) committed on
Commit
89dfe5f
·
1 Parent(s): b859009

fix: cap working fps and surface prewarm failures (critique of b859009)

Browse files

Critical re-review of the previous round caught two real issues:

1. The 180→240s LaMa duration bump was insufficient and missed VACE
The original gap I tried to address (60fps × 15s exceeds @spaces.GPU
budget) wasn't actually closed:
- LaMa worst case ≈ 270s vs new 240s budget — still overruns.
- VACE has the same problem and I missed it: 60fps × 15s = 900
frames → range(0, 900, 73) = 13 chunks × ~25s = ~325s vs the
300s VACE budget.

Real fix: PROCESS_FPS_MAX = 30. Both extraction and encoding use the
capped value via a working_meta clone (dataclasses.replace), so the
output mp4 duration matches the trimmed input. At 30fps both modes'
worst case fits comfortably:
- LaMa: 30 × 15 × 0.3 = 135s ≪ 240s
- VACE: 7 chunks × 25s = 175s ≪ 300s
on_video_upload now surfaces a "fps will be capped to 30" notice so
the user knows why a 60fps source is processed at 30fps.

2. wait_for_prewarm didn't track failure
If the prewarm thread crashed (network blip, disk full, mirror
permission error), is_prewarm_done() would return True (the thread
is dead), the user would click Quality, and _get_pipe's
from_pretrained(local_files_only=True) calls would fail with a
cryptic cache-miss error.

New module-level _prewarm_error captures whatever the thread raised
(using BaseException so KeyboardInterrupt etc. surface too). New
get_prewarm_error() accessor lets run_pipeline check before
acquiring the GPU lease and raise a friendly gr.Error pointing the
user to Fast/LaMa mode and a Space restart for retry.

Both LaMa duration (240s) and VACE duration (300s) stay where they
are — the worst case is now bounded by PROCESS_FPS_MAX, not by the
nominal upload limits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +44 -11
  2. pipeline/vace.py +16 -2
app.py CHANGED
@@ -16,7 +16,7 @@ from __future__ import annotations
16
  import os
17
  import shutil
18
  import tempfile
19
- from dataclasses import asdict, fields
20
  from pathlib import Path
21
 
22
  import gradio as gr
@@ -46,8 +46,8 @@ from pipeline.crop import (
46
  )
47
  from pipeline.lama import inpaint_frames_lama_stream
48
  from pipeline.vace import (
49
- inpaint_frames_vace_stream, is_prewarm_done, prewarm_vace_cache,
50
- wait_for_prewarm,
51
  )
52
  from pipeline.video import (
53
  VideoMeta, VideoWorkspace,
@@ -67,6 +67,13 @@ UPLOAD_DURATION_S = 60.0
67
  # Maximum accepted source resolution. Larger uploads are rejected outright.
68
  MAX_UPLOAD_W = 1920
69
  MAX_UPLOAD_H = 1080
 
 
 
 
 
 
 
70
 
71
  # Mode labels used in both the UI radio choices and the dispatch logic.
72
  # Defining them once prevents drift between the two sites.
@@ -265,6 +272,7 @@ def on_video_upload(video_path: str | None):
265
  f"Max {MAX_UPLOAD_W}Γ—{MAX_UPLOAD_H}."
266
  )
267
  will_trim = meta.duration_s > PROCESS_DURATION_S
 
268
 
269
  first_frame = extract_first_frame_array(video_path)
270
 
@@ -280,16 +288,23 @@ def on_video_upload(video_path: str | None):
280
  "layers": [],
281
  "composite": None,
282
  }
283
- trim_note = (
284
- f"\n\n⚠️ Clip is {meta.duration_s:.1f}s β€” only the first "
285
- f"{PROCESS_DURATION_S:.0f}s will be processed."
286
- if will_trim else ""
287
- )
 
 
 
 
 
 
 
288
  return (
289
  gr.update(value=editor_val),
290
  gr.update(value=None),
291
  asdict(meta),
292
- f"βœ“ Loaded β€” {meta_str}{trim_note}"
293
  f"\n\nNow draw over the watermark with the brush tool.",
294
  )
295
  except Exception as e:
@@ -487,6 +502,14 @@ def run_pipeline(
487
  raw_mask, meta_state, context_px,
488
  )
489
 
 
 
 
 
 
 
 
 
490
  with VideoWorkspace() as ws:
491
  # Preserve the original file extension so FFmpeg can detect the
492
  # container format. Gradio always adds an extension for video
@@ -498,7 +521,7 @@ def run_pipeline(
498
  # ── Extract frames (CFR-forced for VFR safety) ─────────────────
499
  progress(0.10, desc="Extracting frames…")
500
  frame_paths = extract_frames(
501
- safe_video, ws.frames_dir, fps=meta.fps,
502
  max_duration_s=PROCESS_DURATION_S,
503
  )
504
  total = len(frame_paths)
@@ -519,6 +542,16 @@ def run_pipeline(
519
  if not is_prewarm_done():
520
  progress(0.16, desc="Waiting for VACE checkpoint cache to finish prewarming…")
521
  wait_for_prewarm()
 
 
 
 
 
 
 
 
 
 
522
  _gpu_inpaint_vace(
523
  frame_paths, crop_region, inpaint_mask,
524
  ws.out_frames_dir, progress,
@@ -527,7 +560,7 @@ def run_pipeline(
527
  # ── CPU: encode + mux ──────────────────────────────────────��────
528
  progress(0.95, desc="Encoding video…")
529
  silent_path = ws.path("silent.mp4")
530
- frames_to_video(ws.out_frames_dir, silent_path, meta)
531
 
532
  # The final mp4 outlives the VideoWorkspace (returned to Gradio
533
  # for download), so it goes to the system tempdir, not ``ws``.
 
16
  import os
17
  import shutil
18
  import tempfile
19
+ from dataclasses import asdict, fields, replace
20
  from pathlib import Path
21
 
22
  import gradio as gr
 
46
  )
47
  from pipeline.lama import inpaint_frames_lama_stream
48
  from pipeline.vace import (
49
+ get_prewarm_error, inpaint_frames_vace_stream, is_prewarm_done,
50
+ prewarm_vace_cache, wait_for_prewarm,
51
  )
52
  from pipeline.video import (
53
  VideoMeta, VideoWorkspace,
 
67
  # Maximum accepted source resolution. Larger uploads are rejected outright.
68
  MAX_UPLOAD_W = 1920
69
  MAX_UPLOAD_H = 1080
70
+ # Cap working fps for both LaMa per-frame and VACE chunked inference. At
71
+ # 60 fps × 15 s the per-frame LaMa budget overflows the 240 s @spaces.GPU
72
+ # lease (~270 s estimated), and VACE chunking produces ~13 chunks worth
73
+ # >300 s. At 30 fps both fit comfortably. Sources above this rate are
74
+ # extracted *and encoded* at PROCESS_FPS_MAX so the output mp4 duration
75
+ # matches the trimmed input.
76
+ PROCESS_FPS_MAX = 30.0
77
 
78
  # Mode labels used in both the UI radio choices and the dispatch logic.
79
  # Defining them once prevents drift between the two sites.
 
272
  f"Max {MAX_UPLOAD_W}Γ—{MAX_UPLOAD_H}."
273
  )
274
  will_trim = meta.duration_s > PROCESS_DURATION_S
275
+ will_cap_fps = meta.fps > PROCESS_FPS_MAX
276
 
277
  first_frame = extract_first_frame_array(video_path)
278
 
 
288
  "layers": [],
289
  "composite": None,
290
  }
291
+ notes = []
292
+ if will_trim:
293
+ notes.append(
294
+ f"⚠️ Clip is {meta.duration_s:.1f}s β€” only the first "
295
+ f"{PROCESS_DURATION_S:.0f}s will be processed."
296
+ )
297
+ if will_cap_fps:
298
+ notes.append(
299
+ f"⚠️ Source is {meta.fps:.0f} fps β€” output will be "
300
+ f"{PROCESS_FPS_MAX:.0f} fps to fit GPU budget."
301
+ )
302
+ notes_str = ("\n\n" + "\n".join(notes)) if notes else ""
303
  return (
304
  gr.update(value=editor_val),
305
  gr.update(value=None),
306
  asdict(meta),
307
+ f"βœ“ Loaded β€” {meta_str}{notes_str}"
308
  f"\n\nNow draw over the watermark with the brush tool.",
309
  )
310
  except Exception as e:
 
502
  raw_mask, meta_state, context_px,
503
  )
504
 
505
+ # Cap working fps so per-frame LaMa and per-chunk VACE both fit within
506
+ # their @spaces.GPU duration budgets at the worst-case input rate.
507
+ # ``working_meta`` is what frames_to_video uses to set the output's
508
+ # encode framerate β€” must match what extract_frames was given so the
509
+ # output mp4's duration equals the trimmed input duration.
510
+ working_fps = min(meta.fps, PROCESS_FPS_MAX)
511
+ working_meta = replace(meta, fps=working_fps)
512
+
513
  with VideoWorkspace() as ws:
514
  # Preserve the original file extension so FFmpeg can detect the
515
  # container format. Gradio always adds an extension for video
 
521
  # ── Extract frames (CFR-forced for VFR safety) ─────────────────
522
  progress(0.10, desc="Extracting frames…")
523
  frame_paths = extract_frames(
524
+ safe_video, ws.frames_dir, fps=working_fps,
525
  max_duration_s=PROCESS_DURATION_S,
526
  )
527
  total = len(frame_paths)
 
542
  if not is_prewarm_done():
543
  progress(0.16, desc="Waiting for VACE checkpoint cache to finish prewarming…")
544
  wait_for_prewarm()
545
+ # If prewarm raised, the cache is incomplete and the
546
+ # local_files_only=True from_pretrained calls inside
547
+ # _get_pipe would fail with a confusing cache-miss error.
548
+ # Surface the real cause and route the user to Fast mode.
549
+ err = get_prewarm_error()
550
+ if err is not None:
551
+ raise gr.Error(
552
+ f"VACE checkpoint download failed: {err}. "
553
+ f"Use Fast (LaMa) mode, or restart the Space to retry the download."
554
+ )
555
  _gpu_inpaint_vace(
556
  frame_paths, crop_region, inpaint_mask,
557
  ws.out_frames_dir, progress,
 
560
  # ── CPU: encode + mux ──────────────────────────────────────��────
561
  progress(0.95, desc="Encoding video…")
562
  silent_path = ws.path("silent.mp4")
563
+ frames_to_video(ws.out_frames_dir, silent_path, working_meta)
564
 
565
  # The final mp4 outlives the VideoWorkspace (returned to Gradio
566
  # for download), so it goes to the system tempdir, not ``ws``.
pipeline/vace.py CHANGED
@@ -127,9 +127,11 @@ NEGATIVE_PROMPT = (
127
  # populated before any user clicks "Quality" mode.
128
 
129
  _prewarm_thread: Optional[threading.Thread] = None
 
130
 
131
 
132
  def _prewarm_blocking() -> None:
 
133
  try:
134
  from huggingface_hub import snapshot_download
135
 
@@ -143,8 +145,9 @@ def _prewarm_blocking() -> None:
143
  allow_patterns=[VACE_LORA_FILE],
144
  )
145
  print("[VACE] Checkpoint cache pre-warmed.")
146
- except Exception as exc:
147
- print(f"[VACE] Pre-warm failed ({exc}); will download on first use.")
 
148
 
149
 
150
  def prewarm_vace_cache() -> None:
@@ -183,6 +186,17 @@ def is_prewarm_done() -> bool:
183
  return _prewarm_thread is None or not _prewarm_thread.is_alive()
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
186
  # ---------------------------------------------------------------------------
187
  # Pipeline singleton (cold load is expensive — keep it warm across calls)
188
  # ---------------------------------------------------------------------------
 
127
  # populated before any user clicks "Quality" mode.
128
 
129
  _prewarm_thread: Optional[threading.Thread] = None
130
+ _prewarm_error: Optional[BaseException] = None
131
 
132
 
133
  def _prewarm_blocking() -> None:
134
+ global _prewarm_error
135
  try:
136
  from huggingface_hub import snapshot_download
137
 
 
145
  allow_patterns=[VACE_LORA_FILE],
146
  )
147
  print("[VACE] Checkpoint cache pre-warmed.")
148
+ except BaseException as exc:
149
+ _prewarm_error = exc
150
+ print(f"[VACE] Pre-warm failed ({exc}).")
151
 
152
 
153
  def prewarm_vace_cache() -> None:
 
186
  return _prewarm_thread is None or not _prewarm_thread.is_alive()
187
 
188
 
189
+ def get_prewarm_error() -> Optional[BaseException]:
190
+ """Return the exception raised by the prewarm thread, if any.
191
+
192
+ A non-None return means the cache is **incomplete** and any
193
+ ``local_files_only=True`` from_pretrained call inside _get_pipe()
194
+ will fail. Callers should surface a friendly message and route the
195
+ user to Fast/LaMa mode instead of the cryptic cache-miss error.
196
+ """
197
+ return _prewarm_error
198
+
199
+
200
  # ---------------------------------------------------------------------------
201
  # Pipeline singleton (cold load is expensive — keep it warm across calls)
202
  # ---------------------------------------------------------------------------