Spaces:

JackIsNotInTheBox
/

watermark_remover

Paused

BoxOfColors Claude Opus 4.7 (1M context) committed on Apr 27

Commit

89dfe5f

1 Parent(s): b859009

fix: cap working fps and surface prewarm failures (critique of b859009)

Critical re-review of the previous round caught two real issues:

1. The 180→240s LaMa duration bump was insufficient and missed VACE
The original gap I tried to address (60fps × 15s exceeds @spaces.GPU
budget) wasn't actually closed:
- LaMa worst case ≈ 270s vs new 240s budget — still overruns.
- VACE has the same problem and I missed it: 60fps × 15s = 900
frames → range(0, 900, 73) = 13 chunks × ~25s = ~325s vs the
300s VACE budget.

Real fix: PROCESS_FPS_MAX = 30. Both extraction and encoding use the
capped value via a working_meta clone (dataclasses.replace), so the
output mp4 duration matches the trimmed input. At 30fps both modes'
worst case fits comfortably:
- LaMa: 30 × 15 × 0.3 = 135s ≪ 240s
- VACE: 7 chunks × 25s = 175s ≪ 300s
on_video_upload now surfaces an "output will be 30 fps to fit GPU
budget" notice so the user knows why a 60fps source is processed at 30fps.

2. wait_for_prewarm didn't track failure
If the prewarm thread crashed (network blip, disk full, mirror
permission error), is_prewarm_done() would return True (the thread
is dead), the user would click Quality, and _get_pipe's
from_pretrained(local_files_only=True) calls would fail with a
cryptic cache-miss error.

New module-level _prewarm_error captures whatever the thread raised
(using BaseException so KeyboardInterrupt etc. surface too). New
get_prewarm_error() accessor lets run_pipeline check before
acquiring the GPU lease and raise a friendly gr.Error pointing the
user to Fast/LaMa mode and a Space restart for retry.

Both LaMa duration (240s) and VACE duration (300s) stay where they
are — the worst case is now bounded by PROCESS_FPS_MAX, not by the
nominal upload limits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

app.py +44 -11
pipeline/vace.py +16 -2

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ from __future__ import annotations
 import os
 import shutil
 import tempfile
-from dataclasses import asdict, fields
 from pathlib import Path
 import gradio as gr
@@ -46,8 +46,8 @@ from pipeline.crop import (
 )
 from pipeline.lama import inpaint_frames_lama_stream
 from pipeline.vace import (
-    inpaint_frames_vace_stream, is_prewarm_done, prewarm_vace_cache,
-    wait_for_prewarm,
 )
 from pipeline.video import (
     VideoMeta, VideoWorkspace,
@@ -67,6 +67,13 @@ UPLOAD_DURATION_S = 60.0
 # Maximum accepted source resolution. Larger uploads are rejected outright.
 MAX_UPLOAD_W = 1920
 MAX_UPLOAD_H = 1080
 # Mode labels used in both the UI radio choices and the dispatch logic.
 # Defining them once prevents drift between the two sites.
@@ -265,6 +272,7 @@ def on_video_upload(video_path: str | None):
                 f"Max {MAX_UPLOAD_W}×{MAX_UPLOAD_H}."
             )
         will_trim = meta.duration_s > PROCESS_DURATION_S
         first_frame = extract_first_frame_array(video_path)
@@ -280,16 +288,23 @@ def on_video_upload(video_path: str | None):
             "layers": [],
             "composite": None,
         }
-        trim_note = (
-            f"\n\n⚠️ Clip is {meta.duration_s:.1f}s — only the first "
-            f"{PROCESS_DURATION_S:.0f}s will be processed."
-            if will_trim else ""
-        )
         return (
             gr.update(value=editor_val),
             gr.update(value=None),
             asdict(meta),
-            f"✓ Loaded — {meta_str}{trim_note}"
             f"\n\nNow draw over the watermark with the brush tool.",
         )
     except Exception as e:
@@ -487,6 +502,14 @@ def run_pipeline(
         raw_mask, meta_state, context_px,
     )
     with VideoWorkspace() as ws:
         # Preserve the original file extension so FFmpeg can detect the
         # container format. Gradio always adds an extension for video
@@ -498,7 +521,7 @@ def run_pipeline(
         # ── Extract frames (CFR-forced for VFR safety) ─────────────────
         progress(0.10, desc="Extracting frames…")
         frame_paths = extract_frames(
-            safe_video, ws.frames_dir, fps=meta.fps,
             max_duration_s=PROCESS_DURATION_S,
         )
         total = len(frame_paths)
@@ -519,6 +542,16 @@ def run_pipeline(
             if not is_prewarm_done():
                 progress(0.16, desc="Waiting for VACE checkpoint cache to finish prewarming…")
                 wait_for_prewarm()
             _gpu_inpaint_vace(
                 frame_paths, crop_region, inpaint_mask,
                 ws.out_frames_dir, progress,
@@ -527,7 +560,7 @@ def run_pipeline(
         # ── CPU: encode + mux ───────────────────────────────────────────
         progress(0.95, desc="Encoding video…")
         silent_path = ws.path("silent.mp4")
-        frames_to_video(ws.out_frames_dir, silent_path, meta)
         # The final mp4 outlives the VideoWorkspace (returned to Gradio
         # for download), so it goes to the system tempdir, not ``ws``.

 import os
 import shutil
 import tempfile
+from dataclasses import asdict, fields, replace
 from pathlib import Path
 import gradio as gr
 )
 from pipeline.lama import inpaint_frames_lama_stream
 from pipeline.vace import (
+    get_prewarm_error, inpaint_frames_vace_stream, is_prewarm_done,
+    prewarm_vace_cache, wait_for_prewarm,
 )
 from pipeline.video import (
     VideoMeta, VideoWorkspace,
 # Maximum accepted source resolution. Larger uploads are rejected outright.
 MAX_UPLOAD_W = 1920
 MAX_UPLOAD_H = 1080
+# Cap working fps for both LaMa per-frame and VACE chunked inference. At
+# 60 fps × 15 s the per-frame LaMa budget overflows the 240 s @spaces.GPU
+# lease (~270 s estimated), and VACE chunking produces ~13 chunks worth
+# >300 s. At 30 fps both fit comfortably. Sources above this rate are
+# extracted *and encoded* at PROCESS_FPS_MAX so the output mp4 duration
+# matches the trimmed input.
+PROCESS_FPS_MAX = 30.0
 # Mode labels used in both the UI radio choices and the dispatch logic.
 # Defining them once prevents drift between the two sites.
                 f"Max {MAX_UPLOAD_W}×{MAX_UPLOAD_H}."
             )
         will_trim = meta.duration_s > PROCESS_DURATION_S
+        will_cap_fps = meta.fps > PROCESS_FPS_MAX
         first_frame = extract_first_frame_array(video_path)
             "layers": [],
             "composite": None,
         }
+        notes = []
+        if will_trim:
+            notes.append(
+                f"⚠️ Clip is {meta.duration_s:.1f}s — only the first "
+                f"{PROCESS_DURATION_S:.0f}s will be processed."
+            )
+        if will_cap_fps:
+            notes.append(
+                f"⚠️ Source is {meta.fps:.0f} fps — output will be "
+                f"{PROCESS_FPS_MAX:.0f} fps to fit GPU budget."
+            )
+        notes_str = ("\n\n" + "\n".join(notes)) if notes else ""
         return (
             gr.update(value=editor_val),
             gr.update(value=None),
             asdict(meta),
+            f"✓ Loaded — {meta_str}{notes_str}"
             f"\n\nNow draw over the watermark with the brush tool.",
         )
     except Exception as e:
         raw_mask, meta_state, context_px,
     )
+    # Cap working fps so per-frame LaMa and per-chunk VACE both fit within
+    # their @spaces.GPU duration budgets at the worst-case input rate.
+    # ``working_meta`` is what frames_to_video uses to set the output's
+    # encode framerate — must match what extract_frames was given so the
+    # output mp4's duration equals the trimmed input duration.
+    working_fps = min(meta.fps, PROCESS_FPS_MAX)
+    working_meta = replace(meta, fps=working_fps)
     with VideoWorkspace() as ws:
         # Preserve the original file extension so FFmpeg can detect the
         # container format. Gradio always adds an extension for video
         # ── Extract frames (CFR-forced for VFR safety) ─────────────────
         progress(0.10, desc="Extracting frames…")
         frame_paths = extract_frames(
+            safe_video, ws.frames_dir, fps=working_fps,
             max_duration_s=PROCESS_DURATION_S,
         )
         total = len(frame_paths)
             if not is_prewarm_done():
                 progress(0.16, desc="Waiting for VACE checkpoint cache to finish prewarming…")
                 wait_for_prewarm()
+            # If prewarm raised, the cache is incomplete and the
+            # local_files_only=True from_pretrained calls inside
+            # _get_pipe would fail with a confusing cache-miss error.
+            # Surface the real cause and route the user to Fast mode.
+            err = get_prewarm_error()
+            if err is not None:
+                raise gr.Error(
+                    f"VACE checkpoint download failed: {err}. "
+                    f"Use Fast (LaMa) mode, or restart the Space to retry the download."
+                )
             _gpu_inpaint_vace(
                 frame_paths, crop_region, inpaint_mask,
                 ws.out_frames_dir, progress,
         # ── CPU: encode + mux ───────────────────────────────────────────
         progress(0.95, desc="Encoding video…")
         silent_path = ws.path("silent.mp4")
+        frames_to_video(ws.out_frames_dir, silent_path, working_meta)
         # The final mp4 outlives the VideoWorkspace (returned to Gradio
         # for download), so it goes to the system tempdir, not ``ws``.

pipeline/vace.py CHANGED Viewed

@@ -127,9 +127,11 @@ NEGATIVE_PROMPT = (
 # populated before any user clicks "Quality" mode.
 _prewarm_thread: Optional[threading.Thread] = None
 def _prewarm_blocking() -> None:
     try:
         from huggingface_hub import snapshot_download
@@ -143,8 +145,9 @@ def _prewarm_blocking() -> None:
             allow_patterns=[VACE_LORA_FILE],
         )
         print("[VACE] Checkpoint cache pre-warmed.")
-    except Exception as exc:
-        print(f"[VACE] Pre-warm failed ({exc}); will download on first use.")
 def prewarm_vace_cache() -> None:
@@ -183,6 +186,17 @@ def is_prewarm_done() -> bool:
     return _prewarm_thread is None or not _prewarm_thread.is_alive()
 # ---------------------------------------------------------------------------
 # Pipeline singleton (cold load is expensive — keep it warm across calls)
 # ---------------------------------------------------------------------------

 # populated before any user clicks "Quality" mode.
 _prewarm_thread: Optional[threading.Thread] = None
+_prewarm_error: Optional[BaseException] = None
 def _prewarm_blocking() -> None:
+    global _prewarm_error
     try:
         from huggingface_hub import snapshot_download
             allow_patterns=[VACE_LORA_FILE],
         )
         print("[VACE] Checkpoint cache pre-warmed.")
+    except BaseException as exc:
+        _prewarm_error = exc
+        print(f"[VACE] Pre-warm failed ({exc}).")
 def prewarm_vace_cache() -> None:
     return _prewarm_thread is None or not _prewarm_thread.is_alive()
+def get_prewarm_error() -> Optional[BaseException]:
+    """Return the exception raised by the prewarm thread, if any.
+    A non-None return means the cache is **incomplete** and any
+    ``local_files_only=True`` from_pretrained call inside _get_pipe()
+    will fail. Callers should surface a friendly message and route the
+    user to Fast/LaMa mode instead of the cryptic cache-miss error.
+    """
+    return _prewarm_error
 # ---------------------------------------------------------------------------
 # Pipeline singleton (cold load is expensive — keep it warm across calls)
 # ---------------------------------------------------------------------------