Spaces:

JackIsNotInTheBox
/

watermark_remover

Paused

BoxOfColors Claude Opus 4.7 (1M context) committed on Apr 27

Commit

f2818e6

1 Parent(s): ed7d6c7

refactor: round 3 — stdout-pipe first-frame extract, _card_title, _msg helpers

pipeline/video.py
- Replace extract_first_frame (file-based) with extract_first_frame_array
that streams the PNG through ffmpeg's stdout pipe and returns an RGB
ndarray directly. Eliminates the mkstemp/open/unlink dance the only
caller (on_video_upload) had to perform — and removes a tempfile
artifact from /tmp on every upload.

app.py
- _card_title(text, step?, top_margin?) helper replaces 6 hand-built
'<div class="card-title">…</div>' gr.HTML calls in the UI block.
- on_video_upload: nested _msg(text) helper builds the (editor, crop,
state, status) 4-tuple for early-error returns. Drops the repetitive
``return gr.update(), gr.update(), None, "…"`` pattern at every guard.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

app.py +28 -30
pipeline/video.py +19 -23

app.py CHANGED Viewed

@@ -45,7 +45,7 @@ from pipeline.crop import (
 )
 from pipeline.video import (
     VideoMeta, VideoWorkspace,
-    attach_audio, extract_first_frame, extract_frames, frames_to_video, probe,
 )
 from pipeline.vace import prewarm_vace_cache
@@ -198,8 +198,12 @@ def _meta_from_state(d: dict) -> VideoMeta:
 def on_video_upload(video_path: str | None):
     """Extract first frame and populate the ImageEditor."""
     if not video_path:
-        return gr.update(), gr.update(), None, "Upload a video to begin."
     try:
         meta = probe(video_path)
@@ -211,37 +215,24 @@ def on_video_upload(video_path: str | None):
         # arbitrarily long clip through.
         max_frames = round(UPLOAD_DURATION_S * max(meta.fps, 1.0))
         if meta.duration_s > UPLOAD_DURATION_S:
-            return (
-                gr.update(), gr.update(), None,
                 f"❌ Clip too long ({meta.duration_s:.1f}s). "
                 f"Max {UPLOAD_DURATION_S:.0f}s; only the first "
-                f"{PROCESS_DURATION_S:.0f}s would be processed anyway.",
             )
         if meta.frame_count > max_frames:
-            return (
-                gr.update(), gr.update(), None,
-                f"❌ Clip too long ({meta.frame_count} frames at {meta.fps:.2f} fps). "
-                f"Max {UPLOAD_DURATION_S:.0f} seconds.",
             )
         if meta.width * meta.height > MAX_UPLOAD_W * MAX_UPLOAD_H:
-            return (
-                gr.update(), gr.update(), None,
                 f"❌ Resolution too high ({meta.width}×{meta.height}). "
-                f"Max {MAX_UPLOAD_W}×{MAX_UPLOAD_H}.",
             )
         will_trim = meta.duration_s > PROCESS_DURATION_S
-        # Extract first frame — mkstemp so the fd is closed before FFmpeg writes
-        fd, tmp_path = tempfile.mkstemp(suffix=".png", prefix="wm_frame_")
-        os.close(fd)
-        try:
-            extract_first_frame(video_path, tmp_path)
-            first_frame = np.array(Image.open(tmp_path).convert("RGB"))
-        finally:
-            try:
-                os.unlink(tmp_path)
-            except OSError:
-                pass
         meta_str = (
             f"{meta.width}×{meta.height} · {meta.fps:.3g} fps · "
@@ -268,7 +259,7 @@ def on_video_upload(video_path: str | None):
             f"\n\nNow draw over the watermark with the brush tool.",
         )
     except Exception as e:
-        return gr.update(), gr.update(), None, f"❌ Error: {e}"
 def on_preview_crop(editor_value: dict | None, meta_state: dict | None, context_px: int):
@@ -553,6 +544,13 @@ def run_pipeline(
 # UI
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
     # State
@@ -569,7 +567,7 @@ with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
     # ── Step 1 + 2 side by side ─────────────────────────────────────────────
     with gr.Row(equal_height=False):
         with gr.Column(scale=1):
-            gr.HTML('<div class="card-title"><span class="step-badge">1</span>Upload Video</div>')
             video_input = gr.Video(
                 label=(
                     f"Source clip (up to {UPLOAD_DURATION_S:.0f}s, "
@@ -579,7 +577,7 @@ with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
                 elem_id="video-input",
             )
-            gr.HTML('<div class="card-title" style="margin-top:16px"><span class="step-badge">2</span>Mode</div>')
             mode_radio = gr.Radio(
                 choices=list(ALL_MODES),
                 value=MODE_FAST,
@@ -587,7 +585,7 @@ with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
                 elem_classes=["mode-radio"],
             )
-            gr.HTML('<div class="card-title" style="margin-top:16px">⚙️ Advanced</div>')
             context_slider = gr.Slider(
                 minimum=32,
                 maximum=192,
@@ -598,7 +596,7 @@ with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
             )
         with gr.Column(scale=2):
-            gr.HTML('<div class="card-title"><span class="step-badge">3</span>Draw Over the Watermark</div>')
             editor = gr.ImageEditor(
                 label="Paint over the watermark (brush tool)",
                 type="numpy",
@@ -639,14 +637,14 @@ with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
     # ── Outputs ──────────────────────────────────────────────────────────────
     with gr.Row():
         with gr.Column():
-            gr.HTML('<div class="card-title">Crop Preview</div>')
             crop_preview = gr.Image(
                 label="",
                 type="numpy",
                 show_label=False,
             )
         with gr.Column():
-            gr.HTML('<div class="card-title">Output Video</div>')
             video_output = gr.Video(
                 label="",
                 show_label=False,

 )
 from pipeline.video import (
     VideoMeta, VideoWorkspace,
+    attach_audio, extract_first_frame_array, extract_frames, frames_to_video, probe,
 )
 from pipeline.vace import prewarm_vace_cache
 def on_video_upload(video_path: str | None):
     """Extract first frame and populate the ImageEditor."""
+    # Tuple shape: (editor_update, crop_preview_update, meta_state, status).
+    def _msg(text: str):
+        return gr.update(), gr.update(), None, text
     if not video_path:
+        return _msg("Upload a video to begin.")
     try:
         meta = probe(video_path)
         # arbitrarily long clip through.
         max_frames = round(UPLOAD_DURATION_S * max(meta.fps, 1.0))
         if meta.duration_s > UPLOAD_DURATION_S:
+            return _msg(
                 f"❌ Clip too long ({meta.duration_s:.1f}s). "
                 f"Max {UPLOAD_DURATION_S:.0f}s; only the first "
+                f"{PROCESS_DURATION_S:.0f}s would be processed anyway."
             )
         if meta.frame_count > max_frames:
+            return _msg(
+                f"❌ Clip too long ({meta.frame_count} frames at "
+                f"{meta.fps:.2f} fps). Max {UPLOAD_DURATION_S:.0f} seconds."
             )
         if meta.width * meta.height > MAX_UPLOAD_W * MAX_UPLOAD_H:
+            return _msg(
                 f"❌ Resolution too high ({meta.width}×{meta.height}). "
+                f"Max {MAX_UPLOAD_W}×{MAX_UPLOAD_H}."
             )
         will_trim = meta.duration_s > PROCESS_DURATION_S
+        first_frame = extract_first_frame_array(video_path)
         meta_str = (
             f"{meta.width}×{meta.height} · {meta.fps:.3g} fps · "
             f"\n\nNow draw over the watermark with the brush tool.",
         )
     except Exception as e:
+        return _msg(f"❌ Error: {e}")
 def on_preview_crop(editor_value: dict | None, meta_state: dict | None, context_px: int):
 # UI
 # ---------------------------------------------------------------------------
def _card_title(text: str, step: int | None = None, top_margin: bool = False) -> gr.HTML:
    """Build the ``gr.HTML`` heading placed at the top of a UI card.

    Parameters
    ----------
    text
        Heading content, inserted into the markup as-is (no escaping).
    step
        When not ``None``, a ``step-badge`` span holding this number is
        placed in front of the text.
    top_margin
        When true, the heading div gets an inline ``margin-top:16px`` style.

    Returns
    -------
    gr.HTML
        The component wrapping a ``<div class="card-title">`` element.
    """
    # Assemble the opening-tag attributes first, then the inner markup.
    attrs = 'class="card-title"'
    if top_margin:
        attrs += ' style="margin-top:16px"'

    inner = text
    if step is not None:
        inner = f'<span class="step-badge">{step}</span>{text}'

    return gr.HTML(f"<div {attrs}>{inner}</div>")
 with gr.Blocks(title="Video Watermark Remover", css=CSS) as demo:
     # State
     # ── Step 1 + 2 side by side ─────────────────────────────────────────────
     with gr.Row(equal_height=False):
         with gr.Column(scale=1):
+            _card_title("Upload Video", step=1)
             video_input = gr.Video(
                 label=(
                     f"Source clip (up to {UPLOAD_DURATION_S:.0f}s, "
                 elem_id="video-input",
             )
+            _card_title("Mode", step=2, top_margin=True)
             mode_radio = gr.Radio(
                 choices=list(ALL_MODES),
                 value=MODE_FAST,
                 elem_classes=["mode-radio"],
             )
+            _card_title("⚙️ Advanced", top_margin=True)
             context_slider = gr.Slider(
                 minimum=32,
                 maximum=192,
             )
         with gr.Column(scale=2):
+            _card_title("Draw Over the Watermark", step=3)
             editor = gr.ImageEditor(
                 label="Paint over the watermark (brush tool)",
                 type="numpy",
     # ── Outputs ──────────────────────────────────────────────────────────────
     with gr.Row():
         with gr.Column():
+            _card_title("Crop Preview")
             crop_preview = gr.Image(
                 label="",
                 type="numpy",
                 show_label=False,
             )
         with gr.Column():
+            _card_title("Output Video")
             video_output = gr.Video(
                 label="",
                 show_label=False,

pipeline/video.py CHANGED Viewed

@@ -16,6 +16,7 @@ clean error messages to the Gradio UI.
 from __future__ import annotations
 import json
 import math
 import shutil
@@ -25,6 +26,9 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Optional
 # ---------------------------------------------------------------------------
 # Video metadata
@@ -222,37 +226,29 @@ def extract_frames(
     return frames
-def extract_first_frame(video_path: str | Path, out_path: str | Path) -> Path:
-    """
-    Extract only the first frame, e.g. for mask drawing in the UI.
-    Parameters
-    ----------
-    video_path : str | Path
-    out_path : str | Path
-        Path to write the PNG (parent directory must exist).
-    Returns
-    -------
-    Path
-        Same as out_path, now guaranteed to exist.
     """
-    out_path = Path(out_path)
-    out_path.parent.mkdir(parents=True, exist_ok=True)
     cmd = [
         "ffmpeg",
         "-y",
         "-i", str(video_path),
         "-frames:v", "1",
-        "-update", "1",
-        str(out_path),
     ]
-    _run(cmd)
-    if not out_path.exists():
-        raise RuntimeError(f"First frame extraction failed for {video_path}")
-    return out_path
 # ---------------------------------------------------------------------------

 from __future__ import annotations
+import io
 import json
 import math
 import shutil
 from pathlib import Path
 from typing import List, Optional
+import numpy as np
+from PIL import Image
 # ---------------------------------------------------------------------------
 # Video metadata
     return frames
def extract_first_frame_array(video_path: str | Path) -> np.ndarray:
    """Extract the first frame of *video_path* as an RGB uint8 ndarray.

    Streams a single PNG-encoded frame through ffmpeg's stdout pipe and
    decodes it in memory — no on-disk temp file needed. Used by the UI to
    populate the mask-drawing editor without leaving artefacts in /tmp on
    each upload.

    Parameters
    ----------
    video_path : str | Path
        Video file handed to ffmpeg via ``-i``.

    Returns
    -------
    np.ndarray
        The decoded first frame, converted to RGB.

    Raises
    ------
    RuntimeError
        If ffmpeg exits non-zero, writes nothing to stdout, or the bytes it
        wrote cannot be decoded as an image.
    """
    cmd = [
        "ffmpeg",
        # Never let ffmpeg read from the parent's stdin: in a server process
        # it could otherwise consume or block on input that is not its own.
        "-nostdin",
        "-y",
        "-i", str(video_path),
        "-frames:v", "1",
        "-f", "image2pipe",
        "-c:v", "png",
        "pipe:1",
    ]
    result = subprocess.run(cmd, stdin=subprocess.DEVNULL, capture_output=True)
    if result.returncode != 0 or not result.stdout:
        # Only the tail of stderr is kept so the UI message stays short.
        stderr = result.stderr.decode("utf-8", errors="replace")[-500:]
        raise RuntimeError(
            f"First-frame extraction failed for {video_path}.\nstderr: {stderr}"
        )
    try:
        # Context manager releases the decoder's resources once the pixel
        # data has been copied into the ndarray.
        with Image.open(io.BytesIO(result.stdout)) as img:
            return np.array(img.convert("RGB"))
    except (OSError, ValueError) as err:
        # Surface decode failures the same way as ffmpeg failures.
        raise RuntimeError(
            f"First-frame extraction failed for {video_path}: "
            f"could not decode ffmpeg output ({err})"
        ) from err
 # ---------------------------------------------------------------------------