Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Community

MogensR committed on Aug 29, 2025

Commit

c01f936

1 Parent(s): bc45f7d

Update processing/video/video_processor.py

Browse files

Files changed (1) hide show

processing/video/video_processor.py +30 -323

processing/video/video_processor.py CHANGED Viewed

@@ -1,327 +1,9 @@
-#!/usr/bin/env python3
-"""
-Compatibility shim: CoreVideoProcessor (stabilized + crisper edges)
-- Accepts background configs:
-    {"custom_path": "/path/to/image.png"}
-    {"background_choice": "<preset_key>"}
-    {"gradient": {type, start, end, angle_deg}}
-- Model-only downscale (max_model_size) for speed, full-res render.
-- FFmpeg pipe writer with encoder fallbacks and stderr surfacing; falls back
-  to OpenCV VideoWriter if FFmpeg isn't available or fails mid-run.
-- Temporal smoothing + mask hardening to avoid flicker/ghosting.
-- Windowed two-phase execution (SAM2 window → release → MatAnyone window)
-  to avoid GPU fragmentation/OOM on T4 (16GB).
-Requirements for the models provider:
-- get_sam2() -> predictor or None
-- get_matanyone() -> processor or None
-"""
-from __future__ import annotations
-from dataclasses import dataclass
-from typing import Optional, Dict, Any, Callable, List, Tuple
-import os
-import time
-import threading
-import shutil
-import subprocess
-import shlex
-import cv2
-import numpy as np
-# Try project logger; fall back to std logging
-try:
-    from utils.logging_setup import make_logger
-    _log = make_logger(__name__)
-except Exception:
-    import logging
-    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
-    _log = logging.getLogger(__name__)
-# Import directly from utils.cv_processing to avoid circular imports via utils/__init__.py
-from utils.cv_processing import (
-    segment_person_hq,
-    refine_mask_hq,
-    replace_background_hq,
-    create_professional_background,
-    validate_video_file,
-    PROFESSIONAL_BACKGROUNDS,
-)
-# ---------- local gradient helper (no extra imports needed) ----------
-def _to_rgb(c):
-    if isinstance(c, (list, tuple)) and len(c) == 3:
-        return tuple(int(x) for x in c)
-    if isinstance(c, str) and c.startswith("#") and len(c) == 7:
-        return tuple(int(c[i:i+2], 16) for i in (1, 3, 5))
-    return (255, 255, 255)
-def _create_gradient_background_local(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
-    """
-    Minimal gradient generator for backgrounds (linear with rotation).
-    spec = {"type": "linear"|"radial"(ignored), "start": (r,g,b)|"#rrggbb", "end": ..., "angle_deg": float}
-    Returns RGB np.uint8 (H,W,3)
-    """
-    start = _to_rgb(spec.get("start", "#222222"))
-    end   = _to_rgb(spec.get("end",   "#888888"))
-    angle = float(spec.get("angle_deg", 0))
-    # build vertical gradient
-    bg = np.zeros((height, width, 3), np.uint8)
-    for y in range(height):
-        t = y / max(1, height - 1)
-        r = int(start[0]*(1-t) + end[0]*t)
-        g = int(start[1]*(1-t) + end[1]*t)
-        b = int(start[2]*(1-t) + end[2]*t)
-        bg[y, :] = (r, g, b)
-    if abs(angle) % 360 < 1e-6:
-        return bg
-    # rotate by angle using OpenCV (RGB-safe)
-    center = (width / 2, height / 2)
-    M = cv2.getRotationMatrix2D(center, angle, 1.0)
-    rot = cv2.warpAffine(bg, M, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
-    return rot
-@dataclass
-class ProcessorConfig:
-    background_preset: str = "office"    # key in PROFESSIONAL_BACKGROUNDS
-    write_fps: Optional[float] = None    # None -> keep source fps
-    # Model-only downscale (speedup without changing output resolution)
-    max_model_size: Optional[int] = 1280
-    # FFmpeg / NVENC output (pipe). If disabled or unavailable, use OpenCV writer.
-    use_nvenc: bool = True
-    nvenc_codec: str = "h264"            # "h264" or "hevc"
-    nvenc_preset: str = "p5"             # NVENC preset string
-    nvenc_cq: int = 18                   # constant quality (lower = higher quality)
-    nvenc_tune_hq: bool = True
-    nvenc_pix_fmt: str = "yuv420p"       # browser-safe
-    # libx264 fallback
-    x264_preset: str = "medium"
-    x264_crf: int = 18
-    x264_pix_fmt: str = "yuv420p"
-    movflags_faststart: bool = True
-    # ---------- stability & edge quality ----------
-    temporal_ema_alpha: float = 0.75   # higher = calmer (0.6–0.85 typical)
-    min_iou_to_accept: float = 0.05    # reject sudden mask jumps
-    dilate_px: int = 6                 # pad edges to keep hair/ears/shoulders
-    edge_blur_px: int = 1              # tiny blur to calm edge shimmer
-    # hardening (turn soft mask into crisper 0/1)
-    hard_low: float = 0.35             # values below -> 0
-    hard_high: float = 0.70            # values above -> 1
-    mask_gamma: float = 0.90           # <1 boosts mid-tones slightly
-    # ---------- windowed two-phase control ----------
-    use_windowed: bool = True          # enable two-phase SAM2→MatAnyone per chunk
-    window_size: int = 8               # frames per window
-# Back-compat alias used elsewhere in the app
-ProcessingConfig = ProcessorConfig
-def _env_bool(name: str, default: bool) -> bool:
-    v = os.environ.get(name, None)
-    if v is None:
-        return default
-    return str(v).strip().lower() not in ("0", "no", "false", "off", "")
-def _env_int(name: str, default: int) -> int:
-    try:
-        return int(os.environ.get(name, "").strip() or default)
-    except Exception:
-        return default
-class _FFmpegPipe:
-    """
-    Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.
-    """
-    def __init__(self, width: int, height: int, fps: float, out_path: str, cfg: ProcessorConfig, log=_log):
-        self.width = int(width)
-        self.height = int(height)
-        self.fps = float(fps) if fps and fps > 0 else 25.0
-        self.out_path = out_path
-        self.cfg = cfg
-        self.log = log
-        self.proc: Optional[subprocess.Popen] = None
-        self.encoder_used: Optional[str] = None
-        self._stderr: bytes | None = None
-        self._ffmpeg = shutil.which("ffmpeg")
-        if not self._ffmpeg:
-            raise RuntimeError("ffmpeg not found on PATH")
-        self._start_with_fallbacks()
-    def _cmd_for_encoder(self, encoder: str) -> list[str]:
-        base = [
-            self._ffmpeg,
-            "-hide_banner", "-loglevel", "error",
-            "-y",
-            # rawvideo input from stdin
-            "-f", "rawvideo",
-            "-vcodec", "rawvideo",
-            "-pix_fmt", "bgr24",
-            "-s", f"{self.width}x{self.height}",
-            "-r", f"{self.fps}",
-            "-i", "-",        # stdin
-            "-an",            # no audio here
-        ]
-        if self.cfg.movflags_faststart:
-            base += ["-movflags", "+faststart"]
-        if encoder == "h264_nvenc":
-            base += [
-                "-c:v", "h264_nvenc",
-                "-preset", self.cfg.nvenc_preset,
-                "-cq", str(int(self.cfg.nvenc_cq)),
-                "-pix_fmt", self.cfg.nvenc_pix_fmt,
-            ]
-            if self.cfg.nvenc_tune_hq:
-                base += ["-tune", "hq"]
-        elif encoder == "hevc_nvenc":
-            base += [
-                "-c:v", "hevc_nvenc",
-                "-preset", self.cfg.nvenc_preset,
-                "-cq", str(int(self.cfg.nvenc_cq)),
-                "-pix_fmt", self.cfg.nvenc_pix_fmt,
-            ]
-            if self.cfg.nvenc_tune_hq:
-                base += ["-tune", "hq"]
-        elif encoder == "libx264":
-            base += [
-                "-c:v", "libx264",
-                "-preset", self.cfg.x264_preset,
-                "-crf", str(int(self.cfg.x264_crf)),
-                "-pix_fmt", self.cfg.x264_pix_fmt,
-            ]
-        elif encoder == "mpeg4":
-            base += [
-                "-c:v", "mpeg4",
-                "-q:v", "2",
-                "-pix_fmt", "yuv420p",
-            ]
-        else:
-            base += ["-c:v", "libx264", "-preset", self.cfg.x264_preset, "-crf", str(int(self.cfg.x264_crf)), "-pix_fmt", self.cfg.x264_pix_fmt]
-        base += [self.out_path]
-        return base
-    def _try_start(self, enc: str) -> bool:
-        cmd = self._cmd_for_encoder(enc)
-        try:
-            self.proc = subprocess.Popen(
-                cmd,
-                stdin=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                bufsize=10**7,
-            )
-            self.encoder_used = enc
-            self.log.info("FFmpeg started: %s", " ".join(shlex.quote(c) for c in cmd))
-            # quick poll: if ffmpeg dies immediately, fail fast
-            time.sleep(0.05)
-            if self.proc.poll() is not None:
-                self._stderr = self.proc.stderr.read() if self.proc.stderr else b""
-                self.log.warning("FFmpeg exited on start with %s: %s", enc, (self._stderr or b"").decode(errors="ignore"))
-                self.proc = None
-                return False
-            return True
-        except Exception as e:
-            self.log.warning("Failed to start FFmpeg with %s: %s", enc, e)
-            self.proc = None
-            return False
-    def _start_with_fallbacks(self):
-        encoders = []
-        if self.cfg.use_nvenc:
-            encoders += ["h264_nvenc"] if self.cfg.nvenc_codec.lower() == "h264" else ["hevc_nvenc"]
-        encoders += ["libx264", "mpeg4"]
-        for enc in encoders:
-            if self._try_start(enc):
-                return
-        msg = "Could not start FFmpeg with any encoder (nvenc/libx264/mpeg4). Is ffmpeg present and codecs available?"
-        if self._stderr:
-            msg += f" Stderr: {(self._stderr or b'').decode(errors='ignore')[:500]}"
-        raise RuntimeError(msg)
-    def write(self, frame_bgr: np.ndarray):
-        if self.proc is None or self.proc.stdin is None:
-            raise RuntimeError("FFmpeg process is not running (stdin is None).")
-        if not isinstance(frame_bgr, np.ndarray) or frame_bgr.dtype != np.uint8:
-            raise ValueError("Frame must be a np.ndarray of dtype uint8.")
-        if frame_bgr.ndim != 3 or frame_bgr.shape[2] != 3:
-            raise ValueError("Frame must have shape (H, W, 3).")
-        if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
-            raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")
-        frame_bgr = np.ascontiguousarray(frame_bgr)
-        try:
-            self.proc.stdin.write(frame_bgr.tobytes())
-        except Exception as e:
-            stderr = b""
-            try:
-                if self.proc and self.proc.stderr:
-                    stderr = self.proc.stderr.read()
-            except Exception:
-                pass
-            msg = f"FFmpeg pipe write failed: {e}"
-            if stderr:
-                msg += f"\nffmpeg stderr: {(stderr or b'').decode(errors='ignore')[:1000]}"
-            raise BrokenPipeError(msg)
-    def close(self):
-        if self.proc is None:
-            return
-        try:
-            if self.proc.stdin:
-                try:
-                    self.proc.stdin.flush()
-                except Exception:
-                    pass
-                try:
-                    self.proc.stdin.close()
-                except Exception:
-                    pass
-            if self.proc.stderr:
-                try:
-                    err = self.proc.stderr.read()
-                    if err:
-                        self.log.debug("FFmpeg stderr (tail): %s", err.decode(errors="ignore")[-2000:])
-                except Exception:
-                    pass
-            self.proc.wait(timeout=10)
-        except Exception:
-            try:
-                self.proc.kill()
-            except Exception:
-                pass
-        finally:
-            self.proc = None
 class CoreVideoProcessor:
     """
     Minimal, safe implementation used by core/app.py.
-    It relies on a models provider (e.g., ModelLoader) that implements:
-        - get_sam2()
-        - get_matanyone()
-    and uses utils.cv_processing for the pipeline.
-    Supports progress callback and cancellation via stop_event.
     """
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
@@ -331,6 +13,7 @@ def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[An
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
         self._ffmpeg = shutil.which("ffmpeg")
         # -------- Back-compat safe config flags (do not require attrs on user config)
         self._use_windowed = _env_bool(
@@ -350,6 +33,33 @@ def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[An
             self._chunk_size = 12
         self._chunk_idx = 0
     # ---------- mask post-processing (stability + crispness) ----------
     def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
         a_bin = (a >= thr).astype(np.uint8)
@@ -826,6 +536,3 @@ def process_video(
             "output_path": output_path,
         }
-# Backward-compat alias used elsewhere
-VideoProcessor = CoreVideoProcessor

+# ... (all your imports and CoreVideoProcessor class header/attributes as above)
 class CoreVideoProcessor:
     """
     Minimal, safe implementation used by core/app.py.
+    ...
     """
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
         self._ffmpeg = shutil.which("ffmpeg")
+        # (rest as before...)
         # -------- Back-compat safe config flags (do not require attrs on user config)
         self._use_windowed = _env_bool(
             self._chunk_size = 12
         self._chunk_idx = 0
+    # ---------------- ADDED METHOD ----------------
+    def prepare_background(self, background_choice: str, custom_background_path: Optional[str], width: int, height: int) -> np.ndarray:
+        """
+        Prepares a background image for compositing.
+        If a valid custom background path is given, loads and resizes it. Otherwise, uses a preset.
+        Returns: np.ndarray RGB (H, W, 3) uint8
+        """
+        import cv2
+        from utils.cv_processing import create_professional_background
+        if custom_background_path:
+            try:
+                img = cv2.imread(custom_background_path, cv2.IMREAD_COLOR)
+                if img is not None:
+                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                    img = cv2.resize(img, (width, height), interpolation=cv2.INTER_LANCZOS4)
+                    return img
+                else:
+                    self.log.warning(f"Failed to load custom background from '{custom_background_path}', using preset.")
+            except Exception as e:
+                self.log.warning(f"Exception loading custom background: {e}, using preset.")
+        # fallback to preset
+        return create_professional_background(background_choice, width, height)
+    # (rest of class unchanged...)
     # ---------- mask post-processing (stability + crispness) ----------
     def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
         a_bin = (a >= thr).astype(np.uint8)
             "output_path": output_path,
         }