Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Community

MogensR committed on Sep 11, 2025

Commit

0319b0d

verified ·

1 Parent(s): df850a4

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -16

app.py CHANGED Viewed

@@ -407,19 +407,31 @@ def create_mask(self, image_rgb: np.ndarray) -> Optional[np.ndarray]:
 # =============================================================================
 # CHAPTER 6: MATANYONE HANDLER (First-frame PROB mask)
-# =============================================================================
 class MatAnyoneHandler:
     def __init__(self):
         self.core = None
         self.initialized = False
     # ----- tensor helpers -----
     def _to_chw_float(self, img01: np.ndarray) -> "torch.Tensor":
         assert img01.ndim == 3 and img01.shape[2] == 3, f"Expected HxWx3, got {img01.shape}"
         t = torch.from_numpy(img01.transpose(2, 0, 1)).contiguous().float()  # 3xHxW
         return t.to(DEVICE, non_blocking=CUDA_AVAILABLE)
     def _prob_from_mask_u8(self, mask_u8: np.ndarray, w: int, h: int) -> "torch.Tensor":
         if mask_u8.shape[0] != h or mask_u8.shape[1] != w:
             mask_u8 = cv2.resize(mask_u8, (w, h), interpolation=cv2.INTER_NEAREST)
         prob = (mask_u8.astype(np.float32) / 255.0)[None, ...]  # 1xHxW
@@ -427,8 +439,14 @@ def _prob_from_mask_u8(self, mask_u8: np.ndarray, w: int, h: int) -> "torch.Tens
         return t.to(DEVICE, non_blocking=CUDA_AVAILABLE)
     def _alpha_to_u8_hw(self, alpha_like) -> np.ndarray:
         if isinstance(alpha_like, (list, tuple)) and len(alpha_like) > 1:
             alpha_like = alpha_like[1]  # handle (indices, probs)
         if isinstance(alpha_like, torch.Tensor):
             t = alpha_like.detach()
             if t.is_cuda:
@@ -437,14 +455,14 @@ def _alpha_to_u8_hw(self, alpha_like) -> np.ndarray:
         else:
             a = np.asarray(alpha_like, dtype=np.float32)
             a = np.clip(a, 0, 1)
         a = np.squeeze(a)
         if a.ndim != 2:
-            # handle shapes (1,H,W) or (K,H,W) → pick first
-            if a.ndim == 3 and a.shape[0] >= 1:
-                a = a[0]
-            else:
-                raise ValueError(f"Alpha must be HxW; got {a.shape}")
-        return (np.clip(a * 255.0, 0, 255).astype(np.uint8))
     def initialize(self) -> bool:
         if not TORCH_AVAILABLE:
@@ -485,8 +503,20 @@ def initialize(self) -> bool:
                 state.matanyone_error = f"MatAnyone init error: {e}"
                 return False
-    # ----- video matting using first-frame PROB mask -----
     def process_video(self, input_path: str, mask_path: str, output_path: str) -> str:
         if not self.initialized or self.core is None:
             raise RuntimeError("MatAnyone not initialized")
@@ -502,34 +532,43 @@ def process_video(self, input_path: str, mask_path: str, output_path: str) -> st
         w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         seed_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
         if seed_mask is None:
             cap.release()
             raise RuntimeError("Seed mask read failed")
         tmp_dir = TEMP_DIR / f"ma_{int(time.time())}_{random.randint(1000,9999)}"
         tmp_dir.mkdir(parents=True, exist_ok=True)
         memory_manager.register_temp_file(str(tmp_dir))
         frame_idx = 0
-        # First frame (with PROB mask)
         ok, frame_bgr = cap.read()
         if not ok or frame_bgr is None:
             cap.release()
             raise RuntimeError("Empty first frame")
         frame_rgb01 = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
-        img_chw = self._to_chw_float(frame_rgb01)             # 3xHxW
-        prob_chw = self._prob_from_mask_u8(seed_mask, w, h)   # 1xHxW
         with torch.no_grad():
-            out_prob = self.core.step(img_chw, prob=prob_chw, matting=True)
         alpha_u8 = self._alpha_to_u8_hw(out_prob)
         cv2.imwrite(str(tmp_dir / f"{frame_idx:06d}.png"), alpha_u8)
         frame_idx += 1
-        # Remaining frames (no mask)
         while True:
             ok, frame_bgr = cap.read()
             if not ok or frame_bgr is None:
@@ -538,7 +577,10 @@ def process_video(self, input_path: str, mask_path: str, output_path: str) -> st
             img_chw = self._to_chw_float(frame_rgb01)
             with torch.no_grad():
-                out_prob = self.core.step(img_chw)
             alpha_u8 = self._alpha_to_u8_hw(out_prob)
             cv2.imwrite(str(tmp_dir / f"{frame_idx:06d}.png"), alpha_u8)
@@ -546,7 +588,7 @@ def process_video(self, input_path: str, mask_path: str, output_path: str) -> st
         cap.release()
-        # Build MP4 from alpha pngs
         list_file = tmp_dir / "list.txt"
         with open(list_file, "w") as f:
             for i in range(frame_idx):
@@ -554,7 +596,8 @@ def process_video(self, input_path: str, mask_path: str, output_path: str) -> st
         cmd = [
             "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
-            "-f", "concat", "-safe", "0", "-r", f"{fps:.6f}",
             "-i", str(list_file),
             "-vf", f"format=gray,scale={w}:{h}:flags=area",
             "-pix_fmt", "yuv420p",
@@ -564,6 +607,7 @@ def process_video(self, input_path: str, mask_path: str, output_path: str) -> st
         subprocess.run(cmd, check=True)
         return str(alpha_path)
 # =============================================================================
 # CHAPTER 7: AI BACKGROUNDS
 # =============================================================================

 # =============================================================================
 # CHAPTER 6: MATANYONE HANDLER (First-frame PROB mask)
+# ============================================================================
 class MatAnyoneHandler:
+    """
+    MatAnyone loader + inference adapter.
+    Key points:
+    - Uses first-frame *soft probability* seed (1xHxW float in [0,1]), not an index mask.
+    - Calls InferenceCore.step with the prob map as a **positional** arg (some builds reject `prob=`).
+    - Tries `matting=True` when supported; falls back if the kwarg is not available.
+    - Always feeds CHW tensors for images (3,H,W) and 1xHxW for probs — no extra batch dims.
+    """
     def __init__(self):
         self.core = None
         self.initialized = False
     # ----- tensor helpers -----
     def _to_chw_float(self, img01: np.ndarray) -> "torch.Tensor":
+        """img01: HxWx3 in [0,1] -> torch float 3xHxW on DEVICE"""
         assert img01.ndim == 3 and img01.shape[2] == 3, f"Expected HxWx3, got {img01.shape}"
         t = torch.from_numpy(img01.transpose(2, 0, 1)).contiguous().float()  # 3xHxW
         return t.to(DEVICE, non_blocking=CUDA_AVAILABLE)
     def _prob_from_mask_u8(self, mask_u8: np.ndarray, w: int, h: int) -> "torch.Tensor":
+        """mask_u8: HxW uint8 -> torch float 1xHxW on DEVICE, resized to (w,h) if needed"""
         if mask_u8.shape[0] != h or mask_u8.shape[1] != w:
             mask_u8 = cv2.resize(mask_u8, (w, h), interpolation=cv2.INTER_NEAREST)
         prob = (mask_u8.astype(np.float32) / 255.0)[None, ...]  # 1xHxW
         return t.to(DEVICE, non_blocking=CUDA_AVAILABLE)
     def _alpha_to_u8_hw(self, alpha_like) -> np.ndarray:
+        """
+        Accepts torch Tensor or numpy-like. Returns uint8 HxW (0..255).
+        Handles shapes (H,W), (1,H,W), or (K,H,W) -> picks first channel.
+        Also handles MatAnyone tuples/lists like (indices, probs) by taking the 2nd item.
+        """
         if isinstance(alpha_like, (list, tuple)) and len(alpha_like) > 1:
             alpha_like = alpha_like[1]  # handle (indices, probs)
         if isinstance(alpha_like, torch.Tensor):
             t = alpha_like.detach()
             if t.is_cuda:
         else:
             a = np.asarray(alpha_like, dtype=np.float32)
             a = np.clip(a, 0, 1)
         a = np.squeeze(a)
+        if a.ndim == 3 and a.shape[0] >= 1:
+            a = a[0]
         if a.ndim != 2:
+            raise ValueError(f"Alpha must be HxW; got {a.shape}")
+        return np.clip(a * 255.0, 0, 255).astype(np.uint8)
     def initialize(self) -> bool:
         if not TORCH_AVAILABLE:
                 state.matanyone_error = f"MatAnyone init error: {e}"
                 return False
+    # ----- video matting using first-frame PROB mask (PATCHED) -----
     def process_video(self, input_path: str, mask_path: str, output_path: str) -> str:
+        """
+        Produce a single-channel alpha mp4 matching input fps & size.
+        First frame:
+          - Generate soft prob (1,H,W) from SAM2 mask and pass as positional arg to step().
+          - Try step(image, prob, matting=True); if TypeError, call step(image, prob).
+        Remaining frames:
+          - Try step(image, matting=True); fallback to step(image).
+        Returns: path to alpha.mp4
+        """
         if not self.initialized or self.core is None:
             raise RuntimeError("MatAnyone not initialized")
         w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        # soft seed prob (1,H,W) in [0,1]
         seed_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
         if seed_mask is None:
             cap.release()
             raise RuntimeError("Seed mask read failed")
+        prob_1hw = self._prob_from_mask_u8(seed_mask, w, h)  # (1,H,W) float
+        # temp frames
         tmp_dir = TEMP_DIR / f"ma_{int(time.time())}_{random.randint(1000,9999)}"
         tmp_dir.mkdir(parents=True, exist_ok=True)
         memory_manager.register_temp_file(str(tmp_dir))
+        def _step_with_prob(image_chw: "torch.Tensor", prob_1hw_t: "torch.Tensor"):
+            """Call step with positional prob; fall back if 'matting' kwarg unsupported."""
+            try:
+                return self.core.step(image_chw, prob_1hw_t, matting=True)
+            except TypeError:
+                return self.core.step(image_chw, prob_1hw_t)
         frame_idx = 0
+        # --- first frame (with soft prob) ---
         ok, frame_bgr = cap.read()
         if not ok or frame_bgr is None:
             cap.release()
             raise RuntimeError("Empty first frame")
         frame_rgb01 = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+        img_chw = self._to_chw_float(frame_rgb01)  # (3,H,W)
         with torch.no_grad():
+            out_prob = _step_with_prob(img_chw, prob_1hw)
         alpha_u8 = self._alpha_to_u8_hw(out_prob)
         cv2.imwrite(str(tmp_dir / f"{frame_idx:06d}.png"), alpha_u8)
         frame_idx += 1
+        # --- remaining frames (no seed) ---
         while True:
             ok, frame_bgr = cap.read()
             if not ok or frame_bgr is None:
             img_chw = self._to_chw_float(frame_rgb01)
             with torch.no_grad():
+                try:
+                    out_prob = self.core.step(img_chw, matting=True)
+                except TypeError:
+                    out_prob = self.core.step(img_chw)
             alpha_u8 = self._alpha_to_u8_hw(out_prob)
             cv2.imwrite(str(tmp_dir / f"{frame_idx:06d}.png"), alpha_u8)
         cap.release()
+        # --- encode PNGs → alpha mp4 ---
         list_file = tmp_dir / "list.txt"
         with open(list_file, "w") as f:
             for i in range(frame_idx):
         cmd = [
             "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
+            "-f", "concat", "-safe", "0",
+            "-r", f"{fps:.6f}",
             "-i", str(list_file),
             "-vf", f"format=gray,scale={w}:{h}:flags=area",
             "-pix_fmt", "yuv420p",
         subprocess.run(cmd, check=True)
         return str(alpha_path)
 # =============================================================================
 # CHAPTER 7: AI BACKGROUNDS
 # =============================================================================