Spaces:

MogensR
/

VideoBackgroundReplacer2

Configuration error

App Files Files Community

MogensR committed on Sep 16, 2025

Commit

9923851

1 Parent(s): 72f4052

agent 2.8

Browse files

Files changed (1) hide show

models/matanyone_loader.py +73 -50

models/matanyone_loader.py CHANGED Viewed

@@ -83,7 +83,7 @@ def _read_mask_hw(mask_path: Path, target_hw: Tuple[int, int]) -> np.ndarray:
     if mask.shape[:2] != (H, W):
         mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR)
     maskf = (mask.astype(np.float32) / 255.0).clip(0.0, 1.0)
-    return maskf  # shape: (H, W), float32
 def _to_hwc01(img_bgr: np.ndarray) -> np.ndarray:
@@ -146,9 +146,10 @@ def __init__(self, device: Optional[str] = None, precision: str = "auto"):
         self._api_mode = None
         self._initialized = False
-        # chosen layouts after first frame succeeds
-        self._img_layout: Optional[str] = None     # 'HWC' or 'CHW'
-        self._mask_layout: Optional[str] = None    # 'HW', '1HW', or None
         self._lazy_init()
@@ -224,38 +225,60 @@ def _core_call(self, img_t: torch.Tensor, mask_t: Optional[torch.Tensor]):
             return self._core.process_frame(img_t, mask_t) if mask_t is not None else self._core.process_frame(img_t)
         raise MatAnyError("Internal error: unknown API mode")
     def _run_frame(self, frame_bgr: np.ndarray, seed_hw: Optional[np.ndarray], is_first: bool) -> np.ndarray:
         """
         Returns alpha matte as 2D np.float32 in [0,1].
         - On first frame, try several (image,mask) layout combos and remember the winner.
-        - On later frames, use the recorded layout (mask is None).
         """
         self._validate_input_frame(frame_bgr)
-        # Build both image layouts
-        img_hwc = _to_hwc01(frame_bgr)  # (H,W,3) float32
-        img_chw = _to_chw01(frame_bgr)  # (3,H,W)  float32
-        # Build mask layouts if seed is provided on first frame
-        mask_hw  = None
-        mask_1hw = None
-        if is_first and seed_hw is not None:
-            if seed_hw.ndim != 2:
-                raise MatAnyError(f"Internal: seed_hw must be HW; got {seed_hw.shape}")
-            mask_hw  = seed_hw.astype(np.float32, copy=False)  # (H,W)
-            mask_1hw = mask_hw[None, ...]                      # (1,H,W)
-        # If layout already chosen, use it
-        if self._img_layout is not None and (not is_first):
-            img_t = None
-            if self._img_layout == "HWC":
-                img_t = torch.from_numpy(img_hwc).to(self.device, dtype=torch.float32, non_blocking=True)
-            else:
-                img_t = torch.from_numpy(img_chw).to(self.device, dtype=torch.float32, non_blocking=True)
             with torch.no_grad(), self._maybe_amp():
                 out = self._core_call(img_t, None)
             alpha_np = out.detach().float().clamp(0, 1).squeeze().cpu().numpy() if isinstance(out, torch.Tensor) \
                        else np.asarray(out, dtype=np.float32)
             if alpha_np.max() > 1.0:
@@ -265,32 +288,35 @@ def _run_frame(self, frame_bgr: np.ndarray, seed_hw: Optional[np.ndarray], is_fi
                 raise MatAnyError(f"Expected 2D alpha matte; got shape {alpha_np.shape}")
             return alpha_np.astype(np.float32)
-        # Otherwise, probe possible combos on the first frame
-        attempts = []
-        if is_first:
-            attempts = [
-                ("HWC", "HW",  img_hwc, mask_hw),
-                ("HWC", "1HW", img_hwc, mask_1hw),
-                ("CHW", "HW",  img_chw, mask_hw),
-                ("CHW", "1HW", img_chw, mask_1hw),
-            ]
-        else:
-            # Should never reach here (later frames handled above), but keep a safe default
-            attempts = [("HWC", None, img_hwc, None), ("CHW", None, img_chw, None)]
         last_err = None
-        for img_layout, m_layout, img_np, m_np in attempts:
             try:
-                img_t = torch.from_numpy(img_np).to(self.device, dtype=torch.float32, non_blocking=True)
-                mask_t = None if m_np is None else torch.from_numpy(m_np).to(self.device, dtype=torch.float32, non_blocking=True)
                 with torch.no_grad(), self._maybe_amp():
                     out = self._core_call(img_t, mask_t)
-                # success → remember layout for subsequent frames
-                self._img_layout = img_layout
-                self._mask_layout = m_layout
-                log.info(f"[MATANY] Selected layouts: image={img_layout}, mask={m_layout}")
                 alpha_np = out.detach().float().clamp(0, 1).squeeze().cpu().numpy() if isinstance(out, torch.Tensor) \
                            else np.asarray(out, dtype=np.float32)
@@ -303,10 +329,8 @@ def _run_frame(self, frame_bgr: np.ndarray, seed_hw: Optional[np.ndarray], is_fi
             except Exception as e:
                 last_err = e
-                emsg = str(e)
-                log.warning(f"[MATANY] Layout attempt failed (image={img_layout}, mask={m_layout}): {emsg}")
-        # If we’re here, all attempts failed
         snap = _cuda_snapshot(self.device)
         raise MatAnyError(f"MatAnyone first-frame probe failed for all layouts. Last error: {last_err} | {snap}")
@@ -386,7 +410,6 @@ def process_stream(
                 # Compose outputs
                 alpha_u8  = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
                 alpha_bgr = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
-                # alpha_hw already [0,1]
                 fg_bgr = (frame.astype(np.float32) * alpha_hw[..., None]).clip(0, 255).astype(np.uint8)
                 alpha_writer.write(alpha_bgr)

     if mask.shape[:2] != (H, W):
         mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR)
     maskf = (mask.astype(np.float32) / 255.0).clip(0.0, 1.0)
+    return maskf  # (H, W)
 def _to_hwc01(img_bgr: np.ndarray) -> np.ndarray:
         self._api_mode = None
         self._initialized = False
+        # chosen builders after first frame succeeds
+        self._build_img = None   # Callable[[np.ndarray], torch.Tensor]
+        self._build_msk = None   # Optional[Callable[[np.ndarray], Optional[torch.Tensor]]]
+        self._layout_name = None
         self._lazy_init()
             return self._core.process_frame(img_t, mask_t) if mask_t is not None else self._core.process_frame(img_t)
         raise MatAnyError("Internal error: unknown API mode")
+    # ---- builders for probing ----
+    def _mk_builder_bchw(self) -> Tuple[str, Callable[[np.ndarray], torch.Tensor], Callable[[np.ndarray], Optional[torch.Tensor]]]:
+        def b_img(frame_bgr: np.ndarray) -> torch.Tensor:
+            chw = _to_chw01(frame_bgr)
+            return torch.from_numpy(chw).unsqueeze(0).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [1,3,H,W]
+        def b_msk(seed_hw: np.ndarray) -> torch.Tensor:
+            return torch.from_numpy(seed_hw).unsqueeze(0).unsqueeze(0).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [1,1,H,W]
+        return "BCHW+B1HW", b_img, b_msk
+    def _mk_builder_bchw_nomask(self) -> Tuple[str, Callable[[np.ndarray], torch.Tensor], Callable[[np.ndarray], Optional[torch.Tensor]]]:
+        def b_img(frame_bgr: np.ndarray) -> torch.Tensor:
+            chw = _to_chw01(frame_bgr)
+            return torch.from_numpy(chw).unsqueeze(0).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)
+        def b_msk(_: np.ndarray) -> Optional[torch.Tensor]:
+            return None
+        return "BCHW+None", b_img, b_msk
+    def _mk_builder_btchw(self) -> Tuple[str, Callable[[np.ndarray], torch.Tensor], Callable[[np.ndarray], Optional[torch.Tensor]]]:
+        def b_img(frame_bgr: np.ndarray) -> torch.Tensor:
+            chw = _to_chw01(frame_bgr)
+            return torch.from_numpy(chw).unsqueeze(0).unsqueeze(1).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [1,1,3,H,W]
+        def b_msk(seed_hw: np.ndarray) -> torch.Tensor:
+            return torch.from_numpy(seed_hw).unsqueeze(0).unsqueeze(0).unsqueeze(0).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [1,1,1,H,W]
+        return "BTCHW+B1THW", b_img, b_msk
+    def _mk_builder_chw(self) -> Tuple[str, Callable[[np.ndarray], torch.Tensor], Callable[[np.ndarray], Optional[torch.Tensor]]]:
+        def b_img(frame_bgr: np.ndarray) -> torch.Tensor:
+            chw = _to_chw01(frame_bgr)
+            return torch.from_numpy(chw).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [3,H,W]
+        def b_msk(seed_hw: np.ndarray) -> torch.Tensor:
+            return torch.from_numpy(seed_hw).unsqueeze(0).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [1,H,W]
+        return "CHW+1HW", b_img, b_msk
+    def _mk_builder_hwc(self) -> Tuple[str, Callable[[np.ndarray], torch.Tensor], Callable[[np.ndarray], Optional[torch.Tensor]]]:
+        def b_img(frame_bgr: np.ndarray) -> torch.Tensor:
+            hwc = _to_hwc01(frame_bgr)
+            return torch.from_numpy(hwc).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [H,W,3]
+        def b_msk(seed_hw: np.ndarray) -> torch.Tensor:
+            return torch.from_numpy(seed_hw).contiguous().to(self.device, dtype=torch.float32, non_blocking=True)  # [H,W]
+        return "HWC+HW", b_img, b_msk
     def _run_frame(self, frame_bgr: np.ndarray, seed_hw: Optional[np.ndarray], is_first: bool) -> np.ndarray:
         """
         Returns alpha matte as 2D np.float32 in [0,1].
         - On first frame, try several (image,mask) layout combos and remember the winner.
+        - On later frames, use the recorded builders (mask is None).
         """
         self._validate_input_frame(frame_bgr)
+        # Later frames: use the memorized builders
+        if self._build_img is not None and not is_first:
+            img_t = self._build_img(frame_bgr)
             with torch.no_grad(), self._maybe_amp():
                 out = self._core_call(img_t, None)
             alpha_np = out.detach().float().clamp(0, 1).squeeze().cpu().numpy() if isinstance(out, torch.Tensor) \
                        else np.asarray(out, dtype=np.float32)
             if alpha_np.max() > 1.0:
                 raise MatAnyError(f"Expected 2D alpha matte; got shape {alpha_np.shape}")
             return alpha_np.astype(np.float32)
+        # First frame: probe combos
+        attempts = [
+            self._mk_builder_bchw(),         # [1,3,H,W] + [1,1,H,W]
+            self._mk_builder_bchw_nomask(),  # [1,3,H,W] + None
+            self._mk_builder_btchw(),        # [1,1,3,H,W] + [1,1,1,H,W]
+            self._mk_builder_chw(),          # [3,H,W] + [1,H,W]
+            self._mk_builder_hwc(),          # [H,W,3] + [H,W]
+        ]
         last_err = None
+        for name, mk_img, mk_msk in attempts:
             try:
+                img_t = mk_img(frame_bgr)
+                mask_t = None
+                if seed_hw is not None:
+                    mask_t = mk_msk(seed_hw)
+                log.info(f"[MATANY] Trying layout: {name} | img.shape={tuple(img_t.shape)}"
+                         f"{'' if mask_t is None else ' mask.shape=' + str(tuple(mask_t.shape))}")
                 with torch.no_grad(), self._maybe_amp():
                     out = self._core_call(img_t, mask_t)
+                # success → remember builders for subsequent frames
+                self._build_img = mk_img
+                # after first frame, we won't pass mask anymore
+                self._build_msk = mk_msk
+                self._layout_name = name
+                log.info(f"[MATANY] Selected layout: {name}")
                 alpha_np = out.detach().float().clamp(0, 1).squeeze().cpu().numpy() if isinstance(out, torch.Tensor) \
                            else np.asarray(out, dtype=np.float32)
             except Exception as e:
                 last_err = e
+                log.warning(f"[MATANY] Layout attempt failed ({name}): {e}")
         snap = _cuda_snapshot(self.device)
         raise MatAnyError(f"MatAnyone first-frame probe failed for all layouts. Last error: {last_err} | {snap}")
                 # Compose outputs
                 alpha_u8  = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
                 alpha_bgr = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
                 fg_bgr = (frame.astype(np.float32) * alpha_hw[..., None]).clip(0, 255).astype(np.uint8)
                 alpha_writer.write(alpha_bgr)