Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 28, 2025

Commit

4f1de42

1 Parent(s): f7afe05

Update models/loaders/matanyone_loader.py

Browse files

Files changed (1) hide show

models/loaders/matanyone_loader.py +9 -6

models/loaders/matanyone_loader.py CHANGED Viewed

@@ -232,8 +232,11 @@ def __call__(self, image, mask=None, **kwargs) -> np.ndarray:
                 img_in = _resize_bchw(img_bchw, (th, tw), is_mask=False)
                 msk_in = _resize_bchw(msk_b1hw, (th, tw), is_mask=True) if msk_b1hw is not None else None
-                img_chw = _to_chw_image(img_in)
-                m_1hw  = _to_1hw_mask(msk_in) if msk_in is not None else None
                 # inference with autocast + inference_mode
                 with torch.inference_mode():
@@ -247,12 +250,13 @@ def __exit__(self, *args): return False
                     with amp_ctx:
                         if not self.started:
-                            if m_1hw is None:
                                 logger.warning("First frame arrived without a mask; returning neutral alpha.")
                                 return np.full((H, W), 0.5, dtype=np.float32)
-                            # encode/memorize
-                            _ = self.core.step(image=img_chw, mask=m_1hw)
                             # warm-up predict
                             if self._has_first_frame_pred:
                                 out_prob = self.core.step(image=img_chw, first_frame_pred=True)
@@ -455,4 +459,3 @@ def get_info(self) -> Dict[str, Any]:
     # Optional: instance-level shape debugging
     def debug_shapes(self, image, mask, tag: str = "") -> None:
         """Forward ``image`` and ``mask`` to the same-named ``debug_shapes`` helper.

         Inside a method body the bare name ``debug_shapes`` is looked up in the
         enclosing (non-class) scope, not on the instance, so this call reaches
         the free function rather than recursing into this method. Note that the
         argument order changes: the helper is called as ``(tag, image, mask)``.

         Args:
             image: Passed through unchanged to the helper.
                 NOTE(review): presumably an image array/tensor -- confirm.
             mask: Passed through unchanged to the helper.
                 NOTE(review): presumably may be None -- confirm against helper.
             tag: Label handed to the helper as its first argument; defaults
                 to the empty string.

         Returns:
             None. Whatever the helper returns is discarded.
         """
         debug_shapes(tag, image, mask)

                 img_in = _resize_bchw(img_bchw, (th, tw), is_mask=False)
                 msk_in = _resize_bchw(msk_b1hw, (th, tw), is_mask=True) if msk_b1hw is not None else None
+                # ---- IMPORTANT SHAPE CHANGES (only edit) ----
+                img_chw = _to_chw_image(img_in).contiguous()                  # [C,H,W]
+                m_1hw  = _to_1hw_mask(msk_in) if msk_in is not None else None # [1,H,W] or None
+                mask_2d = m_1hw[0].contiguous() if m_1hw is not None else None # [H,W] or None
+                # ------------------------------------------------
                 # inference with autocast + inference_mode
                 with torch.inference_mode():
                     with amp_ctx:
                         if not self.started:
+                            if mask_2d is None:
                                 logger.warning("First frame arrived without a mask; returning neutral alpha.")
                                 return np.full((H, W), 0.5, dtype=np.float32)
+                            # encode/memorize — pass 2-D mask (H,W)
+                            _ = self.core.step(image=img_chw, mask=mask_2d)
                             # warm-up predict
                             if self._has_first_frame_pred:
                                 out_prob = self.core.step(image=img_chw, first_frame_pred=True)
     # Optional: instance-level shape debugging
     def debug_shapes(self, image, mask, tag: str = "") -> None:
         """Forward ``image`` and ``mask`` to the same-named ``debug_shapes`` helper.

         Inside a method body the bare name ``debug_shapes`` is looked up in the
         enclosing (non-class) scope, not on the instance, so this call reaches
         the free function rather than recursing into this method. Note that the
         argument order changes: the helper is called as ``(tag, image, mask)``.

         Args:
             image: Passed through unchanged to the helper.
                 NOTE(review): presumably an image array/tensor -- confirm.
             mask: Passed through unchanged to the helper.
                 NOTE(review): presumably may be None -- confirm against helper.
             tag: Label handed to the helper as its first argument; defaults
                 to the empty string.

         Returns:
             None. Whatever the helper returns is discarded.
         """
         debug_shapes(tag, image, mask)