Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 28, 2025

Commit

0672ceb

1 Parent(s): aa52ec9

Update models/loaders/matanyone_loader.py

Browse files

Files changed (1) hide show

models/loaders/matanyone_loader.py +12 -8

models/loaders/matanyone_loader.py CHANGED Viewed

@@ -92,11 +92,14 @@ def ensure_image_nchw(img: torch.Tensor, want_batched: bool = True) -> torch.Ten
             img = img.to(device)
             # Handle 5D tensors (B,T,C,H,W) by squeezing time dimension
-            if img.ndim == 5:
-                if img.shape[1] == 1:  # Single time frame
-                    img = img.squeeze(1)
-                elif img.shape[0] == 1:  # Single batch
                     img = img.squeeze(0)
             # Handle various input formats
             if img.ndim == 3:
@@ -134,12 +137,12 @@ def ensure_image_nchw(img: torch.Tensor, want_batched: bool = True) -> torch.Ten
                 if nchw.max() > 1.0:
                     nchw = nchw / 255.0
-                return nchw if want_batched else nchw[0]
             else:
                 logger.error(f"Unexpected image dimensions: {img.shape}")
                 # Return something safe
-                return torch.zeros((1, 3, 512, 512), device=device, dtype=torch.float32)
         def ensure_mask_for_matanyone(mask: torch.Tensor, idx_mask: bool = False,
                                       threshold: float = 0.5, keep_soft: bool = False) -> torch.Tensor:
@@ -228,8 +231,9 @@ def guarded_method(*args, **kwargs):
                     # Try unbatched first (most common)
                     try:
                         new_kwargs = dict(kwargs)
-                        new_kwargs["image"] = img_nchw[0]  # CHW
-                        new_kwargs["mask"] = m_fixed if idx_mask else m_fixed  # Already correct shape
                         new_kwargs["idx_mask"] = bool(idx_mask)
                         result = original_method(**new_kwargs)

             img = img.to(device)
             # Handle 5D tensors (B,T,C,H,W) by squeezing time dimension
+            while img.ndim == 5:
+                if img.shape[0] == 1:
                     img = img.squeeze(0)
+                elif img.shape[1] == 1:
+                    img = img.squeeze(1)
+                else:
+                    # Can't auto-squeeze, take first time frame
+                    img = img[:, 0]
             # Handle various input formats
             if img.ndim == 3:
                 if nchw.max() > 1.0:
                     nchw = nchw / 255.0
+                return nchw if want_batched else nchw.squeeze(0) if not want_batched and nchw.shape[0] == 1 else nchw[0]
             else:
                 logger.error(f"Unexpected image dimensions: {img.shape}")
                 # Return something safe
+                return torch.zeros((3, 512, 512), device=device, dtype=torch.float32).unsqueeze(0) if want_batched else torch.zeros((3, 512, 512), device=device, dtype=torch.float32)
         def ensure_mask_for_matanyone(mask: torch.Tensor, idx_mask: bool = False,
                                       threshold: float = 0.5, keep_soft: bool = False) -> torch.Tensor:
                     # Try unbatched first (most common)
                     try:
                         new_kwargs = dict(kwargs)
+                        # CRITICAL: Use unbatched (CHW) not batched for first attempt
+                        new_kwargs["image"] = img_nchw.squeeze(0) if img_nchw.shape[0] == 1 else img_nchw[0]  # CHW
+                        new_kwargs["mask"] = m_fixed.squeeze(0) if m_fixed.shape[0] == 1 else m_fixed  # HW or CHW
                         new_kwargs["idx_mask"] = bool(idx_mask)
                         result = original_method(**new_kwargs)