Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 28, 2025

Commit

aa52ec9

1 Parent(s): 9e03b6b

Update models/loaders/matanyone_loader.py

Browse files

Files changed (1) hide show

models/loaders/matanyone_loader.py +200 -12

models/loaders/matanyone_loader.py CHANGED Viewed

@@ -62,26 +62,214 @@ def load(self) -> Optional[Any]:
         return None
     def _load_official(self) -> Optional[Any]:
-        """Load using official MatAnyone API"""
         from matanyone import InferenceCore
         # Create processor - pass model ID as positional argument
         processor = InferenceCore(self.model_id)
-        # Ensure processor is properly initialized for the device
-        if hasattr(processor, 'device'):
-            processor.device = self.device
-        # Move model components to device if they exist
-        if hasattr(processor, 'model'):
-            if hasattr(processor.model, 'to'):
-                processor.model = processor.model.to(self.device)
-                processor.model.eval()
-        # Patch the processor to handle inputs properly
-        self._patch_processor(processor)
-        return processor
     def _patch_processor(self, processor):
         """

         return None
     def _load_official(self) -> Optional[Any]:
+        """Load MatAnyone through its official ``InferenceCore`` API.
+
+        Builds the processor from ``self.model_id`` and passes it through
+        ``self._install_shape_guard`` before handing it back.
+
+        Returns:
+            The ``InferenceCore`` processor after the shape guard was installed.
+            Nothing in this method returns ``None``; an import or constructor
+            failure propagates as an exception to the caller.
+        """
+        # Imported at call time rather than at module level (presumably so a
+        # missing ``matanyone`` package only fails when this path is used -- confirm).
         from matanyone import InferenceCore
         # Create processor - pass model ID as positional argument
         processor = InferenceCore(self.model_id)
+        # Install the critical shape guard patch from original loader
+        self._install_shape_guard(processor)
+        return processor
+    def _install_shape_guard(self, processor):
+        """
+        Install the comprehensive shape guard from the original loader.
+        This is CRITICAL for preventing 5D tensor issues and ensuring compatibility.
+        """
+        import torch
+        import numpy as np
+        device = self.device
+        # Helper functions for tensor manipulation
+        def ensure_image_nchw(img: torch.Tensor, want_batched: bool = True) -> torch.Tensor:
+            """Ensure image is in NCHW format"""
+            if isinstance(img, np.ndarray):
+                img = torch.from_numpy(img)
+            img = img.to(device)
+            # Handle 5D tensors (B,T,C,H,W) by squeezing time dimension
+            if img.ndim == 5:
+                if img.shape[1] == 1:  # Single time frame
+                    img = img.squeeze(1)
+                elif img.shape[0] == 1:  # Single batch
+                    img = img.squeeze(0)
+            # Handle various input formats
+            if img.ndim == 3:
+                # CHW or HWC
+                if img.shape[0] in (1, 3, 4):  # Likely CHW
+                    chw = img
+                elif img.shape[-1] in (1, 3, 4):  # Likely HWC
+                    chw = img.permute(2, 0, 1)
+                else:
+                    # Assume CHW
+                    chw = img
+                # Ensure float and normalized
+                if chw.dtype != torch.float32:
+                    chw = chw.float()
+                if chw.max() > 1.0:
+                    chw = chw / 255.0
+                return chw.unsqueeze(0) if want_batched else chw
+            elif img.ndim == 4:
+                # NCHW or NHWC
+                N, A, B, C = img.shape
+                if A in (1, 3, 4):  # NCHW
+                    nchw = img
+                elif C in (1, 3, 4):  # NHWC
+                    nchw = img.permute(0, 3, 1, 2)
+                else:
+                    # Assume NCHW
+                    nchw = img
+                # Ensure float and normalized
+                if nchw.dtype != torch.float32:
+                    nchw = nchw.float()
+                if nchw.max() > 1.0:
+                    nchw = nchw / 255.0
+                return nchw if want_batched else nchw[0]
+            else:
+                logger.error(f"Unexpected image dimensions: {img.shape}")
+                # Return something safe
+                return torch.zeros((1, 3, 512, 512), device=device, dtype=torch.float32)
+        def ensure_mask_for_matanyone(mask: torch.Tensor, idx_mask: bool = False,
+                                      threshold: float = 0.5, keep_soft: bool = False) -> torch.Tensor:
+            """Ensure mask is in correct format for MatAnyone"""
+            if isinstance(mask, np.ndarray):
+                mask = torch.from_numpy(mask)
+            mask = mask.to(device)
+            # Handle 5D tensors
+            if mask.ndim == 5:
+                if mask.shape[1] == 1:
+                    mask = mask.squeeze(1)
+                if mask.shape[0] == 1 and mask.ndim == 5:
+                    mask = mask.squeeze(0)
+            # Handle index masks
+            if idx_mask:
+                if mask.ndim == 3:
+                    if mask.shape[0] == 1:
+                        idx = (mask[0] >= threshold).to(torch.long)
+                    else:
+                        idx = torch.argmax(mask, dim=0).to(torch.long)
+                        idx = (idx > 0).to(torch.long)
+                elif mask.ndim == 2:
+                    idx = (mask >= threshold).to(torch.long)
+                else:
+                    logger.warning(f"Unexpected idx mask shape: {mask.shape}")
+                    idx = torch.zeros((512, 512), device=device, dtype=torch.long)
+                return idx
+            # Handle channel masks
+            if mask.ndim == 2:
+                out = mask.unsqueeze(0)  # Add channel dimension
+            elif mask.ndim == 3:
+                if mask.shape[0] == 1:
+                    out = mask
+                else:
+                    # Choose channel with largest area
+                    areas = mask.sum(dim=(-2, -1))
+                    best_idx = areas.argmax()
+                    out = mask[best_idx:best_idx+1]
+            else:
+                logger.warning(f"Unexpected mask shape: {mask.shape}")
+                out = torch.ones((1, 512, 512), device=device, dtype=torch.float32)
+            # Convert to float and normalize
+            out = out.to(torch.float32)
+            if not keep_soft:
+                out = (out >= threshold).to(torch.float32)
+            return out.clamp_(0.0, 1.0).contiguous()
+        # Create the guarded wrapper
+        def create_guarded_method(original_method):
+            """Create a guarded version of a MatAnyone method"""
+            def guarded_method(*args, **kwargs):
+                # Extract image and mask
+                image = kwargs.get("image", None)
+                mask = kwargs.get("mask", None)
+                idx_mask = kwargs.get("idx_mask", kwargs.get("index_mask", False))
+                # Handle positional arguments
+                if image is None and len(args) >= 1:
+                    image = args[0]
+                if mask is None and len(args) >= 2:
+                    mask = args[1]
+                if image is None or mask is None:
+                    logger.error(f"MatAnyone called without image/mask: args={len(args)}, kwargs={list(kwargs.keys())}")
+                    # Return something safe
+                    return torch.ones((1, 512, 512), dtype=torch.float32) * 0.5
+                try:
+                    # Coerce shapes
+                    img_nchw = ensure_image_nchw(image, want_batched=True)
+                    if idx_mask:
+                        m_fixed = ensure_mask_for_matanyone(mask, idx_mask=True)
+                    else:
+                        m_fixed = ensure_mask_for_matanyone(mask, idx_mask=False, threshold=0.5)
+                    # Log shapes for debugging
+                    logger.debug(f"MatAnyone input - image: {img_nchw.shape}, mask: {m_fixed.shape}, idx: {idx_mask}")
+                    # Try unbatched first (most common)
+                    try:
+                        new_kwargs = dict(kwargs)
+                        new_kwargs["image"] = img_nchw[0]  # CHW
+                        new_kwargs["mask"] = m_fixed if idx_mask else m_fixed  # Already correct shape
+                        new_kwargs["idx_mask"] = bool(idx_mask)
+                        result = original_method(**new_kwargs)
+                        return result
+                    except Exception as e1:
+                        logger.debug(f"Unbatched call failed, trying batched: {e1}")
+                        # Try with batch dimension
+                        new_kwargs = dict(kwargs)
+                        new_kwargs["image"] = img_nchw  # NCHW
+                        new_kwargs["mask"] = m_fixed
+                        new_kwargs["idx_mask"] = bool(idx_mask)
+                        result = original_method(**new_kwargs)
+                        return result
+                except Exception as e:
+                    logger.error(f"MatAnyone guarded call failed: {e}")
+                    import traceback
+                    logger.debug(traceback.format_exc())
+                    # Return input mask as fallback
+                    if isinstance(mask, torch.Tensor):
+                        return mask.cpu().numpy()
+                    elif isinstance(mask, np.ndarray):
+                        return mask
+                    else:
+                        return np.ones((512, 512), dtype=np.float32) * 0.5
+            return guarded_method
+        # Apply guard to both step and process methods
+        if hasattr(processor, 'step'):
+            original_step = processor.step
+            processor.step = create_guarded_method(original_step)
+            logger.info("Installed shape guard on MatAnyone.step")
+        if hasattr(processor, 'process'):
+            original_process = processor.process
+            processor.process = create_guarded_method(original_process)
+            logger.info("Installed shape guard on MatAnyone.process")
     def _patch_processor(self, processor):
         """