Update models/loaders/matanyone_loader.py
Browse files- models/loaders/matanyone_loader.py +187 -126
models/loaders/matanyone_loader.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
"""
|
| 4 |
-
MatAnyone Loader - Stable Callable Wrapper for InferenceCore
|
| 5 |
-
===========================================================
|
| 6 |
-
|
| 7 |
-
-
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
import os
|
|
@@ -32,92 +36,170 @@ def _to_float01_np(arr: np.ndarray) -> np.ndarray:
|
|
| 32 |
if arr.dtype == np.uint8:
|
| 33 |
arr = arr.astype(np.float32) / 255.0
|
| 34 |
else:
|
| 35 |
-
arr = arr.astype(np.float32)
|
| 36 |
-
# Clamp for safety
|
| 37 |
np.clip(arr, 0.0, 1.0, out=arr)
|
| 38 |
return arr
|
| 39 |
|
| 40 |
|
| 41 |
-
def
|
| 42 |
"""
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
if torch.is_tensor(image):
|
| 47 |
t = image
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
t = t.unsqueeze(0)
|
| 54 |
else:
|
| 55 |
-
raise ValueError(f"
|
|
|
|
| 56 |
t = t.to(dtype=torch.float32)
|
| 57 |
-
# If likely 0-255, scale; otherwise clamp to [0,1]
|
| 58 |
if torch.max(t) > 1.5:
|
| 59 |
t = t / 255.0
|
| 60 |
t = torch.clamp(t, 0.0, 1.0)
|
|
|
|
| 61 |
return t
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
| 69 |
pass
|
|
|
|
|
|
|
| 70 |
else:
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
-
def _ensure_1hw_float01(mask: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
|
| 77 |
"""
|
| 78 |
-
Convert mask to torch.FloatTensor 1HW in [0,1].
|
| 79 |
-
Accepts
|
| 80 |
"""
|
|
|
|
|
|
|
|
|
|
| 81 |
if torch.is_tensor(mask):
|
| 82 |
m = mask
|
| 83 |
-
if m.ndim ==
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
m = m.permute(2, 0, 1)
|
| 90 |
else:
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
else:
|
| 93 |
-
raise ValueError(f"
|
|
|
|
| 94 |
m = m.to(dtype=torch.float32)
|
| 95 |
if torch.max(m) > 1.5:
|
| 96 |
m = m / 255.0
|
| 97 |
m = torch.clamp(m, 0.0, 1.0)
|
|
|
|
| 98 |
return m
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
arr = arr.transpose(2, 0, 1)
|
| 108 |
-
else:
|
| 109 |
-
raise ValueError(f"Mask has too many channels: {arr.shape}")
|
| 110 |
else:
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def _alpha_from_result(result: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
|
| 117 |
-
"""
|
| 118 |
-
Extract a 2D alpha (H,W) float32 [0,1] from a variety of possible outputs.
|
| 119 |
-
Accepts numpy/tensor with shapes: HW, 1HW, CHW(C>=1), BHWC, BCHW, etc.
|
| 120 |
-
"""
|
| 121 |
if result is None:
|
| 122 |
return np.full((512, 512), 0.5, dtype=np.float32)
|
| 123 |
|
|
@@ -125,27 +207,23 @@ def _alpha_from_result(result: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
|
|
| 125 |
result = result.detach().float().cpu()
|
| 126 |
|
| 127 |
arr = np.asarray(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
if arr.ndim == 2:
|
| 129 |
alpha = arr
|
| 130 |
elif arr.ndim == 3:
|
| 131 |
-
|
| 132 |
-
if arr.shape[0] in (1, 3, 4): # CHW
|
| 133 |
alpha = arr[0]
|
| 134 |
-
elif arr.shape[-1] in (1, 3, 4):
|
| 135 |
alpha = arr[..., 0]
|
| 136 |
else:
|
| 137 |
-
|
| 138 |
-
alpha = arr[0]
|
| 139 |
-
elif arr.ndim == 4:
|
| 140 |
-
# Batch first: BxCxHxW or BxHxWxC
|
| 141 |
-
if arr.shape[1] in (1, 3, 4): # BCHW
|
| 142 |
-
alpha = arr[0, 0]
|
| 143 |
-
elif arr.shape[-1] in (1, 3, 4): # BHWC
|
| 144 |
-
alpha = arr[0, ..., 0]
|
| 145 |
-
else:
|
| 146 |
-
alpha = arr[0, 0]
|
| 147 |
else:
|
| 148 |
-
#
|
| 149 |
alpha = np.full((512, 512), 0.5, dtype=np.float32)
|
| 150 |
|
| 151 |
alpha = alpha.astype(np.float32, copy=False)
|
|
@@ -154,41 +232,29 @@ def _alpha_from_result(result: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
|
|
| 154 |
|
| 155 |
|
| 156 |
def _hw_from_image_like(x: Union[np.ndarray, torch.Tensor]) -> Tuple[int, int]:
|
| 157 |
-
"""Best-effort
|
| 158 |
if torch.is_tensor(x):
|
| 159 |
shape = tuple(x.shape)
|
| 160 |
-
# Handle CHW / HWC / BCHW / BHWC / HW
|
| 161 |
-
if len(shape) == 2: # HW
|
| 162 |
-
return shape[0], shape[1]
|
| 163 |
-
if len(shape) == 3:
|
| 164 |
-
if shape[0] in (1, 3, 4): # CHW
|
| 165 |
-
return shape[1], shape[2]
|
| 166 |
-
if shape[-1] in (1, 3, 4): # HWC
|
| 167 |
-
return shape[0], shape[1]
|
| 168 |
-
if len(shape) == 4:
|
| 169 |
-
# Assume batch first
|
| 170 |
-
b, c_or_h, h_or_w, maybe_w = shape
|
| 171 |
-
# Try BCHW
|
| 172 |
-
if shape[1] in (1, 3, 4):
|
| 173 |
-
return shape[2], shape[3]
|
| 174 |
-
# Try BHWC
|
| 175 |
-
return shape[1], shape[2]
|
| 176 |
-
return 512, 512
|
| 177 |
else:
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
if
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
|
| 194 |
# --------------------------- Callable Wrapper ---------------------------
|
|
@@ -201,6 +267,7 @@ class MatAnyoneCallableWrapper:
|
|
| 201 |
- First call SHOULD include a mask (1HW). If not, returns neutral 0.5 alpha.
|
| 202 |
- Subsequent calls do not require mask.
|
| 203 |
- Returns 2D alpha (H,W) float32 in [0,1].
|
|
|
|
| 204 |
"""
|
| 205 |
|
| 206 |
def __init__(self, inference_core, device: str = "cuda", mixed_precision: Optional[str] = "fp16"):
|
|
@@ -213,7 +280,7 @@ def _maybe_autocast(self):
|
|
| 213 |
if self.device == "cuda" and self.mixed_precision in ("fp16", "bf16"):
|
| 214 |
dtype = torch.float16 if self.mixed_precision == "fp16" else torch.bfloat16
|
| 215 |
return torch.autocast(device_type="cuda", dtype=dtype)
|
| 216 |
-
# no-op
|
| 217 |
class _NullCtx:
|
| 218 |
def __enter__(self): return None
|
| 219 |
def __exit__(self, *exc): return False
|
|
@@ -221,9 +288,8 @@ def __exit__(self, *exc): return False
|
|
| 221 |
|
| 222 |
def __call__(self, image, mask=None, **kwargs) -> np.ndarray:
|
| 223 |
try:
|
| 224 |
-
# Preprocess
|
| 225 |
-
img_chw = _ensure_chw_float01(image).to(self.device, non_blocking=True)
|
| 226 |
-
img_bchw = img_chw.unsqueeze(0) # B=1
|
| 227 |
|
| 228 |
if not self.initialized:
|
| 229 |
if mask is None:
|
|
@@ -231,15 +297,14 @@ def __call__(self, image, mask=None, **kwargs) -> np.ndarray:
|
|
| 231 |
logger.warning("MatAnyone first frame called without mask; returning neutral alpha.")
|
| 232 |
return np.full((h, w), 0.5, dtype=np.float32)
|
| 233 |
|
| 234 |
-
m_1hw = _ensure_1hw_float01(mask).to(self.device, non_blocking=True)
|
| 235 |
-
m_b1hw = m_1hw.unsqueeze(0) # B=1
|
| 236 |
|
| 237 |
with torch.inference_mode():
|
| 238 |
with self._maybe_autocast():
|
| 239 |
if hasattr(self.core, "step"):
|
| 240 |
-
result = self.core.step(image=
|
| 241 |
elif hasattr(self.core, "process_frame"):
|
| 242 |
-
result = self.core.process_frame(
|
| 243 |
else:
|
| 244 |
logger.warning("InferenceCore has no recognized frame API; echoing input mask.")
|
| 245 |
return _alpha_from_result(mask)
|
|
@@ -251,9 +316,9 @@ def __call__(self, image, mask=None, **kwargs) -> np.ndarray:
|
|
| 251 |
with torch.inference_mode():
|
| 252 |
with self._maybe_autocast():
|
| 253 |
if hasattr(self.core, "step"):
|
| 254 |
-
result = self.core.step(image=
|
| 255 |
elif hasattr(self.core, "process_frame"):
|
| 256 |
-
result = self.core.process_frame(
|
| 257 |
else:
|
| 258 |
h, w = _hw_from_image_like(image)
|
| 259 |
logger.warning("InferenceCore has no recognized frame API on subsequent call; returning neutral alpha.")
|
|
@@ -297,7 +362,7 @@ class MatAnyoneLoader:
|
|
| 297 |
Usage:
|
| 298 |
loader = MatAnyoneLoader(device="cuda")
|
| 299 |
session = loader.load() # callable
|
| 300 |
-
alpha = session(frame, first_frame_mask) #
|
| 301 |
"""
|
| 302 |
|
| 303 |
def __init__(self, device: str = "cuda", cache_dir: str = "./checkpoints/matanyone_cache",
|
|
@@ -346,13 +411,9 @@ def _try_build_core(self):
|
|
| 346 |
logger.debug(f"ctor(model_id, device, cache_dir) failed: {e}")
|
| 347 |
|
| 348 |
# 3) Minimal ctor
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
return core
|
| 353 |
-
except Exception as e:
|
| 354 |
-
logger.debug(f"ctor(model_id) failed: {e}")
|
| 355 |
-
raise # Propagate last error
|
| 356 |
|
| 357 |
def load(self) -> Optional[MatAnyoneCallableWrapper]:
|
| 358 |
"""Load MatAnyone and return the callable wrapper."""
|
|
@@ -364,7 +425,7 @@ def load(self) -> Optional[MatAnyoneCallableWrapper]:
|
|
| 364 |
|
| 365 |
try:
|
| 366 |
self.processor = self._try_build_core()
|
| 367 |
-
#
|
| 368 |
try:
|
| 369 |
if hasattr(self.processor, "to"):
|
| 370 |
self.processor.to(self.device)
|
|
@@ -445,7 +506,7 @@ def __call__(self, image, mask=None, **kwargs) -> np.ndarray:
|
|
| 445 |
return self.wrapper(image, mask, **kwargs)
|
| 446 |
|
| 447 |
|
| 448 |
-
# Backwards compatibility alias
|
| 449 |
_MatAnyoneSession = MatAnyoneCallableWrapper
|
| 450 |
|
| 451 |
__all__ = ["MatAnyoneLoader", "_MatAnyoneSession", "MatAnyoneCallableWrapper"]
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
"""
|
| 4 |
+
MatAnyone Loader - Stable Callable Wrapper for InferenceCore (extra-dim stripping)
|
| 5 |
+
=================================================================================
|
| 6 |
+
|
| 7 |
+
- Always call InferenceCore UNBATCHED:
|
| 8 |
+
image -> CHW float32 [0,1]
|
| 9 |
+
mask -> 1HW float32 [0,1]
|
| 10 |
+
- Aggressively strip extra dims:
|
| 11 |
+
e.g. [B,T,C,H,W] -> [C,H,W] (use first slice when B/T > 1 with a warning)
|
| 12 |
+
e.g. [B,C,H,W] -> [C,H,W]
|
| 13 |
+
e.g. [H,W,C,1] -> [H,W,C]
|
| 14 |
+
- Optional CUDA mixed precision (fp16/bf16)
|
| 15 |
+
- Robust alpha extraction -> (H,W) float32 [0,1]
|
| 16 |
"""
|
| 17 |
|
| 18 |
import os
|
|
|
|
| 36 |
if arr.dtype == np.uint8:
|
| 37 |
arr = arr.astype(np.float32) / 255.0
|
| 38 |
else:
|
| 39 |
+
arr = arr.astype(np.float32, copy=False)
|
|
|
|
| 40 |
np.clip(arr, 0.0, 1.0, out=arr)
|
| 41 |
return arr
|
| 42 |
|
| 43 |
|
| 44 |
+
def _strip_leading_extras_to_ndim(x: Union[np.ndarray, torch.Tensor], target_ndim: int) -> Union[np.ndarray, torch.Tensor]:
|
| 45 |
"""
|
| 46 |
+
Reduce x to at most target_ndim by removing leading dims.
|
| 47 |
+
- If a leading dim == 1, squeeze it.
|
| 48 |
+
- If a leading dim > 1, take the first slice and log a warning.
|
| 49 |
+
Repeat until ndim <= target_ndim.
|
| 50 |
"""
|
| 51 |
+
is_tensor = torch.is_tensor(x)
|
| 52 |
+
get_shape = (lambda t: tuple(t.shape)) if is_tensor else (lambda a: a.shape)
|
| 53 |
+
index_first = (lambda t: t[0]) if is_tensor else (lambda a: a[0])
|
| 54 |
+
squeeze_first = (lambda t: t.squeeze(0)) if is_tensor else (lambda a: np.squeeze(a, axis=0))
|
| 55 |
+
|
| 56 |
+
while len(get_shape(x)) > target_ndim:
|
| 57 |
+
dim0 = get_shape(x)[0]
|
| 58 |
+
if dim0 == 1:
|
| 59 |
+
x = squeeze_first(x)
|
| 60 |
+
else:
|
| 61 |
+
logger.warning(f"Input has extra leading dim >1 ({dim0}); taking the first slice.")
|
| 62 |
+
x = index_first(x)
|
| 63 |
+
return x
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _ensure_chw_float01(image: Union[np.ndarray, torch.Tensor], *, name: str = "image") -> torch.Tensor:
    """
    Convert ``image`` to a torch.FloatTensor CHW in [0, 1], stripping extra dims.

    Accepts shapes up to 5D (e.g. B,T,C,H,W / B,C,H,W / H,W,C / CHW / HW).
    Ambiguous multi-channel layouts fall back to the first channel with a warning.

    Raises:
        ValueError: if the input cannot be reduced to a 2D/3D image layout.
    """
    # Both numpy arrays and torch tensors expose .shape, so one expression
    # suffices (the original had an if/else whose branches were identical).
    orig_shape = tuple(image.shape)
    # Collapse leading batch/time dims down to at most 3 dims (CHW/HWC/HW).
    # After this call ndim <= 3, so no second stripping pass is needed.
    image = _strip_leading_extras_to_ndim(image, 3)

    if torch.is_tensor(image):
        t = image
        if t.ndim == 3:
            c0, _, c2 = t.shape
            if c0 in (1, 3, 4):
                pass  # already CHW
            elif c2 in (1, 3, 4):
                t = t.permute(2, 0, 1)  # HWC -> CHW
            else:
                # Ambiguous layout: assume HWC-like, then keep only channel 0.
                logger.warning(f"{name}: ambiguous 3D shape {tuple(t.shape)}; attempting HWC->CHW then selecting first channel.")
                t = t.permute(2, 0, 1)
                if t.shape[0] > 1:
                    t = t[0]
                t = t.unsqueeze(0)  # back to 1HW
        elif t.ndim == 2:
            t = t.unsqueeze(0)  # HW -> 1HW
        else:
            raise ValueError(f"{name}: unsupported tensor dims {tuple(t.shape)} after stripping.")

        t = t.to(dtype=torch.float32)
        if torch.max(t) > 1.5:
            # Heuristic: values above 1.5 indicate a 0-255 range.
            t = t / 255.0
        t = torch.clamp(t, 0.0, 1.0)
        logger.debug(f"{name}: {orig_shape} -> {tuple(t.shape)} (CHW)")
        return t

    # numpy path
    arr = np.asarray(image)
    if arr.ndim == 3:
        if arr.shape[0] in (1, 3, 4):  # CHW
            pass
        elif arr.shape[-1] in (1, 3, 4):  # HWC -> CHW
            arr = arr.transpose(2, 0, 1)
        else:
            logger.warning(f"{name}: ambiguous 3D shape {arr.shape}; trying HWC->CHW and selecting first channel.")
            arr = arr.transpose(2, 0, 1)  # HWC -> CHW
            if arr.shape[0] > 1:
                arr = arr[0:1, ...]  # 1HW
    elif arr.ndim == 2:
        arr = arr[None, ...]  # HW -> 1HW
    else:
        raise ValueError(f"{name}: unsupported numpy dims {arr.shape} after stripping.")

    arr = _to_float01_np(arr)
    t = torch.from_numpy(arr)
    logger.debug(f"{name}: {orig_shape} -> {tuple(t.shape)} (CHW)")
    return t
|
| 134 |
|
| 135 |
|
| 136 |
+
def _ensure_1hw_float01(mask: Union[np.ndarray, torch.Tensor], *, name: str = "mask") -> torch.Tensor:
    """
    Convert ``mask`` to a torch.FloatTensor 1HW in [0, 1], stripping extra dims.

    Accepts up to 4D inputs; collapses leading batch/time dims and picks the
    first slice/channel when the layout is multi-channel or ambiguous.

    Raises:
        ValueError: if the input cannot be reduced to a 1HW layout.
    """
    # Both numpy arrays and torch tensors expose .shape, so one expression
    # suffices (the original had an if/else whose branches were identical).
    orig_shape = tuple(mask.shape)
    mask = _strip_leading_extras_to_ndim(mask, 3)

    if torch.is_tensor(mask):
        m = mask
        if m.ndim == 3:
            # 1HW or CHW or HWC-like
            if m.shape[0] == 1:
                pass  # already 1HW
            elif m.shape[-1] == 1:
                m = m.permute(2, 0, 1)  # HW1 -> 1HW
            else:
                # Multi-channel: keep only the first channel.
                logger.warning(f"{name}: multi-channel {tuple(m.shape)}; using first channel.")
                if m.shape[0] in (3, 4):  # CHW
                    m = m[0:1, ...]
                elif m.shape[-1] in (3, 4):  # HWC
                    m = m.permute(2, 0, 1)[0:1, ...]
                else:
                    # Ambiguous -> slice along the first axis to get 1HW.
                    m = m[0:1, ...]
        elif m.ndim == 2:
            m = m.unsqueeze(0)  # HW -> 1HW
        else:
            raise ValueError(f"{name}: unsupported tensor dims {tuple(m.shape)} after stripping.")

        m = m.to(dtype=torch.float32)
        if torch.max(m) > 1.5:
            # Heuristic: values above 1.5 indicate a 0-255 range.
            m = m / 255.0
        m = torch.clamp(m, 0.0, 1.0)
        logger.debug(f"{name}: {orig_shape} -> {tuple(m.shape)} (1HW)")
        return m

    # numpy path
    arr = np.asarray(mask)
    if arr.ndim == 3:
        if arr.shape[0] == 1:
            pass  # 1HW
        elif arr.shape[-1] == 1:
            arr = arr.transpose(2, 0, 1)  # HW1 -> 1HW
        else:
            logger.warning(f"{name}: multi-channel {arr.shape}; using first channel.")
            if arr.shape[0] in (3, 4):
                arr = arr[0:1, ...]  # CHW -> 1HW
            elif arr.shape[-1] in (3, 4):
                arr = arr.transpose(2, 0, 1)[0:1, ...]  # HWC -> CHW -> 1HW
            else:
                arr = arr[0:1, ...]  # ambiguous -> 1HW by slice
    elif arr.ndim == 2:
        arr = arr[None, ...]  # 1HW
    else:
        raise ValueError(f"{name}: unsupported numpy dims {arr.shape} after stripping.")

    arr = _to_float01_np(arr)
    t = torch.from_numpy(arr)
    logger.debug(f"{name}: {orig_shape} -> {tuple(t.shape)} (1HW)")
    return t
|
| 199 |
|
| 200 |
|
| 201 |
def _alpha_from_result(result: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
|
| 202 |
+
"""Extract a 2D alpha (H,W) float32 [0,1] from various outputs."""
|
|
|
|
|
|
|
|
|
|
| 203 |
if result is None:
|
| 204 |
return np.full((512, 512), 0.5, dtype=np.float32)
|
| 205 |
|
|
|
|
| 207 |
result = result.detach().float().cpu()
|
| 208 |
|
| 209 |
arr = np.asarray(result)
|
| 210 |
+
# Strip to <= 3 dims, then extract
|
| 211 |
+
while arr.ndim > 3:
|
| 212 |
+
if arr.shape[0] > 1:
|
| 213 |
+
logger.warning(f"Result has leading dim {arr.shape[0]}; taking first slice.")
|
| 214 |
+
arr = arr[0]
|
| 215 |
+
|
| 216 |
if arr.ndim == 2:
|
| 217 |
alpha = arr
|
| 218 |
elif arr.ndim == 3:
|
| 219 |
+
if arr.shape[0] in (1, 3, 4): # CHW -> take channel 0
|
|
|
|
| 220 |
alpha = arr[0]
|
| 221 |
+
elif arr.shape[-1] in (1, 3, 4): # HWC -> take channel 0
|
| 222 |
alpha = arr[..., 0]
|
| 223 |
else:
|
| 224 |
+
alpha = arr[0] # ambiguous
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
else:
|
| 226 |
+
# 1D or 0D shouldn't happen; fallback
|
| 227 |
alpha = np.full((512, 512), 0.5, dtype=np.float32)
|
| 228 |
|
| 229 |
alpha = alpha.astype(np.float32, copy=False)
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
def _hw_from_image_like(x: Union[np.ndarray, torch.Tensor]) -> Tuple[int, int]:
|
| 235 |
+
"""Best-effort infer (H, W) for fallback mask sizing."""
|
| 236 |
if torch.is_tensor(x):
|
| 237 |
shape = tuple(x.shape)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
else:
|
| 239 |
+
shape = np.asarray(x).shape
|
| 240 |
+
|
| 241 |
+
# Try common orders
|
| 242 |
+
if len(shape) == 2: # HW
|
| 243 |
+
return shape[0], shape[1]
|
| 244 |
+
if len(shape) == 3:
|
| 245 |
+
if shape[0] in (1, 3, 4): # CHW
|
| 246 |
+
return shape[1], shape[2]
|
| 247 |
+
if shape[-1] in (1, 3, 4): # HWC
|
| 248 |
+
return shape[0], shape[1]
|
| 249 |
+
# Ambiguous -> treat as CHW
|
| 250 |
+
return shape[1], shape[2]
|
| 251 |
+
if len(shape) >= 4:
|
| 252 |
+
# Assume leading are batch/time; try BCHW first
|
| 253 |
+
if len(shape) >= 4 and (shape[1] in (1, 3, 4)):
|
| 254 |
+
return shape[2], shape[3]
|
| 255 |
+
# Else BHWC-ish
|
| 256 |
+
return shape[-3], shape[-2]
|
| 257 |
+
return 512, 512
|
| 258 |
|
| 259 |
|
| 260 |
# --------------------------- Callable Wrapper ---------------------------
|
|
|
|
| 267 |
- First call SHOULD include a mask (1HW). If not, returns neutral 0.5 alpha.
|
| 268 |
- Subsequent calls do not require mask.
|
| 269 |
- Returns 2D alpha (H,W) float32 in [0,1].
|
| 270 |
+
- Strips any extra dims from inputs before calling core.
|
| 271 |
"""
|
| 272 |
|
| 273 |
def __init__(self, inference_core, device: str = "cuda", mixed_precision: Optional[str] = "fp16"):
|
|
|
|
| 280 |
if self.device == "cuda" and self.mixed_precision in ("fp16", "bf16"):
|
| 281 |
dtype = torch.float16 if self.mixed_precision == "fp16" else torch.bfloat16
|
| 282 |
return torch.autocast(device_type="cuda", dtype=dtype)
|
| 283 |
+
# no-op ctx
|
| 284 |
class _NullCtx:
|
| 285 |
def __enter__(self): return None
|
| 286 |
def __exit__(self, *exc): return False
|
|
|
|
| 288 |
|
| 289 |
def __call__(self, image, mask=None, **kwargs) -> np.ndarray:
|
| 290 |
try:
|
| 291 |
+
# Preprocess (unbatched)
|
| 292 |
+
img_chw = _ensure_chw_float01(image, name="image").to(self.device, non_blocking=True)
|
|
|
|
| 293 |
|
| 294 |
if not self.initialized:
|
| 295 |
if mask is None:
|
|
|
|
| 297 |
logger.warning("MatAnyone first frame called without mask; returning neutral alpha.")
|
| 298 |
return np.full((h, w), 0.5, dtype=np.float32)
|
| 299 |
|
| 300 |
+
m_1hw = _ensure_1hw_float01(mask, name="mask").to(self.device, non_blocking=True)
|
|
|
|
| 301 |
|
| 302 |
with torch.inference_mode():
|
| 303 |
with self._maybe_autocast():
|
| 304 |
if hasattr(self.core, "step"):
|
| 305 |
+
result = self.core.step(image=img_chw, mask=m_1hw, **kwargs)
|
| 306 |
elif hasattr(self.core, "process_frame"):
|
| 307 |
+
result = self.core.process_frame(img_chw, m_1hw, **kwargs)
|
| 308 |
else:
|
| 309 |
logger.warning("InferenceCore has no recognized frame API; echoing input mask.")
|
| 310 |
return _alpha_from_result(mask)
|
|
|
|
| 316 |
with torch.inference_mode():
|
| 317 |
with self._maybe_autocast():
|
| 318 |
if hasattr(self.core, "step"):
|
| 319 |
+
result = self.core.step(image=img_chw, **kwargs)
|
| 320 |
elif hasattr(self.core, "process_frame"):
|
| 321 |
+
result = self.core.process_frame(img_chw, **kwargs)
|
| 322 |
else:
|
| 323 |
h, w = _hw_from_image_like(image)
|
| 324 |
logger.warning("InferenceCore has no recognized frame API on subsequent call; returning neutral alpha.")
|
|
|
|
| 362 |
Usage:
|
| 363 |
loader = MatAnyoneLoader(device="cuda")
|
| 364 |
session = loader.load() # callable
|
| 365 |
+
alpha = session(frame, first_frame_mask) # returns (H, W) float32
|
| 366 |
"""
|
| 367 |
|
| 368 |
def __init__(self, device: str = "cuda", cache_dir: str = "./checkpoints/matanyone_cache",
|
|
|
|
| 411 |
logger.debug(f"ctor(model_id, device, cache_dir) failed: {e}")
|
| 412 |
|
| 413 |
# 3) Minimal ctor
|
| 414 |
+
core = InferenceCore(self.model_id)
|
| 415 |
+
logger.info("Loaded MatAnyone via InferenceCore(model_id) [minimal]")
|
| 416 |
+
return core
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
def load(self) -> Optional[MatAnyoneCallableWrapper]:
|
| 419 |
"""Load MatAnyone and return the callable wrapper."""
|
|
|
|
| 425 |
|
| 426 |
try:
|
| 427 |
self.processor = self._try_build_core()
|
| 428 |
+
# Optional device move
|
| 429 |
try:
|
| 430 |
if hasattr(self.processor, "to"):
|
| 431 |
self.processor.to(self.device)
|
|
|
|
| 506 |
return self.wrapper(image, mask, **kwargs)
|
| 507 |
|
| 508 |
|
| 509 |
+
# Backwards compatibility alias
|
| 510 |
_MatAnyoneSession = MatAnyoneCallableWrapper
|
| 511 |
|
| 512 |
__all__ = ["MatAnyoneLoader", "_MatAnyoneSession", "MatAnyoneCallableWrapper"]
|