Spaces:

MogensR
/

VideoBackgroundReplacer2

Paused

App Files Files Community

MogensR committed on Sep 17, 2025

Commit

f5cc616

1 Parent(s): 6ca9173

revelations

Browse files

Files changed (1) hide show

models/matanyone_loader.py +138 -335

models/matanyone_loader.py CHANGED Viewed

@@ -1,30 +1,24 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-MatAnyone adapter — SAM2-seeded, streaming, build-agnostic.
-- SAM2 defines the subject (seed mask) on frame 0.
-- MatAnyone does frame-by-frame alpha matting.
-- Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
-- Falls back to process_frame([H,W,3]) if supported.
-Changes (2025-09-16):
-- Aligned with torch==2.3.1+cu121 and MatAnyone v1.0.0
-- Added shape logging in _call_step to verify 5D-to-4D squeeze
-- Set MATANY_FORCE_FORMAT=4d as default
-- Added VRAM logging in process_stream (MATANY_LOG_VRAM=1)
-- Enhanced _safe_empty_cache with memory_summary
-- Added MatAnyone version logging
-- Added MatAnyoneModel wrapper class for app_hf.py compatibility
 """
 from __future__ import annotations
 import os
-import cv2
 import time
 import logging
-import numpy as np
-import torch
 import importlib.metadata
 from pathlib import Path
 from typing import Optional, Callable, Tuple
@@ -64,13 +58,17 @@ class MatAnyError(RuntimeError):
     pass
 # ---------- CUDA helpers ----------
-def _cuda_snapshot(device: Optional[torch.device]) -> str:
     try:
         if not torch.cuda.is_available():
             return "CUDA: N/A"
         idx = 0
-        if isinstance(device, torch.device) and device.type == "cuda" and device.index is not None:
-            idx = device.index
         name = torch.cuda.get_device_name(idx)
         alloc = torch.cuda.memory_allocated(idx) / (1024**3)
         resv = torch.cuda.memory_reserved(idx) / (1024**3)
@@ -79,254 +77,48 @@ def _cuda_snapshot(device: Optional[torch.device]) -> str:
         return f"CUDA snapshot error: {e!r}"
 def _safe_empty_cache():
-    if not torch.cuda.is_available():
-        return
     try:
-        log.info(f"[MATANY] CUDA memory before empty_cache: {_cuda_snapshot(None)}")
-        torch.cuda.empty_cache()
-        log.info(f"[MATANY] CUDA memory after empty_cache: {_cuda_snapshot(None)}")
-        if os.getenv("MATANY_LOG_VRAM", "0") == "1":
-            log.debug(f"[MATANY] VRAM summary:\n{torch.cuda.memory_summary()}")
     except Exception:
         pass
-# ---------- SAM2 → seed mask prep ----------
-def _prepare_seed_mask(sam2_mask: np.ndarray, H: int, W: int) -> np.ndarray:
-    """
-    Normalize to float32 [H,W] in {0,1}, white=FG.
-    Auto-invert if >60% ON (likely wrong polarity).
-    """
-    if not isinstance(sam2_mask, np.ndarray):
-        raise MatAnyError(f"SAM2 mask must be numpy array, got {type(sam2_mask)}")
-    if sam2_mask.ndim == 3 and sam2_mask.shape[2] == 3:
-        sam2_mask = cv2.cvtColor(sam2_mask, cv2.COLOR_BGR2GRAY)
-    if sam2_mask.ndim != 2:
-        raise MatAnyError(f"SAM2 mask must be 2D, got shape {sam2_mask.shape}")
-    if sam2_mask.shape != (H, W):
-        sam2_mask = cv2.resize(sam2_mask, (W, H), interpolation=cv2.INTER_NEAREST)
-    m = sam2_mask.astype(np.float32)
-    if m.max() > 1.0:
-        m /= 255.0
-    m = np.clip(m, 0.0, 1.0)
-    if (m > 0.5).mean() > 0.60:
-        m = 1.0 - m
-    return (m > 0.5).astype(np.float32)
-# ---------- Frame conversion ----------
-def _frame_bgr_to_hwc_rgb_numpy(frame) -> np.ndarray:
-    """Accept HWC/CHW BGR uint8 → return HWC RGB uint8."""
-    if not isinstance(frame, np.ndarray) or frame.ndim != 3:
-        raise MatAnyError(f"Frame must be HWC/CHW numpy array, got {type(frame)}, shape={getattr(frame, 'shape', None)}")
-    arr = frame
-    if arr.shape[0] == 3 and arr.shape[2] != 3:   # CHW → HWC
-        arr = np.transpose(arr, (1, 2, 0))
-    if arr.dtype != np.uint8:
-        raise MatAnyError(f"Frame must be uint8, got {arr.dtype}")
-    return cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
 # ============================================================================
 class MatAnyoneSession:
     """
-    Streaming wrapper that seeds MatAnyone on frame 0.
-    Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
-    Falls back to process_frame([H,W,3]) if supported.
     """
     def __init__(self, device: Optional[str] = None, precision: str = "auto"):
-        from .matany_compat_patch import apply_matany_t1_squeeze_guard
-        self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
         self.precision = precision.lower()
-        # Apply T=1 squeeze patch
-        if apply_matany_t1_squeeze_guard():
-            log.info("[MATANY] T=1 squeeze patch applied for MatAnyone")
-        else:
-            log.warning("[MATANY] T=1 squeeze patch failed; conv2d errors may occur")
         # Log MatAnyone version
         try:
             version = importlib.metadata.version("matanyone")
             log.info(f"[MATANY] MatAnyone version: {version}")
         except Exception:
             log.info("[MATANY] MatAnyone version unknown")
-        # API/format overrides for debugging
-        api_force = os.getenv("MATANY_FORCE_API", "").strip().lower()  # "process" or "step"
-        fmt_force = os.getenv("MATANY_FORCE_FORMAT", "4d").strip().lower()  # "4d" or "5d"
-        self._force_api_process = (api_force == "process")
-        self._force_api_step    = (api_force == "step")
-        self._force_4d = (fmt_force == "4d") or not fmt_force  # Default to 4D
-        self._force_5d = (fmt_force == "5d")
-        try:
-            from matanyone.inference.inference_core import InferenceCore
-        except ImportError as e:
-            raise MatAnyError(f"Failed to import MatAnyone: {e}")
         try:
-            self.core = InferenceCore()
-        except TypeError:
-            self.core = InferenceCore("PeiqingYang/MatAnyone")
-        self._has_process = hasattr(self.core, "process_frame")
-        self._has_step    = hasattr(self.core, "step")
-        if not (self._has_process or self._has_step):
-            raise MatAnyError("MatAnyone core exposes neither 'process_frame' nor 'step'")
-        # Prefer step unless forced to process_frame
-        if self._force_api_process and not self._has_process:
-            raise MatAnyError("MATANY_FORCE_API=process but core.process_frame is missing")
-        if self._force_api_step and not self._has_step:
-            raise MatAnyError("MATANY_FORCE_API=step but core.step is missing")
-        self._api = "process_frame" if (self._has_process and not self._force_api_step) else "step"
-        self._use_5d = bool(self._force_5d)  # Only for step mode; rarely needed post-patch
-        log.info(f"[MATANY] APIs: process_frame={self._has_process}, step={self._has_step} | active={self._api} | force4d={self._force_4d} force5d={self._force_5d}")
-    # AMP only affects step() path where we use torch tensors
-    def _amp(self):
-        if self.device.type != "cuda":
-            return torch.amp.autocast(device_type="cuda", enabled=False)
-        if self.precision == "fp32":
-            return torch.amp.autocast(device_type="cuda", enabled=False)
-        if self.precision == "fp16":
-            return torch.amp.autocast(device_type="cuda", enabled=True, dtype=torch.float16)
-        return torch.amp.autocast(device_type="cuda", enabled=True)
-    # ----- Tensor builders for step() mode -----
-    def _to_tensors(self, img_hwc_rgb: np.ndarray, mask_hw: Optional[np.ndarray]):
-        img = torch.from_numpy(img_hwc_rgb).to(self.device)
-        if img.dtype != torch.float32:
-            img = img.float()
-        if float(img.max().item()) > 1.0:
-            img = img / 255.0
-        img_chw = img.permute(2, 0, 1).contiguous()  # [3,H,W]
-        img_4d  = img_chw.unsqueeze(0)               # [1,3,H,W]
-        img_5d  = img_chw.unsqueeze(0).unsqueeze(0)  # [1,1,3,H,W]
-        mask_4d = mask_5d = None
-        if mask_hw is not None:
-            m = torch.from_numpy(mask_hw).to(self.device)
-            if m.dtype != torch.float32:
-                m = m.float()
-            m = (m >= 0.5).float() if float(m.max().item()) <= 1.0 else (m >= 128).float()
-            mask_4d = m.unsqueeze(0).unsqueeze(0).contiguous()   # [1,1,H,W]
-            mask_5d = mask_4d.unsqueeze(1).contiguous()          # [1,1,1,H,W]
-        return img_4d, img_5d, mask_4d, mask_5d
-    # ----- Core call: process_frame fallback, step preferred -----
-    def _call_process_frame(self, rgb_hwc: np.ndarray, seed_mask_hw: Optional[np.ndarray], is_first: bool):
-        """Try numpy path first; fallback to torch path if the wheel requests tensors."""
-        seed = seed_mask_hw if is_first else None
-        # 1) Most wheels want numpy HWC + 2D mask (float 0..1 or uint8)
         try:
-            return self.core.process_frame(rgb_hwc, seed)
-        except TypeError as e_np:
-            msg = str(e_np).lower()
-            # 2) Some wheels want torch [B,C,H,W] tensors even in process_frame
-            if "tensor" in msg or "expected" in msg or "conv2d" in msg:
-                img_4d, _, mask_4d, _ = self._to_tensors(rgb_hwc, seed)
-                with torch.no_grad(), self._amp():
-                    try:
-                        return self.core.process_frame(img_4d, mask_4d)
-                    except Exception as e_t:
-                        raise MatAnyError(f"process_frame tensor path failed: {e_t}") from e_t
-            raise
-    def _call_step(self, rgb_hwc: np.ndarray, seed_mask_hw: Optional[np.ndarray], is_first: bool):
-        """Use 4D [B,C,H,W] by default; retry with 5D only if forced."""
-        img_4d, img_5d, mask_4d, mask_5d = self._to_tensors(rgb_hwc, seed_mask_hw if is_first else None)
-        def run(use_5d: bool):
-            img = img_5d if use_5d else img_4d
-            msk = mask_5d if use_5d else mask_4d
-            log.debug(f"[MATANY] Step input: img={img.shape}, mask={msk.shape if msk is not None else None}, is_first={is_first}")
-            if is_first and msk is not None:
-                try:
-                    return self.core.step(img, msk, is_first=True)
-                except TypeError:
-                    return self.core.step(img, msk)
-            else:
-                return self.core.step(img)
-        with torch.no_grad(), self._amp():
-            if self._force_4d:
-                return run(False)
-            if self._force_5d:
-                return run(True)
-            if self._use_5d:
-                try:
-                    return run(True)
-                except RuntimeError as e5:
-                    m5 = str(e5)
-                    if "expected 3d" in m5.lower() and "4d" in m5 and "conv2d" in m5.lower():
-                        log.info("[MATANY] 5D rejected by wheel (conv2d wants 3D/4D). Falling back to 4D.")
-                        self._use_5d = False
-                        return run(False)
-                    raise MatAnyError(f"Runtime error (step/5D): {m5}") from e5
-            try:
-                return run(False)  # 4D
-            except RuntimeError as e4:
-                m4 = str(e4)
-                needs_5d = any(kw in m4 for kw in ["expected 5D", "expects 5D", "input.dim() == 5", "but got 4D", "got input of size: [1, 3,"])
-                if needs_5d:
-                    log.info("[MATANY] Wheel appears to expect 5D — retrying with [1,1,3,H,W] and [1,1,1,H,W].")
-                    self._use_5d = True
-                    try:
-                        return run(True)
-                    except RuntimeError as e5b:
-                        m5b = str(e5b)
-                        if "expected 3d" in m5b.lower() and "4d" in m5b and "conv2d" in m5b.lower():
-                            self._use_5d = False
-                            raise MatAnyError(f"Wheel ultimately expects 4D (conv2d). Original 4D error: {m4}") from e4
-                        raise MatAnyError(f"step/5D attempt failed: {m5b}") from e5b
-                if "cuda" in m4.lower():
-                    snap = _cuda_snapshot(self.device)
-                    raise MatAnyError(f"CUDA runtime error: {m4} | {snap}") from e4
-                raise MatAnyError(f"Runtime error (step/4D): {m4}") from e4
-    # ----- Per-frame runner -----
-    def _run_frame(self, frame_bgr: np.ndarray, sam2_mask_hw: Optional[np.ndarray], is_first: bool) -> np.ndarray:
-        rgb_hwc = _frame_bgr_to_hwc_rgb_numpy(frame_bgr)
-        H, W = rgb_hwc.shape[:2]
-        seed_for_this_frame = _prepare_seed_mask(sam2_mask_hw, H, W) if (is_first and sam2_mask_hw is not None) else None
-        # Primary: step (4D, post-patch); fallback to process_frame
-        if self._api == "process_frame":
-            try:
-                out = self._call_process_frame(rgb_hwc, seed_for_this_frame, is_first)
-            except Exception as e_proc:
-                log.warning(f"[MATANY] process_frame failed ({e_proc}); falling back to step().")
-                if not self._has_step:
-                    raise MatAnyError(f"process_frame failed and step() is unavailable: {e_proc}")
-                self._api = "step"
-                out = self._call_step(rgb_hwc, seed_for_this_frame, is_first)
-        else:
-            out = self._call_step(rgb_hwc, seed_for_this_frame, is_first)
-        # Normalize to 2D alpha [H,W] in [0,1]
-        if isinstance(out, torch.Tensor):
-            alpha = out.detach().float().squeeze().cpu().numpy()
-        else:
-            alpha = np.asarray(out)
-        alpha = alpha.astype(np.float32)
-        if float(alpha.max()) > 1.0:
-            alpha /= 255.0
-        alpha = np.squeeze(alpha)
-        if alpha.ndim != 2:
-            raise MatAnyError(f"Expected 2D alpha matte; got shape {alpha.shape}")
-        return np.clip(alpha, 0.0, 1.0)
-    # ----- Public: streaming processor -----
     def process_stream(
         self,
         video_path: Path,
@@ -334,97 +126,73 @@ def process_stream(
         out_dir: Optional[Path] = None,
         progress_cb: Optional[Callable] = None,
     ) -> Tuple[Path, Path]:
         video_path = Path(video_path)
         if not video_path.exists():
             raise MatAnyError(f"Video file not found: {video_path}")
-        out_dir = Path(out_dir) if out_dir else video_path.parent
         out_dir.mkdir(parents=True, exist_ok=True)
-        cap_probe = cv2.VideoCapture(str(video_path))
-        if not cap_probe.isOpened():
-            raise MatAnyError(f"Failed to open video: {video_path}")
-        N = int(cap_probe.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps = cap_probe.get(cv2.CAP_PROP_FPS)
-        W = int(cap_probe.get(cv2.CAP_PROP_FRAME_WIDTH))
-        H = int(cap_probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        cap_probe.release()
-        if not fps or fps <= 0 or np.isnan(fps):
-            fps = 25.0
-        log.info(f"MatAnyone: {video_path.name} | {N} frames {W}x{H} @ {fps:.2f} fps")
-        _emit_progress(progress_cb, 0.05, f"Video: {N} frames {W}x{H} @ {fps:.2f} fps")
-        _emit_progress(progress_cb, 0.08, "Using per-frame processing")
-        alpha_path = out_dir / "alpha.mp4"
-        fg_path = out_dir / "fg.mp4"
-        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-        alpha_writer = cv2.VideoWriter(str(alpha_path), fourcc, fps, (W, H), True)
-        fg_writer = cv2.VideoWriter(str(fg_path), fourcc, fps, (W, H), True)
-        if not alpha_writer.isOpened() or not fg_writer.isOpened():
-            raise MatAnyError("Failed to initialize VideoWriter(s)")
-        seed_mask_np = None
-        if seed_mask_path is not None:
-            p = Path(seed_mask_path)
-            if not p.exists():
-                raise MatAnyError(f"Seed mask not found: {p}")
-            m = cv2.imread(str(p), cv2.IMREAD_GRAYSCALE)
-            if m is None:
-                raise MatAnyError(f"Failed to read seed mask: {p}")
-            seed_mask_np = m
-        cap = cv2.VideoCapture(str(video_path))
-        if not cap.isOpened():
-            raise MatAnyError(f"Failed to open video for reading: {video_path}")
-        idx = 0
-        start = time.time()
-        last_prog = start
         try:
-            while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
-                is_first = (idx == 0)
-                alpha = self._run_frame(frame, seed_mask_np if is_first else None, is_first)
-                alpha_u8 = (alpha * 255.0 + 0.5).astype(np.uint8)
-                alpha_bgr = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
-                fg_bgr = (frame.astype(np.float32) * alpha[..., None]).clip(0, 255).astype(np.uint8)
-                alpha_writer.write(alpha_bgr)
-                fg_writer.write(fg_bgr)
-                idx += 1
-                now = time.time()
-                if now - last_prog >= 1.0:
-                    frac = idx / max(N, 1)
-                    _emit_progress(progress_cb, min(0.1 + 0.85 * frac, 0.95), f"MatAnyone: {idx}/{N} frames")
-                    last_prog = now
-                    if os.getenv("MATANY_LOG_VRAM", "0") == "1":
-                        log.debug(f"[MATANY] Frame {idx}/{N} VRAM: {_cuda_snapshot(self.device)}")
         except Exception as e:
-            raise MatAnyError(f"Video processing failed: {e}") from e
         finally:
-            try: cap.release()
-            except: pass
-            try: alpha_writer.release()
-            except: pass
-            try: fg_writer.release()
-            except: pass
             _safe_empty_cache()
-        if not alpha_path.exists() or alpha_path.stat().st_size == 0:
-            raise MatAnyError(f"Output file missing/empty: {alpha_path}")
-        if not fg_path.exists() or fg_path.stat().st_size == 0:
-            raise MatAnyError(f"Output file missing/empty: {fg_path}")
-        _emit_progress(progress_cb, 1.0, "MatAnyone: done")
-        elapsed = time.time() - start
-        log.info(f"MatAnyone completed: {idx} frames in {elapsed:.1f}s")
-        return alpha_path, fg_path
 # ============================================================================
 # MatAnyoneModel Wrapper Class for app_hf.py compatibility
 # ============================================================================
@@ -463,8 +231,8 @@ def replace_background(self, video_path, masks, background_path):
             # Convert paths to Path objects
             video_path = Path(video_path)
-            # Handle masks - for now, we'll use the session without a separate mask file
-            # since MatAnyone expects SAM2 to provide the initial seed mask
             # Create output directory
             with tempfile.TemporaryDirectory() as temp_dir:
@@ -473,15 +241,50 @@ def replace_background(self, video_path, masks, background_path):
                 # Process the video stream
                 alpha_path, fg_path = self.session.process_stream(
                     video_path=video_path,
-                    seed_mask_path=None,  # We'll rely on SAM2 integration
                     out_dir=output_dir,
                     progress_cb=None
                 )
-                # For now, return the foreground video
                 # In a full implementation, you'd composite with the background_path
                 return str(fg_path)
         except Exception as e:
             log.error(f"Error in replace_background: {e}")
-            raise MatAnyError(f"Background replacement failed: {e}")

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
+MatAnyone adapter — Using Official API (File-Based)
+Fixed to use MatAnyone's official process_video() API instead of
+bypassing it with internal tensor manipulation. This eliminates
+all 5D tensor dimension issues.
+Changes (2025-09-17):
+- Replaced custom tensor processing with official MatAnyone API
+- Uses file-based input/output as designed by MatAnyone authors
+- Eliminates all tensor dimension compatibility issues
+- Simplified error handling and logging
 """
 from __future__ import annotations
 import os
 import time
 import logging
+import tempfile
 import importlib.metadata
 from pathlib import Path
 from typing import Optional, Callable, Tuple
     pass
 # ---------- CUDA helpers ----------
+def _cuda_snapshot(device: Optional[str]) -> str:
     try:
+        import torch
         if not torch.cuda.is_available():
             return "CUDA: N/A"
         idx = 0
+        if device and device.startswith("cuda:"):
+            try:
+                idx = int(device.split(":")[1])
+            except (ValueError, IndexError):
+                idx = 0
         name = torch.cuda.get_device_name(idx)
         alloc = torch.cuda.memory_allocated(idx) / (1024**3)
         resv = torch.cuda.memory_reserved(idx) / (1024**3)
         return f"CUDA snapshot error: {e!r}"
 def _safe_empty_cache():
     try:
+        import torch
+        if torch.cuda.is_available():
+            log.info(f"[MATANY] CUDA memory before empty_cache: {_cuda_snapshot('cuda:0')}")
+            torch.cuda.empty_cache()
+            log.info(f"[MATANY] CUDA memory after empty_cache: {_cuda_snapshot('cuda:0')}")
     except Exception:
         pass
 # ============================================================================
 class MatAnyoneSession:
     """
+    Simple wrapper around MatAnyone's official API.
+    Uses file-based input/output as designed by the MatAnyone authors.
     """
     def __init__(self, device: Optional[str] = None, precision: str = "auto"):
+        self.device = device or ("cuda" if self._cuda_available() else "cpu")
         self.precision = precision.lower()
         # Log MatAnyone version
         try:
             version = importlib.metadata.version("matanyone")
             log.info(f"[MATANY] MatAnyone version: {version}")
         except Exception:
             log.info("[MATANY] MatAnyone version unknown")
+        # Initialize MatAnyone's official API
         try:
+            from matanyone import InferenceCore
+            self.processor = InferenceCore("PeiqingYang/MatAnyone")
+            log.info("[MATANY] MatAnyone InferenceCore initialized successfully")
+        except Exception as e:
+            raise MatAnyError(f"Failed to initialize MatAnyone: {e}")
+    def _cuda_available(self) -> bool:
         try:
+            import torch
+            return torch.cuda.is_available()
+        except Exception:
+            return False
     def process_stream(
         self,
         video_path: Path,
         out_dir: Optional[Path] = None,
         progress_cb: Optional[Callable] = None,
     ) -> Tuple[Path, Path]:
+        """
+        Process video using MatAnyone's official API.
+        Args:
+            video_path: Path to input video file
+            seed_mask_path: Path to first-frame mask PNG (white=foreground, black=background)
+            out_dir: Output directory for results
+            progress_cb: Progress callback function
+        Returns:
+            Tuple of (alpha_path, foreground_path)
+        """
         video_path = Path(video_path)
         if not video_path.exists():
             raise MatAnyError(f"Video file not found: {video_path}")
+        if seed_mask_path and not Path(seed_mask_path).exists():
+            raise MatAnyError(f"Seed mask not found: {seed_mask_path}")
+        out_dir = Path(out_dir) if out_dir else video_path.parent / "matanyone_output"
         out_dir.mkdir(parents=True, exist_ok=True)
+        log.info(f"[MATANY] Processing video: {video_path}")
+        log.info(f"[MATANY] Using mask: {seed_mask_path}")
+        log.info(f"[MATANY] Output directory: {out_dir}")
+        _emit_progress(progress_cb, 0.0, "Initializing MatAnyone processing...")
         try:
+            # Use MatAnyone's official API
+            start_time = time.time()
+            _emit_progress(progress_cb, 0.1, "Running MatAnyone video matting...")
+            # Call the official process_video method
+            foreground_path, alpha_path = self.processor.process_video(
+                input_path=str(video_path),
+                mask_path=str(seed_mask_path) if seed_mask_path else None,
+                output_path=str(out_dir)
+            )
+            processing_time = time.time() - start_time
+            log.info(f"[MATANY] Processing completed in {processing_time:.1f}s")
+            log.info(f"[MATANY] Foreground output: {foreground_path}")
+            log.info(f"[MATANY] Alpha output: {alpha_path}")
+            # Convert to Path objects
+            fg_path = Path(foreground_path) if foreground_path else None
+            al_path = Path(alpha_path) if alpha_path else None
+            # Verify outputs exist
+            if not fg_path or not fg_path.exists():
+                raise MatAnyError(f"Foreground output not created: {fg_path}")
+            if not al_path or not al_path.exists():
+                raise MatAnyError(f"Alpha output not created: {al_path}")
+            _emit_progress(progress_cb, 1.0, "MatAnyone processing complete")
+            return al_path, fg_path  # Return (alpha, foreground) to match expected order
         except Exception as e:
+            log.error(f"[MATANY] Processing failed: {e}")
+            raise MatAnyError(f"MatAnyone processing failed: {e}")
         finally:
             _safe_empty_cache()
 # ============================================================================
 # MatAnyoneModel Wrapper Class for app_hf.py compatibility
 # ============================================================================
             # Convert paths to Path objects
             video_path = Path(video_path)
+            # For now, we expect masks to be a path to the first-frame mask
+            mask_path = Path(masks) if isinstance(masks, (str, Path)) else None
             # Create output directory
             with tempfile.TemporaryDirectory() as temp_dir:
                 # Process the video stream
                 alpha_path, fg_path = self.session.process_stream(
                     video_path=video_path,
+                    seed_mask_path=mask_path,
                     out_dir=output_dir,
                     progress_cb=None
                 )
+                # Return the foreground video path
                 # In a full implementation, you'd composite with the background_path
                 return str(fg_path)
         except Exception as e:
             log.error(f"Error in replace_background: {e}")
+            raise MatAnyError(f"Background replacement failed: {e}")
+# ============================================================================
+# Helper function for pipeline integration
+# ============================================================================
+def create_matanyone_session(device=None):
+    """Create a MatAnyone session for use in pipeline"""
+    return MatAnyoneSession(device=device)
+def run_matanyone_on_files(video_path, mask_path, output_dir, device="cuda", progress_callback=None):
+    """
+    Run MatAnyone on video and mask files.
+    Args:
+        video_path: Path to input video
+        mask_path: Path to first-frame mask PNG
+        output_dir: Directory for outputs
+        device: Device to use (cuda/cpu)
+        progress_callback: Progress callback function
+    Returns:
+        Tuple of (alpha_path, foreground_path) or (None, None) on failure
+    """
+    try:
+        session = MatAnyoneSession(device=device)
+        alpha_path, fg_path = session.process_stream(
+            video_path=Path(video_path),
+            seed_mask_path=Path(mask_path) if mask_path else None,
+            out_dir=Path(output_dir),
+            progress_cb=progress_callback
+        )
+        return str(alpha_path), str(fg_path)
+    except Exception as e:
+        log.error(f"MatAnyone processing failed: {e}")
+        return None, None