MogensR committed on
Commit
bcb443b
·
1 Parent(s): 4b817e6
app.py CHANGED
@@ -1,7 +1,17 @@
1
  #!/usr/bin/env python3
2
  """
3
  VideoBackgroundReplacer2 - SAM2 + MatAnyone Integration
 
 
 
 
 
 
 
 
 
4
  """
 
5
  print("=== APP STARTUP DEBUG: app.py starting ===")
6
  import sys
7
  print(f"=== APP STARTUP DEBUG: Python {sys.version} ===")
@@ -12,7 +22,6 @@
12
  import os
13
  os.environ.pop("OMP_NUM_THREADS", None)
14
 
15
- import sys
16
  import logging
17
  import threading
18
  import time
@@ -23,7 +32,7 @@
23
  # Suppress torchvision video deprecation warnings from MatAnyone
24
  warnings.filterwarnings("ignore", category=UserWarning, module="torchvision.io._video_deprecation_warning")
25
 
26
- # --- import path fix for third_party packages (MatAnyone, SAM2) ---
27
  third_party_root = Path(__file__).parent / "third_party"
28
  sam2_path = third_party_root / "sam2"
29
 
@@ -43,7 +52,7 @@
43
 
44
  # DEBUG: try importing MatAnyone and show its location
45
  try:
46
- import matanyone # noqa: F401
47
  import inspect
48
  import os as _os
49
  print("[MATANY] import OK from:", _os.path.dirname(inspect.getfile(matanyone)), flush=True)
@@ -78,9 +87,9 @@ def _heartbeat():
78
  # Safe, minimal startup diagnostics (no long CUDA probes)
79
  # -----------------------------------------------------------------------------
80
  def _safe_startup_diag():
81
- # Torch version only; defer CUDA availability checks to post-launch
82
  try:
83
- import torch # noqa: F401
84
  import importlib
85
  t = importlib.import_module("torch")
86
  logger.info(
@@ -91,6 +100,14 @@ def _safe_startup_diag():
91
  except Exception as e:
92
  logger.warning("Torch not available at startup: %s", e)
93
 
 
 
 
 
 
 
 
 
94
  # nvidia-smi with short timeout (avoid indefinite block)
95
  try:
96
  out = subprocess.run(
@@ -107,7 +124,7 @@ def _safe_startup_diag():
107
 
108
  # Optional perf tuning; never block startup
109
  try:
110
- import perf_tuning # noqa: F401
111
  logger.info("perf_tuning imported successfully.")
112
  except Exception as e:
113
  logger.info("perf_tuning not available: %s", e)
@@ -126,7 +143,6 @@ def _safe_startup_diag():
126
  logger.info(f"[MATANY] probe skipped: {e}")
127
 
128
  # Continue with app startup
129
-
130
  _safe_startup_diag()
131
 
132
  # -----------------------------------------------------------------------------
@@ -166,7 +182,7 @@ def build_ui() -> gr.Blocks:
166
  logger.info("Launching Gradio on %s:%s …", host, port)
167
 
168
  demo = build_ui()
169
- demo.queue(max_size=16)
170
 
171
  threading.Thread(target=_post_launch_diag, daemon=True).start()
172
- demo.launch(server_name=host, server_port=port, show_error=True)
 
1
  #!/usr/bin/env python3
2
  """
3
  VideoBackgroundReplacer2 - SAM2 + MatAnyone Integration
4
+ ================================================
5
+ - Sets up Gradio UI and launches pipeline
6
+ - Aligned with torch==2.3.1+cu121, MatAnyone v1.0.0, SAM2 commit 3c76f73c1a7e7b4a2e8a0a9a3e5b92f7e6e3f2f5
7
+
8
+ Changes (2025-09-16):
9
+ - Aligned with updated pipeline.py and models/
10
+ - Added MatAnyone version logging in startup diagnostics
11
+ - Updated Gradio launch for compatibility with gradio==5.42.0
12
+ - Ensured sys.path and environment variables match Dockerfile
13
  """
14
+
15
  print("=== APP STARTUP DEBUG: app.py starting ===")
16
  import sys
17
  print(f"=== APP STARTUP DEBUG: Python {sys.version} ===")
 
22
  import os
23
  os.environ.pop("OMP_NUM_THREADS", None)
24
 
 
25
  import logging
26
  import threading
27
  import time
 
32
  # Suppress torchvision video deprecation warnings from MatAnyone
33
  warnings.filterwarnings("ignore", category=UserWarning, module="torchvision.io._video_deprecation_warning")
34
 
35
+ # --- import path fix for third_party packages (SAM2) ---
36
  third_party_root = Path(__file__).parent / "third_party"
37
  sam2_path = third_party_root / "sam2"
38
 
 
52
 
53
  # DEBUG: try importing MatAnyone and show its location
54
  try:
55
+ import matanyone
56
  import inspect
57
  import os as _os
58
  print("[MATANY] import OK from:", _os.path.dirname(inspect.getfile(matanyone)), flush=True)
 
87
  # Safe, minimal startup diagnostics (no long CUDA probes)
88
  # -----------------------------------------------------------------------------
89
  def _safe_startup_diag():
90
+ # Torch version
91
  try:
92
+ import torch
93
  import importlib
94
  t = importlib.import_module("torch")
95
  logger.info(
 
100
  except Exception as e:
101
  logger.warning("Torch not available at startup: %s", e)
102
 
103
+ # MatAnyone version
104
+ try:
105
+ import importlib.metadata
106
+ version = importlib.metadata.version("matanyone")
107
+ logger.info(f"[MATANY] MatAnyone version: {version}")
108
+ except Exception:
109
+ logger.info("[MATANY] MatAnyone version unknown")
110
+
111
  # nvidia-smi with short timeout (avoid indefinite block)
112
  try:
113
  out = subprocess.run(
 
124
 
125
  # Optional perf tuning; never block startup
126
  try:
127
+ import perf_tuning
128
  logger.info("perf_tuning imported successfully.")
129
  except Exception as e:
130
  logger.info("perf_tuning not available: %s", e)
 
143
  logger.info(f"[MATANY] probe skipped: {e}")
144
 
145
  # Continue with app startup
 
146
  _safe_startup_diag()
147
 
148
  # -----------------------------------------------------------------------------
 
182
  logger.info("Launching Gradio on %s:%s …", host, port)
183
 
184
  demo = build_ui()
185
+ demo.queue(max_size=16, api_open=False) # Disable public API for security
186
 
187
  threading.Thread(target=_post_launch_diag, daemon=True).start()
188
+ demo.launch(server_name=host, server_port=port, show_error=True)
models/__init__.py CHANGED
@@ -8,8 +8,10 @@
8
  - MatAnyone loader is probe-only here; actual run happens in matanyone_loader.MatAnyoneSession
9
 
10
  Changes (2025-09-16):
 
11
  - Updated load_matany to apply T=1 squeeze patch before InferenceCore import
12
- - Added patch status logging in load_matany
 
13
  - Fixed InferenceCore import path to matanyone.inference.inference_core
14
  """
15
 
@@ -21,6 +23,7 @@
21
  import subprocess
22
  import inspect
23
  import logging
 
24
  from pathlib import Path
25
  from typing import Optional, Tuple, Dict, Any, Union, Callable
26
 
@@ -261,7 +264,7 @@ def _composite_frame_pro(
261
  ) -> np.ndarray:
262
  erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
263
  dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
264
- blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
265
  lw_radius = lw_radius if lw_radius is not None else int(os.environ.get("LIGHTWRAP_RADIUS", "5"))
266
  lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
267
  despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))
@@ -528,6 +531,13 @@ def load_matany() -> Tuple[Optional[object], bool, Dict[str, Any]]:
528
  try:
529
  from matanyone.inference.inference_core import InferenceCore # type: ignore
530
  meta["matany_import_ok"] = True
 
 
 
 
 
 
 
531
  device = _pick_device("MATANY_DEVICE")
532
  repo_id = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
533
  meta["matany_repo_id"] = repo_id
 
8
  - MatAnyone loader is probe-only here; actual run happens in matanyone_loader.MatAnyoneSession
9
 
10
  Changes (2025-09-16):
11
+ - Aligned with torch==2.3.1+cu121 and MatAnyone v1.0.0
12
  - Updated load_matany to apply T=1 squeeze patch before InferenceCore import
13
+ - Added patch status logging and MatAnyone version
14
+ - Added InferenceCore attributes logging for debugging
15
  - Fixed InferenceCore import path to matanyone.inference.inference_core
16
  """
17
 
 
23
  import subprocess
24
  import inspect
25
  import logging
26
+ import importlib.metadata
27
  from pathlib import Path
28
  from typing import Optional, Tuple, Dict, Any, Union, Callable
29
 
 
264
  ) -> np.ndarray:
265
  erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
266
  dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
267
+ blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
268
  lw_radius = lw_radius if lw_radius is not None else int(os.environ.get("LIGHTWRAP_RADIUS", "5"))
269
  lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
270
  despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))
 
531
  try:
532
  from matanyone.inference.inference_core import InferenceCore # type: ignore
533
  meta["matany_import_ok"] = True
534
+ # Log MatAnyone version and InferenceCore attributes
535
+ try:
536
+ version = importlib.metadata.version("matanyone")
537
+ logger.info(f"[MATANY] MatAnyone version: {version}")
538
+ except Exception:
539
+ logger.info("[MATANY] MatAnyone version unknown")
540
+ logger.debug(f"[MATANY] InferenceCore attributes: {dir(InferenceCore)}")
541
  device = _pick_device("MATANY_DEVICE")
542
  repo_id = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
543
  meta["matany_repo_id"] = repo_id
models/__pycache__/__init__.cpython-313.pyc CHANGED
Binary files a/models/__pycache__/__init__.cpython-313.pyc and b/models/__pycache__/__init__.cpython-313.pyc differ
 
models/matany_compat_patch.py CHANGED
@@ -1,11 +1,11 @@
1
  #!/usr/bin/env python3
2
  # MatAnyone HF-compat patch: squeeze time dim T=1 before first Conv2d
3
  # Changes (2025-09-16):
4
- # - Added fallback patching for forward/encode if encode_img missing
 
5
  # - Log dir(MatAnyone) and module version for debugging
6
  # - Added isinstance(img, torch.Tensor) for non-tensor safety
7
- # - Enhanced logging with input/output shapes
8
- # - Kept monkey-patch for HF Spaces compatibility
9
 
10
  import logging
11
  import torch
@@ -15,9 +15,9 @@
15
 
16
  def apply_matany_t1_squeeze_guard() -> bool:
17
  """
18
- Monkey-patch MatAnyone.encode_img (or forward/encode) to squeeze [B,1,C,H,W] → [B,C,H,W].
19
  Safe for multi-frame (T>1) as it only squeezes when T==1.
20
- Returns True if patch applied successfully, False otherwise.
21
  """
22
  try:
23
  import matanyone.model.matanyone as M
@@ -29,7 +29,7 @@ def apply_matany_t1_squeeze_guard() -> bool:
29
  return False
30
  MatAnyone = M.MatAnyone
31
 
32
- # Log MatAnyone version and attributes for debugging
33
  try:
34
  version = importlib.metadata.version("matanyone")
35
  log.info(f"[MatAnyCompat] MatAnyone version: {version}")
@@ -37,33 +37,33 @@ def apply_matany_t1_squeeze_guard() -> bool:
37
  log.info("[MatAnyCompat] MatAnyone version unknown")
38
  log.debug(f"[MatAnyCompat] MatAnyone attributes: {dir(MatAnyone)}")
39
 
40
- # Try encode_img first, then fallback to forward or encode
41
- method_name = None
42
- for candidate in ["encode_img", "forward", "encode"]:
43
- if hasattr(MatAnyone, candidate):
44
- method_name = candidate
45
- break
46
- if not method_name:
47
- log.warning("[MatAnyCompat] No patchable method (encode_img, forward, encode) found on MatAnyone")
48
- return False
49
- if getattr(MatAnyone, f"_{method_name}_patched", False):
50
- log.info(f"[MatAnyCompat] {method_name} already patched")
51
- return True
52
 
53
- # Store original method
54
- orig_method = getattr(MatAnyone, method_name)
 
 
 
 
 
 
55
 
56
- def method_compat(self, img, *args, **kwargs):
57
- # Handle inputs that MatAnyone.step turned into [B,1,C,H,W]
58
- try:
59
- if isinstance(img, torch.Tensor) and img.dim() == 5 and img.shape[1] == 1:
60
- log.info(f"[MatAnyCompat] Squeezing 5D {img.shape} to 4D {img.squeeze(1).shape} in {method_name}")
61
- img = img.squeeze(1) # [B,1,C,H,W] → [B,C,H,W]
62
- except Exception as e:
63
- log.warning(f"[MatAnyCompat] Failed to process input shape in {method_name}: %s", e)
64
- return orig_method(self, img, *args, **kwargs)
65
 
66
- setattr(MatAnyone, method_name, method_compat)
67
- setattr(MatAnyone, f"_{method_name}_patched", True)
68
- log.info(f"[MatAnyCompat] Applied T=1 squeeze guard in MatAnyone.{method_name}")
69
  return True
 
1
  #!/usr/bin/env python3
2
  # MatAnyone HF-compat patch: squeeze time dim T=1 before first Conv2d
3
  # Changes (2025-09-16):
4
+ # - Aligned with torch==2.3.1+cu121 and MatAnyone v1.0.0
5
+ # - Patch forward, encode, encode_img to cover all code paths
6
  # - Log dir(MatAnyone) and module version for debugging
7
  # - Added isinstance(img, torch.Tensor) for non-tensor safety
8
+ # - Log input/output shapes for verification
 
9
 
10
  import logging
11
  import torch
 
15
 
16
  def apply_matany_t1_squeeze_guard() -> bool:
17
  """
18
+ Monkey-patch MatAnyone.forward/encode/encode_img to squeeze [B,1,C,H,W] → [B,C,H,W].
19
  Safe for multi-frame (T>1) as it only squeezes when T==1.
20
+ Returns True if at least one method patched, False otherwise.
21
  """
22
  try:
23
  import matanyone.model.matanyone as M
 
29
  return False
30
  MatAnyone = M.MatAnyone
31
 
32
+ # Log MatAnyone version and attributes
33
  try:
34
  version = importlib.metadata.version("matanyone")
35
  log.info(f"[MatAnyCompat] MatAnyone version: {version}")
 
37
  log.info("[MatAnyCompat] MatAnyone version unknown")
38
  log.debug(f"[MatAnyCompat] MatAnyone attributes: {dir(MatAnyone)}")
39
 
40
+ # Try patching forward, encode, encode_img
41
+ patched = False
42
+ for method_name in ["forward", "encode", "encode_img"]:
43
+ if not hasattr(MatAnyone, method_name):
44
+ continue
45
+ if getattr(MatAnyone, f"_{method_name}_patched", False):
46
+ log.info(f"[MatAnyCompat] {method_name} already patched")
47
+ continue
48
+
49
+ # Store original method
50
+ orig_method = getattr(MatAnyone, method_name)
 
51
 
52
+ def method_compat(self, img, *args, _orig=orig_method, _name=method_name, **kwargs):
53
+ # NOTE: _orig/_name are bound as defaults to avoid the late-binding closure
53
+ # bug — without this, every patched method would call the LAST loop
53
+ # iteration's original method.
53
+ try:
54
+ if isinstance(img, torch.Tensor) and img.dim() == 5 and img.shape[1] == 1:
55
+ log.info(f"[MatAnyCompat] Squeezing 5D {img.shape} to 4D {img.squeeze(1).shape} in {_name}")
56
+ img = img.squeeze(1) # [B,1,C,H,W] → [B,C,H,W]
57
+ except Exception as e:
58
+ log.warning(f"[MatAnyCompat] Failed to process input shape in {_name}: %s", e)
59
+ return _orig(self, img, *args, **kwargs)
60
 
61
+ setattr(MatAnyone, method_name, method_compat)
62
+ setattr(MatAnyone, f"_{method_name}_patched", True)
63
+ log.info(f"[MatAnyCompat] Applied T=1 squeeze guard in MatAnyone.{method_name}")
64
+ patched = True
 
 
 
 
 
65
 
66
+ if not patched:
67
+ log.warning("[MatAnyCompat] No patchable methods (forward, encode, encode_img) found on MatAnyone")
68
+ return False
69
  return True
models/matanyone_loader.py CHANGED
@@ -5,16 +5,16 @@
5
 
6
  - SAM2 defines the subject (seed mask) on frame 0.
7
  - MatAnyone does frame-by-frame alpha matting.
8
- - Uses T=1 squeeze patch for conv2d compatibility.
9
- - Falls back to process_frame([H,W,3]) if step() is unavailable.
10
 
11
  Changes (2025-09-16):
12
- - Added comprehensive error handling for MatAnyone import and initialization
13
- - Enhanced VRAM management with auto-cleanup
14
- - Added support for multiple MatAnyone method patching (encode_img/forward/encode)
15
- - Improved logging with timestamps and memory usage
16
- - Added environment variable controls for debugging
17
- - Fixed potential memory leaks in tensor handling
18
  """
19
 
20
  from __future__ import annotations
@@ -24,6 +24,7 @@
24
  import logging
25
  import numpy as np
26
  import torch
 
27
  from pathlib import Path
28
  from typing import Optional, Callable, Tuple
29
 
@@ -71,39 +72,22 @@ def _cuda_snapshot(device: Optional[torch.device]) -> str:
71
  idx = device.index
72
  name = torch.cuda.get_device_name(idx)
73
  alloc = torch.cuda.memory_allocated(idx) / (1024**3)
74
- resv = torch.cuda.memory_reserved(idx) / (1024**3)
75
  return f"device={idx}, name={name}, alloc={alloc:.2f}GB, reserved={resv:.2f}GB"
76
  except Exception as e:
77
  return f"CUDA snapshot error: {e!r}"
78
 
79
  def _safe_empty_cache():
80
- """Safely clear PyTorch cache with detailed memory reporting."""
 
81
  try:
82
- if not torch.cuda.is_available():
83
- return
84
-
85
- # Log memory stats before cleanup
86
- if _env_flag("MATANY_LOG_VRAM"):
87
- log.info("[MATANY] VRAM before cleanup:")
88
- log.info(f" Allocated: {torch.cuda.memory_allocated()/1024**2:.1f} MB")
89
- log.info(f" Reserved: {torch.cuda.memory_reserved()/1024**2:.1f} MB")
90
-
91
- # Clear cache and sync
92
  torch.cuda.empty_cache()
93
- torch.cuda.synchronize()
94
-
95
- # Log memory stats after cleanup
96
- if _env_flag("MATANY_LOG_VRAM"):
97
- log.info("[MATANY] VRAM after cleanup:")
98
- log.info(f" Allocated: {torch.cuda.memory_allocated()/1024**2:.1f} MB")
99
- log.info(f" Reserved: {torch.cuda.memory_reserved()/1024**2:.1f} MB")
100
-
101
- except Exception as e:
102
- log.warning(f"[MATANY] Error in cache cleanup: {e}", exc_info=True)
103
- try:
104
- torch.cuda.empty_cache()
105
- except Exception as e2:
106
- log.warning(f"[MATANY] Secondary cache cleanup failed: {e2}")
107
 
108
  # ---------- SAM2 → seed mask prep ----------
109
  def _prepare_seed_mask(sam2_mask: np.ndarray, H: int, W: int) -> np.ndarray:
@@ -149,7 +133,7 @@ class MatAnyoneSession:
149
  """
150
  Streaming wrapper that seeds MatAnyone on frame 0.
151
  Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
152
- Falls back to process_frame([H,W,3]) if supported by the wheel.
153
  """
154
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
155
  from .matany_compat_patch import apply_matany_t1_squeeze_guard
@@ -157,18 +141,25 @@ def __init__(self, device: Optional[str] = None, precision: str = "auto"):
157
  self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
158
  self.precision = precision.lower()
159
 
160
- # Apply T=1 squeeze patch for conv2d fix
161
  if apply_matany_t1_squeeze_guard():
162
- log.info("[MATANY] T=1 squeeze patch applied for MatAnyone.encode_img")
163
  else:
164
  log.warning("[MATANY] T=1 squeeze patch failed; conv2d errors may occur")
165
 
 
 
 
 
 
 
 
166
  # API/format overrides for debugging
167
  api_force = os.getenv("MATANY_FORCE_API", "").strip().lower() # "process" or "step"
168
  fmt_force = os.getenv("MATANY_FORCE_FORMAT", "4d").strip().lower() # "4d" or "5d"
169
  self._force_api_process = (api_force == "process")
170
  self._force_api_step = (api_force == "step")
171
- self._force_4d = (fmt_force == "4d") or not fmt_force # Default to 4D post-patch
172
  self._force_5d = (fmt_force == "5d")
173
 
174
  try:
@@ -255,6 +246,7 @@ def _call_step(self, rgb_hwc: np.ndarray, seed_mask_hw: Optional[np.ndarray], is
255
  def run(use_5d: bool):
256
  img = img_5d if use_5d else img_4d
257
  msk = mask_5d if use_5d else mask_4d
 
258
  if is_first and msk is not None:
259
  try:
260
  return self.core.step(img, msk, is_first=True)
@@ -351,10 +343,10 @@ def process_stream(
351
  cap_probe = cv2.VideoCapture(str(video_path))
352
  if not cap_probe.isOpened():
353
  raise MatAnyError(f"Failed to open video: {video_path}")
354
- N = int(cap_probe.get(cv2.CAP_PROP_FRAME_COUNT))
355
  fps = cap_probe.get(cv2.CAP_PROP_FPS)
356
- W = int(cap_probe.get(cv2.CAP_PROP_FRAME_WIDTH))
357
- H = int(cap_probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
358
  cap_probe.release()
359
  if not fps or fps <= 0 or np.isnan(fps):
360
  fps = 25.0
@@ -364,10 +356,10 @@ def process_stream(
364
  _emit_progress(progress_cb, 0.08, "Using per-frame processing")
365
 
366
  alpha_path = out_dir / "alpha.mp4"
367
- fg_path = out_dir / "fg.mp4"
368
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
369
  alpha_writer = cv2.VideoWriter(str(alpha_path), fourcc, fps, (W, H), True)
370
- fg_writer = cv2.VideoWriter(str(fg_path), fourcc, fps, (W, H), True)
371
  if not alpha_writer.isOpened() or not fg_writer.isOpened():
372
  raise MatAnyError("Failed to initialize VideoWriter(s)")
373
 
@@ -396,9 +388,9 @@ def process_stream(
396
  is_first = (idx == 0)
397
  alpha = self._run_frame(frame, seed_mask_np if is_first else None, is_first)
398
 
399
- alpha_u8 = (alpha * 255.0 + 0.5).astype(np.uint8)
400
  alpha_bgr = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
401
- fg_bgr = (frame.astype(np.float32) * alpha[..., None]).clip(0, 255).astype(np.uint8)
402
 
403
  alpha_writer.write(alpha_bgr)
404
  fg_writer.write(fg_bgr)
 
5
 
6
  - SAM2 defines the subject (seed mask) on frame 0.
7
  - MatAnyone does frame-by-frame alpha matting.
8
+ - Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
9
+ - Falls back to process_frame([H,W,3]) if supported.
10
 
11
  Changes (2025-09-16):
12
+ - Aligned with torch==2.3.1+cu121 and MatAnyone v1.0.0
13
+ - Added shape logging in _call_step to verify 5D-to-4D squeeze
14
+ - Set MATANY_FORCE_FORMAT=4d as default
15
+ - Added VRAM logging in process_stream (MATANY_LOG_VRAM=1)
16
+ - Enhanced _safe_empty_cache with memory_summary
17
+ - Added MatAnyone version logging
18
  """
19
 
20
  from __future__ import annotations
 
24
  import logging
25
  import numpy as np
26
  import torch
27
+ import importlib.metadata
28
  from pathlib import Path
29
  from typing import Optional, Callable, Tuple
30
 
 
72
  idx = device.index
73
  name = torch.cuda.get_device_name(idx)
74
  alloc = torch.cuda.memory_allocated(idx) / (1024**3)
75
+ resv = torch.cuda.memory_reserved(idx) / (1024**3)
76
  return f"device={idx}, name={name}, alloc={alloc:.2f}GB, reserved={resv:.2f}GB"
77
  except Exception as e:
78
  return f"CUDA snapshot error: {e!r}"
79
 
80
  def _safe_empty_cache():
81
+ if not torch.cuda.is_available():
82
+ return
83
  try:
84
+ log.info(f"[MATANY] CUDA memory before empty_cache: {_cuda_snapshot(None)}")
 
 
 
 
 
 
 
 
 
85
  torch.cuda.empty_cache()
86
+ log.info(f"[MATANY] CUDA memory after empty_cache: {_cuda_snapshot(None)}")
87
+ if os.getenv("MATANY_LOG_VRAM", "0") == "1":
88
+ log.debug(f"[MATANY] VRAM summary:\n{torch.cuda.memory_summary()}")
89
+ except Exception:
90
+ pass
 
 
 
 
 
 
 
 
 
91
 
92
  # ---------- SAM2 → seed mask prep ----------
93
  def _prepare_seed_mask(sam2_mask: np.ndarray, H: int, W: int) -> np.ndarray:
 
133
  """
134
  Streaming wrapper that seeds MatAnyone on frame 0.
135
  Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
136
+ Falls back to process_frame([H,W,3]) if supported.
137
  """
138
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
139
  from .matany_compat_patch import apply_matany_t1_squeeze_guard
 
141
  self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
142
  self.precision = precision.lower()
143
 
144
+ # Apply T=1 squeeze patch
145
  if apply_matany_t1_squeeze_guard():
146
+ log.info("[MATANY] T=1 squeeze patch applied for MatAnyone")
147
  else:
148
  log.warning("[MATANY] T=1 squeeze patch failed; conv2d errors may occur")
149
 
150
+ # Log MatAnyone version
151
+ try:
152
+ version = importlib.metadata.version("matanyone")
153
+ log.info(f"[MATANY] MatAnyone version: {version}")
154
+ except Exception:
155
+ log.info("[MATANY] MatAnyone version unknown")
156
+
157
  # API/format overrides for debugging
158
  api_force = os.getenv("MATANY_FORCE_API", "").strip().lower() # "process" or "step"
159
  fmt_force = os.getenv("MATANY_FORCE_FORMAT", "4d").strip().lower() # "4d" or "5d"
160
  self._force_api_process = (api_force == "process")
161
  self._force_api_step = (api_force == "step")
162
+ self._force_4d = (fmt_force == "4d") or not fmt_force # Default to 4D
163
  self._force_5d = (fmt_force == "5d")
164
 
165
  try:
 
246
  def run(use_5d: bool):
247
  img = img_5d if use_5d else img_4d
248
  msk = mask_5d if use_5d else mask_4d
249
+ log.debug(f"[MATANY] Step input: img={img.shape}, mask={msk.shape if msk is not None else None}, is_first={is_first}")
250
  if is_first and msk is not None:
251
  try:
252
  return self.core.step(img, msk, is_first=True)
 
343
  cap_probe = cv2.VideoCapture(str(video_path))
344
  if not cap_probe.isOpened():
345
  raise MatAnyError(f"Failed to open video: {video_path}")
346
+ N = int(cap_probe.get(cv2.CAP_PROP_FRAME_COUNT))
347
  fps = cap_probe.get(cv2.CAP_PROP_FPS)
348
+ W = int(cap_probe.get(cv2.CAP_PROP_FRAME_WIDTH))
349
+ H = int(cap_probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
350
  cap_probe.release()
351
  if not fps or fps <= 0 or np.isnan(fps):
352
  fps = 25.0
 
356
  _emit_progress(progress_cb, 0.08, "Using per-frame processing")
357
 
358
  alpha_path = out_dir / "alpha.mp4"
359
+ fg_path = out_dir / "fg.mp4"
360
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
361
  alpha_writer = cv2.VideoWriter(str(alpha_path), fourcc, fps, (W, H), True)
362
+ fg_writer = cv2.VideoWriter(str(fg_path), fourcc, fps, (W, H), True)
363
  if not alpha_writer.isOpened() or not fg_writer.isOpened():
364
  raise MatAnyError("Failed to initialize VideoWriter(s)")
365
 
 
388
  is_first = (idx == 0)
389
  alpha = self._run_frame(frame, seed_mask_np if is_first else None, is_first)
390
 
391
+ alpha_u8 = (alpha * 255.0 + 0.5).astype(np.uint8)
392
  alpha_bgr = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
393
+ fg_bgr = (frame.astype(np.float32) * alpha[..., None]).clip(0, 255).astype(np.uint8)
394
 
395
  alpha_writer.write(alpha_bgr)
396
  fg_writer.write(fg_bgr)
models/sam2_loader.py CHANGED
@@ -1,195 +1,279 @@
1
- # models/sam2_loader.py
2
- import os, logging, torch
3
- from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from pathlib import Path
 
 
5
  import numpy as np
 
 
6
 
7
- log = logging.getLogger("sam2_loader")
 
 
 
 
 
 
 
 
8
 
9
- DEFAULT_MODEL_ID = os.environ.get("SAM2_MODEL_ID", "facebook/sam2")
10
- DEFAULT_VARIANT = os.environ.get("SAM2_VARIANT", "sam2_hiera_large")
 
 
 
11
 
12
- # Map variant -> filenames (SAM2 releases follow this pattern)
13
- VARIANT_FILES = {
14
- "sam2_hiera_small": ("sam2_hiera_small.pt", "configs/sam2/sam2_hiera_s.yaml"),
15
- "sam2_hiera_base": ("sam2_hiera_base.pt", "configs/sam2/sam2_hiera_b.yaml"),
16
- "sam2_hiera_large": ("sam2_hiera_large.pt", "configs/sam2/sam2_hiera_l.yaml"),
17
- }
 
18
 
19
- def _download_checkpoint(model_id: str, ckpt_name: str) -> str:
20
- return hf_hub_download(repo_id=model_id, filename=ckpt_name, local_dir=os.environ.get("HF_HOME"))
21
 
22
- def _find_sam2_build():
 
 
 
23
  try:
24
- from sam2.build_sam import build_sam2
25
- return build_sam2
26
  except Exception as e:
27
- log.error("SAM2 not importable (check Dockerfile vendoring): %s", e)
28
  return None
29
 
30
- class SAM2Predictor:
31
- def __init__(self, device: torch.device):
32
- self.device = device
33
- self.model = None
34
- self.predictor = None
35
-
36
- def load(self, variant: str = DEFAULT_VARIANT, model_id: str = DEFAULT_MODEL_ID):
37
- log.info(f"SAM2Predictor.load() called with variant={variant}")
38
- build_sam2 = _find_sam2_build()
39
- if build_sam2 is None:
40
- log.error("SAM2 build function not available - raising RuntimeError")
41
- raise RuntimeError("SAM2 build function not available")
42
-
43
- ckpt_name, cfg_path = VARIANT_FILES.get(variant, VARIANT_FILES["sam2_hiera_large"])
44
- log.info(f"Downloading checkpoint: {ckpt_name}")
45
- ckpt = _download_checkpoint(model_id, ckpt_name)
46
- log.info(f"Checkpoint downloaded to: {ckpt}")
47
-
48
- # Use the symlinked config files in the sam2 package directory
49
- # From debug output: sam2_hiera_l.yaml -> configs/sam2/sam2_hiera_l.yaml
50
- sam2_pkg_dir = os.environ.get("THIRD_PARTY_SAM2_DIR", "/home/user/app/third_party/sam2")
51
- config_name = cfg_path.split('/')[-1] # Extract just the filename (e.g., "sam2_hiera_l.yaml")
52
- full_cfg_path = os.path.join(sam2_pkg_dir, "sam2", config_name)
53
- log.info(f"SAM2 config path: {full_cfg_path}")
54
- log.info(f"Config file exists: {os.path.exists(full_cfg_path)}")
55
-
56
- log.info("Calling build_sam2()...")
57
- model = build_sam2(config_file=full_cfg_path, ckpt_path=ckpt, device=str(self.device))
58
- log.info("build_sam2() completed successfully")
59
-
60
- # Explicitly move model to device and verify
61
- model = model.to(self.device)
62
- model.eval()
63
-
64
- # Verify model is on correct device
65
- if hasattr(model, 'parameters'):
66
- first_param = next(model.parameters(), None)
67
- if first_param is not None:
68
- actual_device = first_param.device
69
- log.info(f"SAM2 model device verification: expected={self.device}, actual={actual_device}")
70
- if str(actual_device) != str(self.device):
71
- log.warning(f"SAM2 model device mismatch! Moving to {self.device}")
72
- model = model.to(self.device)
73
-
74
- self.model = model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  try:
77
- from sam2.sam2_video_predictor import SAM2VideoPredictor
78
- self.predictor = SAM2VideoPredictor(self.model)
79
- except Exception:
80
- # Fallback to image predictor if video predictor missing
81
- from sam2.sam2_image_predictor import SAM2ImagePredictor
82
- self.predictor = SAM2ImagePredictor(self.model)
83
-
84
- return self
85
-
86
- def _detect_person_region(self, image_rgb01: np.ndarray) -> np.ndarray:
87
- """
88
- Simple centered box that works for most cases.
89
- Returns [x1, y1, x2, y2] in image coordinates.
90
- """
91
- h, w = image_rgb01.shape[:2]
92
-
93
- # Use 70% of the frame size, centered
94
- margin_w = int(w * 0.15)
95
- margin_h = int(h * 0.15)
96
-
97
- box = np.array([
98
- margin_w, # x1
99
- margin_h, # y1
100
- w - margin_w, # x2
101
- h - margin_h # y2
102
- ], dtype=np.float32)
103
-
104
- log.info(f"Using simple center box: {box}")
105
- return box
106
-
107
- @torch.inference_mode()
108
- def first_frame_mask(self, image_rgb01):
109
- """
110
- Returns an initial binary mask for the foreground person from first frame.
111
- Uses a robust approach with fallback strategies for better reliability.
112
- """
113
- log.info("🔍 SAM2 first_frame_mask() called - starting segmentation")
114
-
115
  try:
116
- # Ensure input tensor is on correct device
117
- if isinstance(image_rgb01, torch.Tensor):
118
- image_rgb01 = image_rgb01.to(self.device, non_blocking=True)
119
-
120
- if not hasattr(self.predictor, "set_image"):
121
- raise RuntimeError("SAM2 predictor doesn't support set_image")
122
-
123
- # Convert to numpy for predictor if needed
124
- if isinstance(image_rgb01, torch.Tensor):
125
- image_np = (image_rgb01.cpu().numpy() * 255).astype("uint8")
126
- else:
127
- image_np = (image_rgb01 * 255).astype("uint8")
128
-
129
- # Set the image for prediction
130
- self.predictor.set_image(image_np)
131
-
132
- # Strategy 1: Try with person-focused bounding box first
133
- box = self._detect_person_region(image_np)
134
- log.info(f"Trying person-focused box: {box}")
135
-
136
- masks, scores, _ = self.predictor.predict(
137
- box=box,
138
- multimask_output=True,
139
- mask_input=None,
140
- return_logits=False
141
- )
142
-
143
- # Strategy 2: If no good masks found, try with a point in the center
144
- if len(masks) == 0 or np.max(scores) < 0.5:
145
- log.info("No good masks found with box, trying center point")
146
- h, w = image_np.shape[:2]
147
- point = np.array([[w//2, h//2]])
148
- labels = np.array([1]) # 1=foreground point
149
- masks, scores, _ = self.predictor.predict(
150
- point_coords=point,
151
- point_labels=labels,
152
- multimask_output=True
153
- )
154
-
155
- # Choose the best mask (highest score)
156
- if len(masks) > 0 and len(scores) > 0:
157
- best_idx = np.argmax(scores)
158
- mask = masks[best_idx]
159
- score = float(scores[best_idx])
160
- log.info(f"Selected mask {best_idx+1}/{len(masks)} with score {score:.3f}")
161
-
162
- # Verify mask quality
163
- mask_coverage = (np.sum(mask > 0) / mask.size) * 100
164
- log.info(f"Mask coverage: {mask_coverage:.1f}% (target: 15-35%)")
165
-
166
- # If mask is too small or too large, try to refine it
167
- if mask_coverage < 5 or mask_coverage > 50:
168
- log.warning(f"Suspicious mask coverage {mask_coverage:.1f}%, applying post-processing")
169
- # Apply morphological operations to clean up the mask
170
- kernel = np.ones((5,5), np.uint8)
171
- if mask_coverage < 5: # Too small - dilate
172
- mask = cv2.dilate(mask.astype(np.uint8), kernel, iterations=1)
173
- else: # Too large - erode
174
- mask = cv2.erode(mask.astype(np.uint8), kernel, iterations=1)
175
-
176
- # Ensure we still have a valid mask
177
- if np.sum(mask) == 0:
178
- log.warning("Post-processing removed all mask pixels, using original")
179
- mask = masks[best_idx]
180
  else:
181
- log.warning("No valid masks found, using fallback")
182
- mask = np.ones_like(image_np[:,:,0], dtype=bool)
183
-
184
- return mask.astype(np.float32)
185
-
186
- except Exception as e:
187
- log.error(f"Error in first_frame_mask: {e}")
188
- # Fallback to a simple centered box if anything goes wrong
189
- h, w = image_np.shape[:2]
190
- mask = np.zeros((h, w), dtype=np.float32)
191
- margin_h, margin_w = h//4, w//4
192
- mask[margin_h:h-margin_h, margin_w:w-margin_w] = 1.0
193
- return mask
194
 
195
- return mask.astype("float32")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ SAM2 Loader Robust loading and mask generation for SAM2
4
+ ========================================================
5
+ - Loads SAM2 model with Hydra config resolution
6
+ - Generates seed masks for MatAnyone
7
+ - Aligned with torch==2.3.1+cu121 and SAM2 commit 3c76f73c1a7e7b4a2e8a0a9a3e5b92f7e6e3f2f5
8
+
9
+ Changes (2025-09-16):
10
+ - Aligned with torch==2.3.1+cu121 and SAM2 commit
11
+ - Added GPU memory logging for Tesla T4
12
+ - Added SAM2 version logging via importlib.metadata
13
+ - Simplified config resolution to match __init__.py
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import logging
20
+ import importlib.metadata
21
  from pathlib import Path
22
+ from typing import Optional, Tuple, Dict, Any
23
+
24
  import numpy as np
25
+ import yaml
26
+ import torch
27
 
28
+ # --------------------------------------------------------------------------------------
29
+ # Logging
30
+ # --------------------------------------------------------------------------------------
31
+ logger = logging.getLogger("backgroundfx_pro")
32
+ if not logger.handlers:
33
+ _h = logging.StreamHandler()
34
+ _h.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
35
+ logger.addHandler(_h)
36
+ logger.setLevel(logging.INFO)
37
 
38
+ # --------------------------------------------------------------------------------------
39
+ # Path setup for third_party repos
40
+ # --------------------------------------------------------------------------------------
41
+ ROOT = Path(__file__).resolve().parent.parent # project root
42
+ TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
43
 
44
+ def _add_sys_path(p: Path) -> None:
45
+ if p.exists():
46
+ p_str = str(p)
47
+ if p_str not in sys.path:
48
+ sys.path.insert(0, p_str)
49
+ else:
50
+ logger.warning(f"third_party path not found: {p}")
51
 
52
+ _add_sys_path(TP_SAM2)
 
53
 
54
+ # --------------------------------------------------------------------------------------
55
+ # Safe Torch accessors
56
+ # --------------------------------------------------------------------------------------
57
+ def _torch():
58
  try:
59
+ import torch
60
+ return torch
61
  except Exception as e:
62
+ logger.warning(f"[sam2_loader.safe-torch] import failed: {e}")
63
  return None
64
 
65
+ def _has_cuda() -> bool:
66
+ t = _torch()
67
+ if t is None:
68
+ return False
69
+ try:
70
+ return bool(t.cuda.is_available())
71
+ except Exception as e:
72
+ logger.warning(f"[sam2_loader.safe-torch] cuda.is_available() failed: {e}")
73
+ return False
74
+
75
+ def _pick_device(env_key: str) -> str:
76
+ requested = os.environ.get(env_key, "").strip().lower()
77
+ has_cuda = _has_cuda()
78
+
79
+ logger.info(f"CUDA environment variables: {{'SAM2_DEVICE': '{os.environ.get('SAM2_DEVICE', '')}'}}")
80
+ logger.info(f"_pick_device({env_key}): requested='{requested}', has_cuda={has_cuda}")
81
+
82
+ if has_cuda and requested not in {"cpu"}:
83
+ logger.info(f"FORCING CUDA device (GPU available, requested='{requested}')")
84
+ return "cuda"
85
+ elif requested in {"cuda", "cpu"}:
86
+ logger.info(f"Using explicitly requested device: {requested}")
87
+ return requested
88
+
89
+ result = "cuda" if has_cuda else "cpu"
90
+ logger.info(f"Auto-selected device: {result}")
91
+ return result
92
+
93
+ # --------------------------------------------------------------------------------------
94
+ # SAM2 Loading and Mask Generation
95
+ # --------------------------------------------------------------------------------------
96
+ def _resolve_sam2_cfg(cfg_str: str) -> str:
97
+ """Resolve SAM2 config path - return relative path for Hydra compatibility."""
98
+ logger.info(f"_resolve_sam2_cfg called with cfg_str={cfg_str}")
99
+
100
+ candidate = os.path.join(TP_SAM2, cfg_str)
101
+ logger.info(f"Candidate path: {candidate}")
102
+ logger.info(f"Candidate exists: {os.path.exists(candidate)}")
103
+
104
+ if os.path.exists(candidate):
105
+ if cfg_str.startswith("sam2/configs/"):
106
+ relative_path = cfg_str.replace("sam2/configs/", "configs/")
107
+ else:
108
+ relative_path = cfg_str
109
+ logger.info(f"Returning Hydra-compatible relative path: {relative_path}")
110
+ return relative_path
111
+
112
+ fallbacks = [
113
+ os.path.join(TP_SAM2, "sam2", cfg_str),
114
+ os.path.join(TP_SAM2, "configs", cfg_str),
115
+ ]
116
+
117
+ for fallback in fallbacks:
118
+ logger.info(f"Trying fallback: {fallback}")
119
+ if os.path.exists(fallback):
120
+ if "configs/" in fallback:
121
+ relative_path = "configs/" + fallback.split("configs/")[-1]
122
+ logger.info(f"Returning fallback relative path: {relative_path}")
123
+ return relative_path
124
+
125
+ logger.warning(f"Config not found, returning original: {cfg_str}")
126
+ return cfg_str
127
 
128
+ def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
129
+ """If config references 'hieradet', try to find a 'hiera' config."""
130
+ try:
131
+ with open(cfg_path, "r") as f:
132
+ data = yaml.safe_load(f)
133
+ model = data.get("model", {}) or {}
134
+ enc = model.get("image_encoder") or {}
135
+ trunk = enc.get("trunk") or {}
136
+ target = trunk.get("_target_") or trunk.get("target")
137
+ if isinstance(target, str) and "hieradet" in target:
138
+ for y in TP_SAM2.rglob("*.yaml"):
139
+ try:
140
+ with open(y, "r") as f2:
141
+ d2 = yaml.safe_load(f2) or {}
142
+ e2 = (d2.get("model", {}) or {}).get("image_encoder") or {}
143
+ t2 = (e2.get("trunk") or {})
144
+ tgt2 = t2.get("_target_") or t2.get("target")
145
+ if isinstance(tgt2, str) and ".hiera." in tgt2:
146
+ logger.info(f"SAM2: switching config from 'hieradet' → 'hiera': {y}")
147
+ return str(y)
148
+ except Exception:
149
+ continue
150
+ except Exception:
151
+ pass
152
+ return None
153
+
154
+ def load_sam2() -> Tuple[Optional[object], bool, Dict[str, Any]]:
155
+ """Robust SAM2 loader with config resolution and error handling."""
156
+ meta = {"sam2_import_ok": False, "sam2_init_ok": False}
157
+ try:
158
+ from sam2.build_sam import build_sam2
159
+ from sam2.sam2_image_predictor import SAM2ImagePredictor
160
+ meta["sam2_import_ok"] = True
161
+ except Exception as e:
162
+ logger.warning(f"SAM2 import failed: {e}")
163
+ return None, False, meta
164
+
165
+ # Log SAM2 version
166
+ try:
167
+ version = importlib.metadata.version("segment-anything-2")
168
+ logger.info(f"[SAM2] SAM2 version: {version}")
169
+ except Exception:
170
+ logger.info("[SAM2] SAM2 version unknown")
171
+
172
+ # Check GPU memory before loading
173
+ if torch and torch.cuda.is_available():
174
+ mem_before = torch.cuda.memory_allocated() / 1024**3
175
+ logger.info(f"🔍 GPU memory before SAM2 load: {mem_before:.2f}GB")
176
+
177
+ device = _pick_device("SAM2_DEVICE")
178
+ cfg_env = os.environ.get("SAM2_MODEL_CFG", "sam2/configs/sam2/sam2_hiera_l.yaml")
179
+ cfg = _resolve_sam2_cfg(cfg_env)
180
+ ckpt = os.environ.get("SAM2_CHECKPOINT", "")
181
+
182
+ def _try_build(cfg_path: str):
183
+ logger.info(f"_try_build called with cfg_path: {cfg_path}")
184
+ params = set(inspect.signature(build_sam2).parameters.keys())
185
+ logger.info(f"build_sam2 parameters: {list(params)}")
186
+ kwargs = {}
187
+ if "config_file" in params:
188
+ kwargs["config_file"] = cfg_path
189
+ logger.info(f"Using config_file parameter: {cfg_path}")
190
+ elif "model_cfg" in params:
191
+ kwargs["model_cfg"] = cfg_path
192
+ logger.info(f"Using model_cfg parameter: {cfg_path}")
193
+ if ckpt:
194
+ if "checkpoint" in params:
195
+ kwargs["checkpoint"] = ckpt
196
+ elif "ckpt_path" in params:
197
+ kwargs["ckpt_path"] = ckpt
198
+ elif "weights" in params:
199
+ kwargs["weights"] = ckpt
200
+ if "device" in params:
201
+ kwargs["device"] = device
202
  try:
203
+ logger.info(f"Calling build_sam2 with kwargs: {kwargs}")
204
+ result = build_sam2(**kwargs)
205
+ logger.info(f"build_sam2 succeeded with kwargs")
206
+ if hasattr(result, 'device'):
207
+ logger.info(f"SAM2 model device: {result.device}")
208
+ elif hasattr(result, 'image_encoder') and hasattr(result.image_encoder, 'device'):
209
+ logger.info(f"SAM2 model device: {result.image_encoder.device}")
210
+ return result
211
+ except TypeError as e:
212
+ logger.info(f"build_sam2 kwargs failed: {e}, trying positional args")
213
+ pos = [cfg_path]
214
+ if ckpt:
215
+ pos.append(ckpt)
216
+ if "device" not in kwargs:
217
+ pos.append(device)
218
+ logger.info(f"Calling build_sam2 with positional args: {pos}")
219
+ result = build_sam2(*pos)
220
+ logger.info(f"build_sam2 succeeded with positional args")
221
+ return result
222
+
223
+ try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  try:
225
+ sam = _try_build(cfg)
226
+ except Exception:
227
+ alt_cfg = _find_hiera_config_if_hieradet(cfg)
228
+ if alt_cfg:
229
+ sam = _try_build(alt_cfg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  else:
231
+ raise
232
+
233
+ if sam is not None:
234
+ predictor = SAM2ImagePredictor(sam)
235
+ meta["sam2_init_ok"] = True
236
+ meta["sam2_device"] = device
237
+ return predictor, True, meta
238
+ else:
239
+ return None, False, meta
 
 
 
 
240
 
241
+ except Exception as e:
242
+ logger.error(f"SAM2 loading failed: {e}")
243
+ return None, False, meta
244
+
245
+ def run_sam2_mask(predictor: object,
246
+ first_frame_bgr: np.ndarray,
247
+ point: Optional[Tuple[int, int]] = None,
248
+ auto: bool = False) -> Tuple[Optional[np.ndarray], bool]:
249
+ """Generate a seed mask for MatAnyone. Returns (mask_uint8_0_255, ok)."""
250
+ if predictor is None:
251
+ return None, False
252
+ try:
253
+ import cv2
254
+ rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
255
+ predictor.set_image(rgb)
256
+
257
+ if auto:
258
+ h, w = rgb.shape[:2]
259
+ box = np.array([int(0.05*w), int(0.05*h), int(0.95*w), int(0.95*h)])
260
+ masks, _, _ = predictor.predict(box=box)
261
+ elif point is not None:
262
+ x, y = int(point[0]), int(point[1])
263
+ pts = np.array([[x, y]], dtype=np.int32)
264
+ labels = np.array([1], dtype=np.int32)
265
+ masks, _, _ = predictor.predict(point_coords=pts, point_labels=labels)
266
+ else:
267
+ h, w = rgb.shape[:2]
268
+ box = np.array([int(0.1*w), int(0.1*h), int(0.9*w), int(0.9*h)])
269
+ masks, _, _ = predictor.predict(box=box)
270
+
271
+ if masks is None or len(masks) == 0:
272
+ return None, False
273
+
274
+ m = masks[0].astype(np.uint8) * 255
275
+ logger.info(f"[SAM2] Generated mask: shape={m.shape}, dtype={m.dtype}")
276
+ return m, True
277
+ except Exception as e:
278
+ logger.warning(f"SAM2 mask generation failed: {e}")
279
+ return None, False
pipeline.py CHANGED
@@ -8,6 +8,13 @@
8
  - Verbose breadcrumbs for pinpointing stalls
9
  - Enhanced mask validation for MatAnyone compatibility (robust to API changes)
10
  - Automatic mask inversion for high-coverage masks
 
 
 
 
 
 
 
11
  """
12
 
13
  from __future__ import annotations
@@ -18,6 +25,7 @@
18
  import tempfile
19
  import logging
20
  import importlib
 
21
  from pathlib import Path
22
  from typing import Optional, Tuple, Dict, Any, Union
23
 
@@ -216,17 +224,17 @@ def _progress(*args):
216
  logger.info(f"Progress: {msg}")
217
  if progress_callback:
218
  try:
219
- progress_callback(msg) # legacy 1-arg
220
  except TypeError:
221
- progress_callback(0.0, msg) # fallback
222
  elif len(args) >= 2:
223
  pct, msg = args[0], args[1]
224
  logger.info(f"Progress: {msg} ({int(pct*100)}%)")
225
  if progress_callback:
226
  try:
227
- progress_callback(pct, msg) # preferred 2-arg
228
  except TypeError:
229
- progress_callback(msg) # legacy 1-arg
230
  except Exception as e:
231
  logger.warning(f"progress callback failed: {e}")
232
 
@@ -235,7 +243,7 @@ def _progress(*args):
235
  # [5] PHASE 0: Video metadata
236
  # ----------------------------------------------------------------------------------
237
  logger.info("[0] Reading video metadata…")
238
- _progress("📹 Reading video metadata...")
239
  first_frame, fps, (vw, vh) = _cv_read_first_frame(video_path)
240
  diagnostics["fps"] = int(fps or 25)
241
  diagnostics["resolution"] = [int(vw), int(vh)]
@@ -249,7 +257,7 @@ def _progress(*args):
249
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
250
  cap.release()
251
 
252
- _progress(f"✅ Video loaded: {vw}x{vh} @ {fps}fps ({total_frames} frames)")
253
  diagnostics["total_frames"] = total_frames
254
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
255
 
@@ -257,7 +265,7 @@ def _progress(*args):
257
  # [6] PHASE 1: SAM2 → seed mask (then free)
258
  # ----------------------------------------------------------------------------------
259
  logger.info("[1] Loading SAM2…")
260
- _progress("🤖 Loading SAM2 model...")
261
  predictor, sam2_ok, sam_meta = load_sam2()
262
  diagnostics["sam2_meta"] = sam_meta or {}
263
  diagnostics["device_sam2"] = (sam_meta or {}).get("sam2_device")
@@ -269,7 +277,7 @@ def _progress(*args):
269
 
270
  if sam2_ok and predictor is not None:
271
  logger.info("[1] Running SAM2 segmentation…")
272
- _progress("🎯 Running SAM2 segmentation...")
273
  px = int(point_x) if point_x is not None else None
274
  py = int(point_y) if point_y is not None else None
275
  seed_mask, ok_mask = run_sam2_mask(
@@ -278,10 +286,10 @@ def _progress(*args):
278
  auto=auto_box
279
  )
280
  diagnostics["sam2_ok"] = bool(ok_mask)
281
- _progress("✅ SAM2 segmentation complete")
282
  else:
283
  logger.info("[1] SAM2 unavailable or failed to load.")
284
- _progress("⚠️ SAM2 unavailable, using fallback")
285
 
286
  # Free SAM2 ASAP
287
  try:
@@ -290,13 +298,13 @@ def _progress(*args):
290
  pass
291
  predictor = None
292
  _force_cleanup()
293
- _progress("🧹 SAM2 memory cleared")
294
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
295
 
296
  # Fallback mask generation if SAM2 failed
297
  if not ok_mask or seed_mask is None:
298
  logger.info("[1] Using fallback mask generation…")
299
- _progress("🔄 Generating fallback mask...")
300
  seed_mask = fallback_mask(first_frame)
301
  diagnostics["fallback_used"] = "mask_generation"
302
  _force_cleanup()
@@ -304,7 +312,7 @@ def _progress(*args):
304
  # Optional GrabCut refinement
305
  if int(os.environ.get("REFINE_GRABCUT", "1")) == 1:
306
  logger.info("[1] Refining mask with GrabCut…")
307
- _progress("✨ Refining mask with GrabCut...")
308
  seed_mask = _refine_mask_grabcut(first_frame, seed_mask)
309
  _force_cleanup()
310
 
@@ -341,7 +349,7 @@ def _progress(*args):
341
  logger.info(f"[1] ✅ Mask validation passed: {validation_msg}")
342
  diagnostics["mask_validation"] = {"valid": True, "stats": mask_stats}
343
 
344
- _progress("✅ Stage 1 complete - Mask generated and validated")
345
 
346
  # Free first frame memory
347
  try:
@@ -350,18 +358,25 @@ def _progress(*args):
350
  pass
351
  _force_cleanup()
352
  _cleanup_temp_files(tmp_root)
353
- _progress("🧹 Frame memory cleared")
354
 
355
  # ----------------------------------------------------------------------------------
356
  # [7] PHASE 2: MatAnyone (strict CHW/HW tensors are handled in matanyone_loader)
357
  # ----------------------------------------------------------------------------------
358
  logger.info("[2] Loading MatAnyone…")
359
- _progress("🎬 Loading MatAnyone model...")
360
  matany, mat_ok, mat_meta = load_matany()
361
  diagnostics["matany_meta"] = mat_meta or {}
362
  diagnostics["device_matany"] = (mat_meta or {}).get("matany_device")
363
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
364
 
 
 
 
 
 
 
 
365
  fg_path, al_path = None, None
366
  out_dir = tmp_root / "matany_out"
367
  _ensure_dir(out_dir)
@@ -369,19 +384,25 @@ def _progress(*args):
369
  from models.matanyone_loader import MatAnyError
370
 
371
  try:
372
- _progress("MatAnyone: starting…")
373
  logger.info("[2] Running MatAnyone processing…")
374
 
375
  mask_validation = diagnostics.get("mask_validation", {})
376
  if not mask_validation.get("valid", False):
377
  logger.warning(f"[2] Proceeding with MatAnyone despite mask validation failure: "
378
  f"{mask_validation.get('error', 'unknown')}")
379
-
380
  else:
381
  logger.info(f"[2] Mask validation OK - coverage: "
382
  f"{mask_validation['stats']['coverage_percent']}%")
383
 
384
- _progress("🎥 Running MatAnyone video matting...")
 
 
 
 
 
 
 
385
 
386
  # NOTE: The updated loader feeds CHW image + HW seed (frame 0 only) — no 5D tensors.
387
  al_path, fg_path = run_matany(
@@ -389,14 +410,13 @@ def _progress(*args):
389
  mask_path=mask_png,
390
  out_dir=out_dir,
391
  device="cuda" if _cuda_available() else "cpu",
392
- # Pass a simple status bridge; the loader already rate-limits progress
393
- progress_callback=lambda frac, msg: _progress(msg),
394
  )
395
 
396
  logger.info("Stage 2 success: MatAnyone produced outputs.")
397
  diagnostics["matany_ok"] = True
398
  mat_ok = True
399
- _progress("✅ MatAnyone processing complete")
400
  logger.info(f"[2] MatAnyone results: fg_path={fg_path}, al_path={al_path}")
401
 
402
  except MatAnyError as e:
@@ -409,7 +429,7 @@ def _progress(*args):
409
  fg_path, al_path = None, None
410
 
411
  if not mat_ok:
412
- _progress("MatAnyone failed → using fallback")
413
  logger.info("[2] MatAnyone unavailable or failed, using fallback.")
414
 
415
  # Free MatAnyone ASAP
@@ -419,7 +439,7 @@ def _progress(*args):
419
  pass
420
  matany = None
421
  _force_cleanup()
422
- _progress("🧹 MatAnyone memory cleared")
423
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
424
 
425
  # ----------------------------------------------------------------------------------
@@ -427,10 +447,10 @@ def _progress(*args):
427
  # ----------------------------------------------------------------------------------
428
  logger.info("[3] Building Stage-A (transparent or checkerboard)…")
429
  if diagnostics["matany_ok"]:
430
- _progress("✅ Stage 2 complete - Video matting done")
431
  else:
432
- _progress("ℹ️ Skipping MatAnyone outputs; building Stage-A from mask")
433
- _progress("🎨 Building Stage-A video...")
434
 
435
  stageA_path = None
436
  stageA_ok = False
@@ -475,13 +495,13 @@ def _progress(*args):
475
  # [9] PHASE 4: Final compositing
476
  # ----------------------------------------------------------------------------------
477
  logger.info("[4] Creating final composite…")
478
- _progress("✅ Stage 3 complete - Stage-A built")
479
- _progress("🎬 Creating final composite...")
480
  output_path = tmp_root / "output.mp4"
481
 
482
  if diagnostics["matany_ok"] and fg_path and al_path:
483
  logger.info(f"[4] Compositing with MatAnyone outputs: fg_path={fg_path}, al_path={al_path}")
484
- _progress(f"🎬 Compositing video with MatAnyone outputs...")
485
 
486
  fg_exists = Path(fg_path).exists() if fg_path else False
487
  al_exists = Path(al_path).exists() if al_path else False
@@ -492,19 +512,19 @@ def _progress(*args):
492
  logger.info(f"[4] Composite result: {ok_comp}")
493
  if not ok_comp:
494
  logger.info("[4] Composite failed; falling back to static mask composite.")
495
- _progress("⚠️ MatAnyone composite failed, using fallback...")
496
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
497
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
498
  else:
499
- _progress("✅ MatAnyone composite successful!")
500
  else:
501
  logger.error(f"[4] MatAnyone output files missing - using fallback composite")
502
- _progress("⚠️ MatAnyone files missing, using fallback...")
503
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
504
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
505
  else:
506
  logger.info(f"[4] Using static mask composite - matany_ok={diagnostics['matany_ok']}, fg_path={fg_path}, al_path={al_path}")
507
- _progress("🎬 Using static mask composite...")
508
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
509
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") or "composite_static"
510
 
@@ -513,20 +533,20 @@ def _progress(*args):
513
 
514
  if not output_path.exists():
515
  logger.error(f"[4] Output video not created at {output_path}")
516
- _progress("❌ Composite creation failed - no output file")
517
  diagnostics["error"] = "Composite video not created"
518
  return None, diagnostics
519
 
520
  output_size = output_path.stat().st_size
521
  logger.info(f"[4] Output video created: {output_path} ({output_size} bytes)")
522
- _progress(f"✅ Composite created ({output_size} bytes)")
523
 
524
  # ----------------------------------------------------------------------------------
525
  # [10] PHASE 5: Audio mux (if FFmpeg available)
526
  # ----------------------------------------------------------------------------------
527
  logger.info("[5] Adding audio track…")
528
- _progress("✅ Stage 4 complete - Composite created")
529
- _progress("🎵 Adding audio track...")
530
  final_path = tmp_root / "output_with_audio.mp4"
531
 
532
  if _probe_ffmpeg():
@@ -537,7 +557,7 @@ def _progress(*args):
537
  if mux_ok and final_path.exists():
538
  final_size = final_path.stat().st_size
539
  logger.info(f"[5] Final video with audio: {final_path} ({final_size} bytes)")
540
- _progress(f"�� Final video ready ({final_size} bytes)")
541
  output_path.unlink(missing_ok=True)
542
  _force_cleanup()
543
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
@@ -551,7 +571,7 @@ def _progress(*args):
551
 
552
  # Fallback return without audio
553
  logger.info(f"[5] Using output without audio: {output_path}")
554
- _progress(f"✅ Video ready (no audio) ({output_size} bytes)")
555
  _force_cleanup()
556
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
557
  diagnostics["total_time_sec"] = diagnostics["elapsed_sec"]
@@ -573,4 +593,4 @@ def _progress(*args):
573
  finally:
574
  # Ensure cleanup even if something goes wrong
575
  _force_cleanup()
576
- _cleanup_temp_files(tmp_root)
 
8
  - Verbose breadcrumbs for pinpointing stalls
9
  - Enhanced mask validation for MatAnyone compatibility (robust to API changes)
10
  - Automatic mask inversion for high-coverage masks
11
+
12
+ Changes (2025-09-16):
13
+ - Aligned with torch==2.3.1+cu121, MatAnyone v1.0.0, SAM2 commit 3c76f73c1a7e7b4a2e8a0a9a3e5b92f7e6e3f2f5
14
+ - Added input shape logging before run_matany to prevent 5D tensor issues
15
+ - Added MatAnyone version logging
16
+ - Ensured consistent progress callback with percentages
17
+ - Maintained compatibility with updated models/ files
18
  """
19
 
20
  from __future__ import annotations
 
25
  import tempfile
26
  import logging
27
  import importlib
28
+ import importlib.metadata
29
  from pathlib import Path
30
  from typing import Optional, Tuple, Dict, Any, Union
31
 
 
224
  logger.info(f"Progress: {msg}")
225
  if progress_callback:
226
  try:
227
+ progress_callback(0.0, msg) # Default to 0% if no percentage provided
228
  except TypeError:
229
+ progress_callback(msg) # Legacy 1-arg
230
  elif len(args) >= 2:
231
  pct, msg = args[0], args[1]
232
  logger.info(f"Progress: {msg} ({int(pct*100)}%)")
233
  if progress_callback:
234
  try:
235
+ progress_callback(pct, msg) # Preferred 2-arg
236
  except TypeError:
237
+ progress_callback(msg) # Legacy 1-arg
238
  except Exception as e:
239
  logger.warning(f"progress callback failed: {e}")
240
 
 
243
  # [5] PHASE 0: Video metadata
244
  # ----------------------------------------------------------------------------------
245
  logger.info("[0] Reading video metadata…")
246
+ _progress(0.0, "📹 Reading video metadata...")
247
  first_frame, fps, (vw, vh) = _cv_read_first_frame(video_path)
248
  diagnostics["fps"] = int(fps or 25)
249
  diagnostics["resolution"] = [int(vw), int(vh)]
 
257
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
258
  cap.release()
259
 
260
+ _progress(0.05, f"✅ Video loaded: {vw}x{vh} @ {fps}fps ({total_frames} frames)")
261
  diagnostics["total_frames"] = total_frames
262
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
263
 
 
265
  # [6] PHASE 1: SAM2 → seed mask (then free)
266
  # ----------------------------------------------------------------------------------
267
  logger.info("[1] Loading SAM2…")
268
+ _progress(0.1, "🤖 Loading SAM2 model...")
269
  predictor, sam2_ok, sam_meta = load_sam2()
270
  diagnostics["sam2_meta"] = sam_meta or {}
271
  diagnostics["device_sam2"] = (sam_meta or {}).get("sam2_device")
 
277
 
278
  if sam2_ok and predictor is not None:
279
  logger.info("[1] Running SAM2 segmentation…")
280
+ _progress(0.15, "🎯 Running SAM2 segmentation...")
281
  px = int(point_x) if point_x is not None else None
282
  py = int(point_y) if point_y is not None else None
283
  seed_mask, ok_mask = run_sam2_mask(
 
286
  auto=auto_box
287
  )
288
  diagnostics["sam2_ok"] = bool(ok_mask)
289
+ _progress(0.2, "✅ SAM2 segmentation complete")
290
  else:
291
  logger.info("[1] SAM2 unavailable or failed to load.")
292
+ _progress(0.2, "⚠️ SAM2 unavailable, using fallback")
293
 
294
  # Free SAM2 ASAP
295
  try:
 
298
  pass
299
  predictor = None
300
  _force_cleanup()
301
+ _progress(0.25, "🧹 SAM2 memory cleared")
302
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
303
 
304
  # Fallback mask generation if SAM2 failed
305
  if not ok_mask or seed_mask is None:
306
  logger.info("[1] Using fallback mask generation…")
307
+ _progress(0.25, "🔄 Generating fallback mask...")
308
  seed_mask = fallback_mask(first_frame)
309
  diagnostics["fallback_used"] = "mask_generation"
310
  _force_cleanup()
 
312
  # Optional GrabCut refinement
313
  if int(os.environ.get("REFINE_GRABCUT", "1")) == 1:
314
  logger.info("[1] Refining mask with GrabCut…")
315
+ _progress(0.3, "✨ Refining mask with GrabCut...")
316
  seed_mask = _refine_mask_grabcut(first_frame, seed_mask)
317
  _force_cleanup()
318
 
 
349
  logger.info(f"[1] ✅ Mask validation passed: {validation_msg}")
350
  diagnostics["mask_validation"] = {"valid": True, "stats": mask_stats}
351
 
352
+ _progress(0.35, "✅ Stage 1 complete - Mask generated and validated")
353
 
354
  # Free first frame memory
355
  try:
 
358
  pass
359
  _force_cleanup()
360
  _cleanup_temp_files(tmp_root)
361
+ _progress(0.4, "🧹 Frame memory cleared")
362
 
363
  # ----------------------------------------------------------------------------------
364
  # [7] PHASE 2: MatAnyone (strict CHW/HW tensors are handled in matanyone_loader)
365
  # ----------------------------------------------------------------------------------
366
  logger.info("[2] Loading MatAnyone…")
367
+ _progress(0.45, "🎬 Loading MatAnyone model...")
368
  matany, mat_ok, mat_meta = load_matany()
369
  diagnostics["matany_meta"] = mat_meta or {}
370
  diagnostics["device_matany"] = (mat_meta or {}).get("matany_device")
371
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
372
 
373
+ # Log MatAnyone version
374
+ try:
375
+ version = importlib.metadata.version("matanyone")
376
+ logger.info(f"[MATANY] MatAnyone version: {version}")
377
+ except Exception:
378
+ logger.info("[MATANY] MatAnyone version unknown")
379
+
380
  fg_path, al_path = None, None
381
  out_dir = tmp_root / "matany_out"
382
  _ensure_dir(out_dir)
 
384
  from models.matanyone_loader import MatAnyError
385
 
386
  try:
387
+ _progress(0.5, "MatAnyone: starting…")
388
  logger.info("[2] Running MatAnyone processing…")
389
 
390
  mask_validation = diagnostics.get("mask_validation", {})
391
  if not mask_validation.get("valid", False):
392
  logger.warning(f"[2] Proceeding with MatAnyone despite mask validation failure: "
393
  f"{mask_validation.get('error', 'unknown')}")
 
394
  else:
395
  logger.info(f"[2] Mask validation OK - coverage: "
396
  f"{mask_validation['stats']['coverage_percent']}%")
397
 
398
+ _progress(0.55, "🎥 Running MatAnyone video matting...")
399
+
400
+ # Validate input shapes before MatAnyone
401
+ import cv2
402
+ mask_array = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
403
+ logger.info(f"[2] Input mask shape: {mask_array.shape if mask_array is not None else None}")
404
+ if mask_array is None:
405
+ raise MatAnyError(f"Invalid mask at {mask_png}")
406
 
407
  # NOTE: The updated loader feeds CHW image + HW seed (frame 0 only) — no 5D tensors.
408
  al_path, fg_path = run_matany(
 
410
  mask_path=mask_png,
411
  out_dir=out_dir,
412
  device="cuda" if _cuda_available() else "cpu",
413
+ progress_callback=lambda frac, msg: _progress(0.55 + 0.35 * frac, msg),
 
414
  )
415
 
416
  logger.info("Stage 2 success: MatAnyone produced outputs.")
417
  diagnostics["matany_ok"] = True
418
  mat_ok = True
419
+ _progress(0.9, "✅ MatAnyone processing complete")
420
  logger.info(f"[2] MatAnyone results: fg_path={fg_path}, al_path={al_path}")
421
 
422
  except MatAnyError as e:
 
429
  fg_path, al_path = None, None
430
 
431
  if not mat_ok:
432
+ _progress(0.9, "MatAnyone failed → using fallback...")
433
  logger.info("[2] MatAnyone unavailable or failed, using fallback.")
434
 
435
  # Free MatAnyone ASAP
 
439
  pass
440
  matany = None
441
  _force_cleanup()
442
+ _progress(0.95, "🧹 MatAnyone memory cleared")
443
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
444
 
445
  # ----------------------------------------------------------------------------------
 
447
  # ----------------------------------------------------------------------------------
448
  logger.info("[3] Building Stage-A (transparent or checkerboard)…")
449
  if diagnostics["matany_ok"]:
450
+ _progress(0.95, "✅ Stage 2 complete - Video matting done")
451
  else:
452
+ _progress(0.95, "ℹ️ Skipping MatAnyone outputs; building Stage-A from mask")
453
+ _progress(0.95, "🎨 Building Stage-A video...")
454
 
455
  stageA_path = None
456
  stageA_ok = False
 
495
  # [9] PHASE 4: Final compositing
496
  # ----------------------------------------------------------------------------------
497
  logger.info("[4] Creating final composite…")
498
+ _progress(0.97, "✅ Stage 3 complete - Stage-A built")
499
+ _progress(0.97, "🎬 Creating final composite...")
500
  output_path = tmp_root / "output.mp4"
501
 
502
  if diagnostics["matany_ok"] and fg_path and al_path:
503
  logger.info(f"[4] Compositing with MatAnyone outputs: fg_path={fg_path}, al_path={al_path}")
504
+ _progress(0.97, f"🎬 Compositing video with MatAnyone outputs...")
505
 
506
  fg_exists = Path(fg_path).exists() if fg_path else False
507
  al_exists = Path(al_path).exists() if al_path else False
 
512
  logger.info(f"[4] Composite result: {ok_comp}")
513
  if not ok_comp:
514
  logger.info("[4] Composite failed; falling back to static mask composite.")
515
+ _progress(0.98, "⚠️ MatAnyone composite failed, using fallback...")
516
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
517
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
518
  else:
519
+ _progress(0.98, "✅ MatAnyone composite successful!")
520
  else:
521
  logger.error(f"[4] MatAnyone output files missing - using fallback composite")
522
+ _progress(0.98, "⚠️ MatAnyone files missing, using fallback...")
523
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
524
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
525
  else:
526
  logger.info(f"[4] Using static mask composite - matany_ok={diagnostics['matany_ok']}, fg_path={fg_path}, al_path={al_path}")
527
+ _progress(0.98, "🎬 Using static mask composite...")
528
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
529
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") or "composite_static"
530
 
 
533
 
534
  if not output_path.exists():
535
  logger.error(f"[4] Output video not created at {output_path}")
536
+ _progress(0.99, "❌ Composite creation failed - no output file")
537
  diagnostics["error"] = "Composite video not created"
538
  return None, diagnostics
539
 
540
  output_size = output_path.stat().st_size
541
  logger.info(f"[4] Output video created: {output_path} ({output_size} bytes)")
542
+ _progress(0.99, f"✅ Composite created ({output_size} bytes)")
543
 
544
  # ----------------------------------------------------------------------------------
545
  # [10] PHASE 5: Audio mux (if FFmpeg available)
546
  # ----------------------------------------------------------------------------------
547
  logger.info("[5] Adding audio track…")
548
+ _progress(0.99, "✅ Stage 4 complete - Composite created")
549
+ _progress(0.99, "🎵 Adding audio track...")
550
  final_path = tmp_root / "output_with_audio.mp4"
551
 
552
  if _probe_ffmpeg():
 
557
  if mux_ok and final_path.exists():
558
  final_size = final_path.stat().st_size
559
  logger.info(f"[5] Final video with audio: {final_path} ({final_size} bytes)")
560
+ _progress(1.0, f" Final video ready ({final_size} bytes)")
561
  output_path.unlink(missing_ok=True)
562
  _force_cleanup()
563
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
 
571
 
572
  # Fallback return without audio
573
  logger.info(f"[5] Using output without audio: {output_path}")
574
+ _progress(1.0, f"✅ Video ready (no audio) ({output_size} bytes)")
575
  _force_cleanup()
576
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
577
  diagnostics["total_time_sec"] = diagnostics["elapsed_sec"]
 
593
  finally:
594
  # Ensure cleanup even if something goes wrong
595
  _force_cleanup()
596
+ _cleanup_temp_files(tmp_root)
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  # ===== Core runtime (Torch is installed in Dockerfile with cu121 wheels) =====
2
- # DO NOT add torch/torchvision/torchaudio here when using the CUDA wheels in Dockerfile.
3
 
4
  # ===== Video / image IO =====
5
  opencv-python-headless==4.10.0.84
@@ -16,6 +16,7 @@ protobuf==4.25.3
16
  gradio==5.42.0
17
 
18
  # ===== SAM2 Dependencies =====
 
19
  hydra-core==1.3.2
20
  omegaconf==2.3.0
21
  einops==0.8.0
@@ -24,6 +25,7 @@ pyyaml==6.0.2
24
  matplotlib==3.9.2
25
 
26
  # ===== MatAnyone Dependencies =====
 
27
  kornia==0.7.3
28
  scikit-image==0.24.0
29
  tqdm==4.66.5
@@ -35,11 +37,6 @@ psutil==6.0.0
35
  requests==2.32.3
36
  scikit-learn==1.5.1
37
 
38
-
39
- # ===== (Optional) Extras =====
40
- # safetensors==0.4.5
41
- # aiohttp==3.10.5
42
-
43
  # ===== (Optional) Extras =====
44
- # safetensors==0.4.5 # if you pull weights that use safetensors
45
- # aiohttp==3.10.5 # if you later async-fetch assets
 
1
  # ===== Core runtime (Torch is installed in Dockerfile with cu121 wheels) =====
2
+ # DO NOT add torch/torchvision/torchaudio here when using CUDA wheels in Dockerfile.
3
 
4
  # ===== Video / image IO =====
5
  opencv-python-headless==4.10.0.84
 
16
  gradio==5.42.0
17
 
18
  # ===== SAM2 Dependencies =====
19
+ git+https://github.com/facebookresearch/segment-anything-2@main
20
  hydra-core==1.3.2
21
  omegaconf==2.3.0
22
  einops==0.8.0
 
25
  matplotlib==3.9.2
26
 
27
  # ===== MatAnyone Dependencies =====
28
+ git+https://github.com/pq-yang/MatAnyone@master
29
  kornia==0.7.3
30
  scikit-image==0.24.0
31
  tqdm==4.66.5
 
37
  requests==2.32.3
38
  scikit-learn==1.5.1
39
 
 
 
 
 
 
40
  # ===== (Optional) Extras =====
41
+ # safetensors==0.4.5 # Uncomment if pulling weights that use safetensors
42
+ # aiohttp==3.10.5 # Uncomment if async-fetching assets
ui.py CHANGED
@@ -3,7 +3,15 @@
3
  BackgroundFX Pro — Gradio UI, background generators, and data sources (Hardened)
4
  - No top-level import of pipeline (lazy import in handlers)
5
  - Compatible with pipeline.process()
6
- - FIXED: Proper SAM2 configuration for person segmentation
 
 
 
 
 
 
 
 
7
  """
8
 
9
  import io
@@ -16,6 +24,7 @@
16
  from typing import Optional, Tuple, List, Dict, Any
17
  from PIL import Image
18
  import gradio as gr
 
19
 
20
  logger = logging.getLogger("ui")
21
  if not logger.handlers:
@@ -146,7 +155,6 @@ def get_video_url(self, selection: str) -> Optional[str]:
146
  myavatar_api = MyAvatarAPI()
147
 
148
  # ---- Minimal stop flag (request-scoped) ----
149
- # We avoid pipeline globals; this just short-circuits the generator.
150
  class Stopper:
151
  def __init__(self):
152
  self.stop = False
@@ -175,9 +183,11 @@ def process_video_with_background_stoppable(
175
  video_path = None
176
  if input_video:
177
  video_path = input_video
 
178
  elif myavatar_selection and myavatar_selection != "No videos available":
179
  url = myavatar_api.get_video_url(myavatar_selection)
180
  if url:
 
181
  with requests.get(url, stream=True, timeout=60) as r:
182
  r.raise_for_status()
183
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
@@ -188,12 +198,14 @@ def process_video_with_background_stoppable(
188
  if chunk:
189
  tmp.write(chunk)
190
  video_path = tmp.name
 
191
 
192
  if STOP.stop:
193
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
194
  return
195
 
196
- if not video_path:
 
197
  yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
198
  return
199
 
@@ -202,21 +214,27 @@ def process_video_with_background_stoppable(
202
  bg_img = None
203
  if background_type == "gradient":
204
  bg_img = create_gradient_background(gradient_type, 1920, 1080)
 
205
  elif background_type == "solid":
206
  bg_img = create_solid_color(solid_color, 1920, 1080)
 
207
  elif background_type == "custom" and custom_background:
208
  try:
209
  bg_img = Image.open(custom_background).convert("RGB")
210
- except Exception:
 
 
211
  bg_img = None
212
  elif background_type == "ai" and ai_prompt:
213
- bg_img, _ = generate_ai_background(ai_prompt)
 
214
 
215
  if STOP.stop:
216
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
217
  return
218
 
219
  if bg_img is None:
 
220
  yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
221
  return
222
 
@@ -224,39 +242,45 @@ def process_video_with_background_stoppable(
224
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_bg:
225
  bg_img.save(tmp_bg.name, format="PNG")
226
  bg_path = tmp_bg.name
 
227
 
228
  # Run pipeline with enhanced real-time status updates
229
  yield gr.update(visible=False), gr.update(visible=True), None, "🔄 Initializing pipeline...\n⚡ Checking GPU acceleration..."
230
  logger.info(f"=== PIPELINE START ===")
231
-
232
  # Enhanced GPU diagnostics with detailed status
233
  try:
234
  import torch
235
  logger.info(f"✅ Torch version: {torch.__version__}")
236
  logger.info(f"✅ CUDA available: {torch.cuda.is_available()}")
237
-
 
 
 
 
 
238
  if torch.cuda.is_available():
239
  device_count = torch.cuda.device_count()
240
  current_device = torch.cuda.current_device()
241
  device_name = torch.cuda.get_device_name()
242
  device_capability = torch.cuda.get_device_capability()
243
-
244
  # Get GPU memory info
245
  memory_allocated = torch.cuda.memory_allocated() / (1024**3) # GB
246
  memory_reserved = torch.cuda.memory_reserved() / (1024**3) # GB
247
  memory_total = torch.cuda.get_device_properties(current_device).total_memory / (1024**3) # GB
248
-
249
  gpu_status = f"""✅ GPU Acceleration Active
250
  🖥️ Device: {device_name} (Compute {device_capability[0]}.{device_capability[1]})
251
  💾 Memory: {memory_allocated:.1f}GB allocated / {memory_total:.1f}GB total
252
  🔧 CUDA {torch.version.cuda} | PyTorch {torch.__version__}
253
  📊 Ready for SAM2 + MatAnyone processing..."""
254
-
255
  logger.info(f"✅ CUDA device count: {device_count}")
256
  logger.info(f"✅ Current device: {current_device}")
257
  logger.info(f"✅ Device name: {device_name}")
258
  logger.info(f"✅ GPU memory: {memory_allocated:.1f}GB/{memory_total:.1f}GB")
259
-
260
  yield gr.update(visible=False), gr.update(visible=True), None, gpu_status
261
  else:
262
  logger.error(f"❌ CUDA NOT AVAILABLE - GPU processing will fail")
@@ -266,34 +290,33 @@ def process_video_with_background_stoppable(
266
  logger.error(f"❌ Torch/CUDA check failed: {e}")
267
  yield gr.update(visible=True), gr.update(visible=False), None, f"GPU check error: {e}"
268
  return
269
-
270
  yield gr.update(visible=False), gr.update(visible=True), None, gpu_status + "\n\n🔄 Loading pipeline modules..."
271
  logger.info(f"About to import pipeline module...")
272
-
273
  try:
274
  pipe = importlib.import_module("pipeline")
275
  logger.info(f"✅ Pipeline module imported successfully")
276
-
277
  pipeline_status = gpu_status + "\n\n✅ Pipeline modules loaded\n📹 Initializing video processing pipeline..."
278
  yield gr.update(visible=False), gr.update(visible=True), None, pipeline_status
279
  except Exception as e:
280
  logger.error(f"❌ Pipeline import failed: {e}")
281
  yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline import error: {e}"
282
  return
283
-
284
  logger.info(f"Calling pipe.process with video_path={video_path}, bg_path={bg_path}")
285
  logger.info(f"=== CALLING PIPELINE.PROCESS ===")
286
-
287
  # Enhanced status during processing with detailed stage tracking
288
  stage_status = {
289
  "current_stage": "Starting...",
290
  "sam2_status": "⏳ Pending",
291
- "matany_status": "⏳ Pending",
292
  "composite_status": "⏳ Pending",
293
  "audio_status": "⏳ Pending",
294
  "frame_progress": ""
295
  }
296
-
297
  def format_status():
298
  return (gpu_status + f"\n\n🚀 PROCESSING: {stage_status['current_stage']}\n\n" +
299
  f"📊 PIPELINE STAGES:\n" +
@@ -302,59 +325,52 @@ def format_status():
302
  f"🎬 Video Compositing: {stage_status['composite_status']}\n" +
303
  f"🔊 Audio Muxing: {stage_status['audio_status']}\n" +
304
  (f"\n📈 {stage_status['frame_progress']}" if stage_status['frame_progress'] else ""))
305
-
306
  processing_status = format_status()
307
  yield gr.update(visible=False), gr.update(visible=True), None, processing_status
308
-
309
  # Create progress callback to update UI status with detailed tracking
310
- def progress_callback(message):
311
  nonlocal stage_status
312
-
313
  # Update current stage and frame progress
314
- stage_status['current_stage'] = message
315
-
316
  # Track specific stages
317
- if "SAM2" in message or "segmentation" in message.lower():
318
- if "complete" in message.lower() or "✅" in message:
319
  stage_status['sam2_status'] = "✅ Complete"
320
  else:
321
  stage_status['sam2_status'] = "🔄 Running..."
322
-
323
- elif "MatAnyone" in message or "matting" in message.lower():
324
- if "complete" in message.lower() or "✅" in message:
325
  stage_status['matany_status'] = "✅ Complete"
326
- elif "failed" in message.lower() or "fallback" in message.lower():
327
  stage_status['matany_status'] = "❌ Failed → Fallback"
328
  else:
329
  stage_status['matany_status'] = "🔄 Running..."
330
-
331
- elif "composit" in message.lower():
332
- if "complete" in message.lower() or "✅" in message:
333
  stage_status['composite_status'] = "✅ Complete"
334
  else:
335
  stage_status['composite_status'] = "🔄 Running..."
336
-
337
- elif "audio" in message.lower() or "mux" in message.lower():
338
- if "complete" in message.lower() or "✅" in message:
339
  stage_status['audio_status'] = "✅ Complete"
340
  else:
341
  stage_status['audio_status'] = "🔄 Running..."
342
-
343
  # Extract frame progress
344
- if "/" in message and any(word in message.lower() for word in ["frame", "matting", "chunking"]):
345
- stage_status['frame_progress'] = message
346
-
347
  updated_status = format_status()
348
  return gr.update(visible=False), gr.update(visible=True), None, updated_status
349
-
350
  try:
351
- # FIXED: Remove problematic auto_box setting and use smart person detection
352
  out_path, diag = pipe.process(
353
  video_path=video_path,
354
  bg_image_path=bg_path,
355
- point_x=None, # Let SAM2 use smart person detection
356
- point_y=None, # Let SAM2 use smart person detection
357
- auto_box=False, # FIXED: Disable auto_box to use our smart detection
358
  work_dir=None,
359
  progress_callback=progress_callback
360
  )
@@ -364,11 +380,10 @@ def progress_callback(message):
364
  logger.error(f"❌ Pipeline.process failed: {e}")
365
  import traceback
366
  logger.error(f"Full traceback: {traceback.format_exc()}")
367
-
368
  error_status = gpu_status + f"\n\n❌ PROCESSING FAILED\n🚨 Error: {str(e)[:200]}..."
369
  yield gr.update(visible=True), gr.update(visible=False), None, error_status
370
  return
371
-
372
  if out_path:
373
  # Enhanced final processing stats with detailed breakdown
374
  fps = diag.get('fps', 'unknown')
@@ -376,31 +391,25 @@ def progress_callback(message):
376
  sam2_ok = diag.get('sam2_ok', False)
377
  matany_ok = diag.get('matany_ok', False)
378
  processing_time = diag.get('total_time_sec', 0)
379
- sam2_time = diag.get('sam2_time_sec', 0)
380
- matany_time = diag.get('matany_time_sec', 0)
381
-
382
  # Check mask validation results for quality feedback
383
  mask_validation = diag.get('mask_validation', {})
384
  mask_valid = mask_validation.get('valid', False)
385
  mask_coverage = mask_validation.get('stats', {}).get('coverage_percent', 0)
386
-
387
  # Get final GPU memory usage and verify GPU acceleration was used
388
  try:
389
  import torch
390
  if torch.cuda.is_available():
391
  final_memory = torch.cuda.memory_allocated() / (1024**3)
392
  peak_memory = torch.cuda.max_memory_allocated() / (1024**3)
393
-
394
- # Log GPU utilization to verify models used GPU
395
  logger.info(f"GPU USAGE VERIFICATION:")
396
  logger.info(f" Final memory allocated: {final_memory:.2f}GB")
397
  logger.info(f" Peak memory used: {peak_memory:.2f}GB")
398
-
399
  if peak_memory < 0.1: # Less than 100MB indicates CPU usage
400
- logger.warning(f"⚠️ LOW GPU USAGE! Peak memory {peak_memory:.2f}GB suggests CPU fallback")
401
  else:
402
  logger.info(f"✅ GPU ACCELERATION CONFIRMED - Peak usage {peak_memory:.2f}GB")
403
-
404
  torch.cuda.reset_peak_memory_stats() # Reset for next run
405
  else:
406
  final_memory = peak_memory = 0
@@ -408,7 +417,7 @@ def progress_callback(message):
408
  except Exception as e:
409
  logger.error(f"GPU memory check failed: {e}")
410
  final_memory = peak_memory = 0
411
-
412
  # Enhanced success message with segmentation quality info
413
  segmentation_quality = ""
414
  if mask_valid and mask_coverage > 0:
@@ -418,13 +427,13 @@ def progress_callback(message):
418
  segmentation_quality = f"⚠️ High segmentation ({mask_coverage:.1f}% - check background)"
419
  else:
420
  segmentation_quality = f"✅ Person segmented ({mask_coverage:.1f}%)"
421
-
422
  status_msg = gpu_status + f"""
423
 
424
  🎉 PROCESSING COMPLETE!
425
- ✅ Stage 1: SAM2 segmentation {'✓' if sam2_ok else '✗'} ({sam2_time:.1f}s)
426
  {segmentation_quality}
427
- ✅ Stage 2: MatAnyone matting {'✓' if matany_ok else '✗'} ({matany_time:.1f}s)
428
  ✅ Stage 3: Final compositing complete
429
 
430
  📊 RESULTS:
@@ -457,10 +466,12 @@ def progress_callback(message):
457
  logger.error(f"Full traceback: {traceback.format_exc()}")
458
  yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {e}"
459
  finally:
460
- # Best-effort cleanup of any temp download
461
  try:
462
  if input_video is None and 'video_path' in locals() and video_path and os.path.exists(video_path):
463
  os.unlink(video_path)
 
 
464
  except Exception:
465
  pass
466
 
@@ -483,7 +494,7 @@ def create_interface():
483
  """
484
 
485
  with gr.Blocks(css=css, title="BackgroundFX Pro") as app:
486
- gr.Markdown("# BackgroundFX Pro — SAM2 + MatAnyone (Fixed)")
487
 
488
  with gr.Row():
489
  status = _system_status()
@@ -525,72 +536,5 @@ def create_interface():
525
  result_video = gr.Video(label="Processed Video", height=400)
526
  status_output = gr.Textbox(label="Processing Status", lines=8, max_lines=15, elem_classes=["status-box"])
527
  gr.Markdown("""
528
- ### Pipeline (Fixed)
529
- 1. SAM2 Smart Person Detection → proper mask (15-35% coverage)
530
- 2. MatAnyone Matting → FG + ALPHA
531
- 3. Stage-A export (transparent WebM or checkerboard)
532
- 4. Final compositing (H.264)
533
- """)
534
-
535
- # handlers
536
- def update_background_options(bg_type):
537
- return {
538
- gradient_type: gr.update(visible=(bg_type == "gradient")),
539
- gradient_preview: gr.update(visible=(bg_type == "gradient")),
540
- solid_color: gr.update(visible=(bg_type == "solid")),
541
- color_preview: gr.update(visible=(bg_type == "solid")),
542
- custom_bg_upload: gr.update(visible=(bg_type == "custom")),
543
- ai_prompt: gr.update(visible=(bg_type == "ai")),
544
- ai_generate_btn: gr.update(visible=(bg_type == "ai")),
545
- ai_preview: gr.update(visible=(bg_type == "ai")),
546
- }
547
-
548
- def update_gradient_preview(grad_type):
549
- try:
550
- return create_gradient_background(grad_type, 400, 200)
551
- except Exception:
552
- return None
553
-
554
- def update_color_preview(color):
555
- try:
556
- return create_solid_color(color, 400, 200)
557
- except Exception:
558
- return None
559
-
560
- def refresh_myavatar_videos():
561
- try:
562
- return gr.update(choices=myavatar_api.get_video_choices(), value=None)
563
- except Exception:
564
- return gr.update(choices=["Error loading videos"], value=None)
565
-
566
- def load_video_preview(selection):
567
- try:
568
- return myavatar_api.get_video_url(selection)
569
- except Exception:
570
- return None
571
-
572
- def generate_ai_bg(prompt):
573
- bg_img, _ = generate_ai_background(prompt)
574
- return bg_img
575
-
576
- background_type.change(
577
- fn=update_background_options,
578
- inputs=[background_type],
579
- outputs=[gradient_type, gradient_preview, solid_color, color_preview, custom_bg_upload, ai_prompt, ai_generate_btn, ai_preview]
580
- )
581
- gradient_type.change(fn=update_gradient_preview, inputs=[gradient_type], outputs=[gradient_preview])
582
- solid_color.change(fn=update_color_preview, inputs=[solid_color], outputs=[color_preview])
583
- refresh_btn.click(fn=refresh_myavatar_videos, outputs=[myavatar_dropdown])
584
- myavatar_dropdown.change(fn=load_video_preview, inputs=[myavatar_dropdown], outputs=[video_preview])
585
- ai_generate_btn.click(fn=generate_ai_bg, inputs=[ai_prompt], outputs=[ai_preview])
586
-
587
- process_btn.click(
588
- fn=process_video_with_background_stoppable,
589
- inputs=[video_upload, myavatar_dropdown, background_type, gradient_type, solid_color, custom_bg_upload, ai_prompt],
590
- outputs=[process_btn, stop_btn, result_video, status_output]
591
- )
592
- stop_btn.click(fn=stop_processing_button, outputs=[stop_btn, status_output])
593
-
594
- app.load(fn=lambda: create_gradient_background("sunset", 400, 200), outputs=[gradient_preview])
595
-
596
- return app
 
3
  BackgroundFX Pro — Gradio UI, background generators, and data sources (Hardened)
4
  - No top-level import of pipeline (lazy import in handlers)
5
  - Compatible with pipeline.process()
6
+ - Aligned with torch==2.3.1+cu121, MatAnyone v1.0.0, SAM2 commit 3c76f73c1a7e7b4a2e8a0a9a3e5b92f7e6e3f2f5
7
+
8
+ Changes (2025-09-16):
9
+ - Aligned with updated pipeline.py and models/
10
+ - Updated progress callback to pass percentages to pipeline.process
11
+ - Added input path validation logging
12
+ - Simplified SAM2 arguments to use pipeline defaults
13
+ - Added MatAnyone version logging in GPU diagnostics
14
+ - Enhanced temporary file cleanup
15
  """
16
 
17
  import io
 
24
  from typing import Optional, Tuple, List, Dict, Any
25
  from PIL import Image
26
  import gradio as gr
27
+ import importlib.metadata
28
 
29
  logger = logging.getLogger("ui")
30
  if not logger.handlers:
 
155
  myavatar_api = MyAvatarAPI()
156
 
157
  # ---- Minimal stop flag (request-scoped) ----
 
158
  class Stopper:
159
  def __init__(self):
160
  self.stop = False
 
183
  video_path = None
184
  if input_video:
185
  video_path = input_video
186
+ logger.info(f"[UI] Using uploaded video: {video_path}")
187
  elif myavatar_selection and myavatar_selection != "No videos available":
188
  url = myavatar_api.get_video_url(myavatar_selection)
189
  if url:
190
+ logger.info(f"[UI] Fetching MyAvatar video: {url}")
191
  with requests.get(url, stream=True, timeout=60) as r:
192
  r.raise_for_status()
193
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
 
198
  if chunk:
199
  tmp.write(chunk)
200
  video_path = tmp.name
201
+ logger.info(f"[UI] Downloaded MyAvatar video to: {video_path}")
202
 
203
  if STOP.stop:
204
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
205
  return
206
 
207
+ if not video_path or not os.path.exists(video_path):
208
+ logger.error(f"[UI] No valid video provided: input_video={input_video}, myavatar_selection={myavatar_selection}")
209
  yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
210
  return
211
 
 
214
  bg_img = None
215
  if background_type == "gradient":
216
  bg_img = create_gradient_background(gradient_type, 1920, 1080)
217
+ logger.info(f"[UI] Generated gradient background: {gradient_type}")
218
  elif background_type == "solid":
219
  bg_img = create_solid_color(solid_color, 1920, 1080)
220
+ logger.info(f"[UI] Generated solid color background: {solid_color}")
221
  elif background_type == "custom" and custom_background:
222
  try:
223
  bg_img = Image.open(custom_background).convert("RGB")
224
+ logger.info(f"[UI] Loaded custom background: {custom_background}")
225
+ except Exception as e:
226
+ logger.error(f"[UI] Failed to load custom background: {e}")
227
  bg_img = None
228
  elif background_type == "ai" and ai_prompt:
229
+ bg_img, msg = generate_ai_background(ai_prompt)
230
+ logger.info(f"[UI] AI background generation: {msg}")
231
 
232
  if STOP.stop:
233
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
234
  return
235
 
236
  if bg_img is None:
237
+ logger.error(f"[UI] No background generated: type={background_type}, custom={custom_background}, ai_prompt={ai_prompt}")
238
  yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
239
  return
240
 
 
242
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_bg:
243
  bg_img.save(tmp_bg.name, format="PNG")
244
  bg_path = tmp_bg.name
245
+ logger.info(f"[UI] Saved background to: {bg_path}")
246
 
247
  # Run pipeline with enhanced real-time status updates
248
  yield gr.update(visible=False), gr.update(visible=True), None, "🔄 Initializing pipeline...\n⚡ Checking GPU acceleration..."
249
  logger.info(f"=== PIPELINE START ===")
250
+
251
  # Enhanced GPU diagnostics with detailed status
252
  try:
253
  import torch
254
  logger.info(f"✅ Torch version: {torch.__version__}")
255
  logger.info(f"✅ CUDA available: {torch.cuda.is_available()}")
256
+ try:
257
+ version = importlib.metadata.version("matanyone")
258
+ logger.info(f"[MATANY] MatAnyone version: {version}")
259
+ except Exception:
260
+ logger.info("[MATANY] MatAnyone version unknown")
261
+
262
  if torch.cuda.is_available():
263
  device_count = torch.cuda.device_count()
264
  current_device = torch.cuda.current_device()
265
  device_name = torch.cuda.get_device_name()
266
  device_capability = torch.cuda.get_device_capability()
267
+
268
  # Get GPU memory info
269
  memory_allocated = torch.cuda.memory_allocated() / (1024**3) # GB
270
  memory_reserved = torch.cuda.memory_reserved() / (1024**3) # GB
271
  memory_total = torch.cuda.get_device_properties(current_device).total_memory / (1024**3) # GB
272
+
273
  gpu_status = f"""✅ GPU Acceleration Active
274
  🖥️ Device: {device_name} (Compute {device_capability[0]}.{device_capability[1]})
275
  💾 Memory: {memory_allocated:.1f}GB allocated / {memory_total:.1f}GB total
276
  🔧 CUDA {torch.version.cuda} | PyTorch {torch.__version__}
277
  📊 Ready for SAM2 + MatAnyone processing..."""
278
+
279
  logger.info(f"✅ CUDA device count: {device_count}")
280
  logger.info(f"✅ Current device: {current_device}")
281
  logger.info(f"✅ Device name: {device_name}")
282
  logger.info(f"✅ GPU memory: {memory_allocated:.1f}GB/{memory_total:.1f}GB")
283
+
284
  yield gr.update(visible=False), gr.update(visible=True), None, gpu_status
285
  else:
286
  logger.error(f"❌ CUDA NOT AVAILABLE - GPU processing will fail")
 
290
  logger.error(f"❌ Torch/CUDA check failed: {e}")
291
  yield gr.update(visible=True), gr.update(visible=False), None, f"GPU check error: {e}"
292
  return
293
+
294
  yield gr.update(visible=False), gr.update(visible=True), None, gpu_status + "\n\n🔄 Loading pipeline modules..."
295
  logger.info(f"About to import pipeline module...")
296
+
297
  try:
298
  pipe = importlib.import_module("pipeline")
299
  logger.info(f"✅ Pipeline module imported successfully")
 
300
  pipeline_status = gpu_status + "\n\n✅ Pipeline modules loaded\n📹 Initializing video processing pipeline..."
301
  yield gr.update(visible=False), gr.update(visible=True), None, pipeline_status
302
  except Exception as e:
303
  logger.error(f"❌ Pipeline import failed: {e}")
304
  yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline import error: {e}"
305
  return
306
+
307
  logger.info(f"Calling pipe.process with video_path={video_path}, bg_path={bg_path}")
308
  logger.info(f"=== CALLING PIPELINE.PROCESS ===")
309
+
310
  # Enhanced status during processing with detailed stage tracking
311
  stage_status = {
312
  "current_stage": "Starting...",
313
  "sam2_status": "⏳ Pending",
314
+ "matany_status": "⏳ Pending",
315
  "composite_status": "⏳ Pending",
316
  "audio_status": "⏳ Pending",
317
  "frame_progress": ""
318
  }
319
+
320
  def format_status():
321
  return (gpu_status + f"\n\n🚀 PROCESSING: {stage_status['current_stage']}\n\n" +
322
  f"📊 PIPELINE STAGES:\n" +
 
325
  f"🎬 Video Compositing: {stage_status['composite_status']}\n" +
326
  f"🔊 Audio Muxing: {stage_status['audio_status']}\n" +
327
  (f"\n📈 {stage_status['frame_progress']}" if stage_status['frame_progress'] else ""))
328
+
329
  processing_status = format_status()
330
  yield gr.update(visible=False), gr.update(visible=True), None, processing_status
331
+
332
  # Create progress callback to update UI status with detailed tracking
333
+ def progress_callback(pct: float, msg: str):
334
  nonlocal stage_status
335
+
336
  # Update current stage and frame progress
337
+ stage_status['current_stage'] = msg
338
+
339
  # Track specific stages
340
+ if "SAM2" in msg or "segmentation" in msg.lower():
341
+ if "complete" in msg.lower() or "✅" in msg:
342
  stage_status['sam2_status'] = "✅ Complete"
343
  else:
344
  stage_status['sam2_status'] = "🔄 Running..."
345
+ elif "MatAnyone" in msg or "matting" in msg.lower():
346
+ if "complete" in msg.lower() or "" in msg:
 
347
  stage_status['matany_status'] = "✅ Complete"
348
+ elif "failed" in msg.lower() or "fallback" in msg.lower():
349
  stage_status['matany_status'] = "❌ Failed → Fallback"
350
  else:
351
  stage_status['matany_status'] = "🔄 Running..."
352
+ elif "composit" in msg.lower():
353
+ if "complete" in msg.lower() or "✅" in msg:
 
354
  stage_status['composite_status'] = "✅ Complete"
355
  else:
356
  stage_status['composite_status'] = "🔄 Running..."
357
+ elif "audio" in msg.lower() or "mux" in msg.lower():
358
+ if "complete" in msg.lower() or "" in msg:
 
359
  stage_status['audio_status'] = "✅ Complete"
360
  else:
361
  stage_status['audio_status'] = "🔄 Running..."
362
+
363
  # Extract frame progress
364
+ if "/" in msg and any(word in msg.lower() for word in ["frame", "matting", "chunking"]):
365
+ stage_status['frame_progress'] = msg
366
+
367
  updated_status = format_status()
368
  return gr.update(visible=False), gr.update(visible=True), None, updated_status
369
+
370
  try:
 
371
  out_path, diag = pipe.process(
372
  video_path=video_path,
373
  bg_image_path=bg_path,
 
 
 
374
  work_dir=None,
375
  progress_callback=progress_callback
376
  )
 
380
  logger.error(f"❌ Pipeline.process failed: {e}")
381
  import traceback
382
  logger.error(f"Full traceback: {traceback.format_exc()}")
 
383
  error_status = gpu_status + f"\n\n❌ PROCESSING FAILED\n🚨 Error: {str(e)[:200]}..."
384
  yield gr.update(visible=True), gr.update(visible=False), None, error_status
385
  return
386
+
387
  if out_path:
388
  # Enhanced final processing stats with detailed breakdown
389
  fps = diag.get('fps', 'unknown')
 
391
  sam2_ok = diag.get('sam2_ok', False)
392
  matany_ok = diag.get('matany_ok', False)
393
  processing_time = diag.get('total_time_sec', 0)
394
+
 
 
395
  # Check mask validation results for quality feedback
396
  mask_validation = diag.get('mask_validation', {})
397
  mask_valid = mask_validation.get('valid', False)
398
  mask_coverage = mask_validation.get('stats', {}).get('coverage_percent', 0)
399
+
400
  # Get final GPU memory usage and verify GPU acceleration was used
401
  try:
402
  import torch
403
  if torch.cuda.is_available():
404
  final_memory = torch.cuda.memory_allocated() / (1024**3)
405
  peak_memory = torch.cuda.max_memory_allocated() / (1024**3)
 
 
406
  logger.info(f"GPU USAGE VERIFICATION:")
407
  logger.info(f" Final memory allocated: {final_memory:.2f}GB")
408
  logger.info(f" Peak memory used: {peak_memory:.2f}GB")
 
409
  if peak_memory < 0.1: # Less than 100MB indicates CPU usage
410
+ logger.warning(f"⚠️ LOW GPU USAGE! Peak memory {peak_memory:.2f}GB suggests CPU fallback")
411
  else:
412
  logger.info(f"✅ GPU ACCELERATION CONFIRMED - Peak usage {peak_memory:.2f}GB")
 
413
  torch.cuda.reset_peak_memory_stats() # Reset for next run
414
  else:
415
  final_memory = peak_memory = 0
 
417
  except Exception as e:
418
  logger.error(f"GPU memory check failed: {e}")
419
  final_memory = peak_memory = 0
420
+
421
  # Enhanced success message with segmentation quality info
422
  segmentation_quality = ""
423
  if mask_valid and mask_coverage > 0:
 
427
  segmentation_quality = f"⚠️ High segmentation ({mask_coverage:.1f}% - check background)"
428
  else:
429
  segmentation_quality = f"✅ Person segmented ({mask_coverage:.1f}%)"
430
+
431
  status_msg = gpu_status + f"""
432
 
433
  🎉 PROCESSING COMPLETE!
434
+ ✅ Stage 1: SAM2 segmentation {'✓' if sam2_ok else '✗'}
435
  {segmentation_quality}
436
+ ✅ Stage 2: MatAnyone matting {'✓' if matany_ok else '✗'}
437
  ✅ Stage 3: Final compositing complete
438
 
439
  📊 RESULTS:
 
466
  logger.error(f"Full traceback: {traceback.format_exc()}")
467
  yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {e}"
468
  finally:
469
+ # Best-effort cleanup of any temp files
470
  try:
471
  if input_video is None and 'video_path' in locals() and video_path and os.path.exists(video_path):
472
  os.unlink(video_path)
473
+ if 'bg_path' in locals() and bg_path and os.path.exists(bg_path):
474
+ os.unlink(bg_path)
475
  except Exception:
476
  pass
477
 
 
494
  """
495
 
496
  with gr.Blocks(css=css, title="BackgroundFX Pro") as app:
497
+ gr.Markdown("# BackgroundFX Pro — SAM2 + MatAnyone")
498
 
499
  with gr.Row():
500
  status = _system_status()
 
536
  result_video = gr.Video(label="Processed Video", height=400)
537
  status_output = gr.Textbox(label="Processing Status", lines=8, max_lines=15, elem_classes=["status-box"])
538
  gr.Markdown("""
539
+ ### Pipeline
540
+ 1. SAM2 Smart Person Detection → proper mask
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/mask_validation.py CHANGED
@@ -1,5 +1,16 @@
 
1
  """
2
  Mask validation utilities for BackgroundFX Pro.
 
 
 
 
 
 
 
 
 
 
3
  """
4
 
5
  import numpy as np
@@ -7,6 +18,16 @@
7
  from pathlib import Path
8
  from typing import Dict, Union, Tuple, Optional
9
 
 
 
 
 
 
 
 
 
 
 
10
  def validate_mask_for_matanyone_simple(mask_input: Union[np.ndarray, str, Path], target_hw=None, *_, **__) -> Tuple[bool, Dict, str]:
11
  """
12
  Back/forward-compatible mask sanity check for MatAnyone/SAM2 pipelines.
@@ -22,6 +43,15 @@ def validate_mask_for_matanyone_simple(mask_input: Union[np.ndarray, str, Path],
22
  - We only *validate*; inversion/repair is upstream's job.
23
  - Always returns True unless the mask is unreadable or wrong rank.
24
  """
 
 
 
 
 
 
 
 
 
25
  # ---- load to np.uint8/float32 2D ----
26
  try:
27
  import torch # optional
@@ -32,14 +62,17 @@ def validate_mask_for_matanyone_simple(mask_input: Union[np.ndarray, str, Path],
32
  if isinstance(mask_input, (str, Path)):
33
  mask = cv2.imread(str(mask_input), cv2.IMREAD_GRAYSCALE)
34
  if mask is None:
 
35
  return False, {}, f"Mask not found or unreadable: {mask_input}"
36
  # Load from torch tensor
37
  elif (torch is not None) and isinstance(mask_input, torch.Tensor):
38
  mask = mask_input.detach().cpu().numpy()
 
39
  # Already numpy
40
  elif isinstance(mask_input, np.ndarray):
41
  mask = mask_input
42
  else:
 
43
  return False, {}, f"Unsupported mask type: {type(mask_input)}"
44
 
45
  # If 3D, squeeze/convert to grayscale
@@ -49,12 +82,16 @@ def validate_mask_for_matanyone_simple(mask_input: Union[np.ndarray, str, Path],
49
  else:
50
  mask = np.squeeze(mask)
51
  if mask.ndim != 2:
 
52
  return False, {}, f"Mask must be 2D, got shape {mask.shape}"
53
 
 
 
54
  # Optional resize to target (H, W)
55
  if target_hw is not None and isinstance(target_hw, (tuple, list)) and len(target_hw) == 2:
56
  H, W = int(target_hw[0]), int(target_hw[1])
57
  if mask.shape != (H, W):
 
58
  mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR)
59
 
60
  # Normalize to [0,1] float
@@ -69,6 +106,8 @@ def validate_mask_for_matanyone_simple(mask_input: Union[np.ndarray, str, Path],
69
 
70
  # We keep validation permissive; upstream may invert/repair based on coverage
71
  msg = f"Basic validation - {coverage:.1f}% coverage"
 
 
72
  return True, stats, msg
73
 
74
  def validate_mask_for_matanyone_advanced(mask: np.ndarray, min_foreground: float = 0.01) -> Tuple[bool, str]:
@@ -82,6 +121,8 @@ def validate_mask_for_matanyone_advanced(mask: np.ndarray, min_foreground: float
82
  Returns:
83
  Tuple of (is_valid, error_message)
84
  """
 
 
85
  # Basic validation first
86
  is_valid, msg = validate_mask_for_matanyone_simple(mask)
87
  if not is_valid:
@@ -94,8 +135,10 @@ def validate_mask_for_matanyone_advanced(mask: np.ndarray, min_foreground: float
94
  # Check foreground ratio
95
  fg_ratio = mask.mean()
96
  if fg_ratio < min_foreground:
 
97
  return False, f"Foreground area too small ({fg_ratio:.1%} < {min_foreground:.0%})"
98
 
 
99
  return True, ""
100
 
101
  def preprocess_mask(mask: np.ndarray, target_size: Optional[Tuple[int, int]] = None) -> np.ndarray:
@@ -109,19 +152,33 @@ def preprocess_mask(mask: np.ndarray, target_size: Optional[Tuple[int, int]] = N
109
  Returns:
110
  Preprocessed mask (H,W) float32 in [0,1]
111
  """
 
 
112
  # Ensure 2D
113
  if mask.ndim == 3:
114
- mask = mask.squeeze(2)
115
-
 
 
 
 
 
116
  # Convert to float32 in [0,1]
117
  if mask.dtype != np.float32:
118
  mask = mask.astype(np.float32)
 
119
  if mask.max() > 1.0:
120
  mask = mask / 255.0
121
-
 
 
 
122
  # Resize if needed
123
  if target_size is not None and (mask.shape[0] != target_size[0] or mask.shape[1] != target_size[1]):
124
  import cv2
125
- mask = cv2.resize(mask, (target_size[1], target_size[0]), interpolation=cv2.INTER_LINEAR)
126
-
127
- return mask
 
 
 
 
1
+ #!/usr/bin/env python3
2
  """
3
  Mask validation utilities for BackgroundFX Pro.
4
+ ==============================================
5
+ - Validates masks for MatAnyone compatibility
6
+ - Ensures 2D masks [H,W] to avoid 5D tensor issues
7
+ - Aligned with torch==2.3.1+cu121, MatAnyone v1.0.0
8
+
9
+ Changes (2025-09-16):
10
+ - Aligned with updated pipeline.py and models/
11
+ - Added logging for mask shape and coverage
12
+ - Enhanced preprocess_mask for Torch tensors from SAM2
13
+ - Ensured 2D mask output for MatAnyone
14
  """
15
 
16
  import numpy as np
 
18
  from pathlib import Path
19
  from typing import Dict, Union, Tuple, Optional
20
 
21
+ import logging
22
+ import importlib.metadata
23
+
24
+ logger = logging.getLogger("backgroundfx_pro")
25
+ if not logger.handlers:
26
+ h = logging.StreamHandler()
27
+ h.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
28
+ logger.addHandler(h)
29
+ logger.setLevel(logging.INFO)
30
+
31
  def validate_mask_for_matanyone_simple(mask_input: Union[np.ndarray, str, Path], target_hw=None, *_, **__) -> Tuple[bool, Dict, str]:
32
  """
33
  Back/forward-compatible mask sanity check for MatAnyone/SAM2 pipelines.
 
43
  - We only *validate*; inversion/repair is upstream's job.
44
  - Always returns True unless the mask is unreadable or wrong rank.
45
  """
46
+ logger.info(f"[MaskValidation] Validating mask: {mask_input}")
47
+
48
+ # Log MatAnyone version for compatibility check
49
+ try:
50
+ version = importlib.metadata.version("matanyone")
51
+ logger.info(f"[MaskValidation] MatAnyone version: {version}")
52
+ except Exception:
53
+ logger.info("[MaskValidation] MatAnyone version unknown")
54
+
55
  # ---- load to np.uint8/float32 2D ----
56
  try:
57
  import torch # optional
 
62
  if isinstance(mask_input, (str, Path)):
63
  mask = cv2.imread(str(mask_input), cv2.IMREAD_GRAYSCALE)
64
  if mask is None:
65
+ logger.error(f"[MaskValidation] Could not load mask: {mask_input}")
66
  return False, {}, f"Mask not found or unreadable: {mask_input}"
67
  # Load from torch tensor
68
  elif (torch is not None) and isinstance(mask_input, torch.Tensor):
69
  mask = mask_input.detach().cpu().numpy()
70
+ logger.info(f"[MaskValidation] Loaded Torch tensor mask: shape={mask.shape}, dtype={mask.dtype}")
71
  # Already numpy
72
  elif isinstance(mask_input, np.ndarray):
73
  mask = mask_input
74
  else:
75
+ logger.error(f"[MaskValidation] Unsupported mask type: {type(mask_input)}")
76
  return False, {}, f"Unsupported mask type: {type(mask_input)}"
77
 
78
  # If 3D, squeeze/convert to grayscale
 
82
  else:
83
  mask = np.squeeze(mask)
84
  if mask.ndim != 2:
85
+ logger.error(f"[MaskValidation] Mask must be 2D, got shape {mask.shape}")
86
  return False, {}, f"Mask must be 2D, got shape {mask.shape}"
87
 
88
+ logger.info(f"[MaskValidation] Loaded mask shape: {mask.shape}, dtype: {mask.dtype}")
89
+
90
  # Optional resize to target (H, W)
91
  if target_hw is not None and isinstance(target_hw, (tuple, list)) and len(target_hw) == 2:
92
  H, W = int(target_hw[0]), int(target_hw[1])
93
  if mask.shape != (H, W):
94
+ logger.info(f"[MaskValidation] Resizing mask from {mask.shape} to {target_hw}")
95
  mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR)
96
 
97
  # Normalize to [0,1] float
 
106
 
107
  # We keep validation permissive; upstream may invert/repair based on coverage
108
  msg = f"Basic validation - {coverage:.1f}% coverage"
109
+ logger.info(f"[MaskValidation] Validation result: {msg}, valid: True, coverage: {coverage:.1f}%")
110
+
111
  return True, stats, msg
112
 
113
  def validate_mask_for_matanyone_advanced(mask: np.ndarray, min_foreground: float = 0.01) -> Tuple[bool, str]:
 
121
  Returns:
122
  Tuple of (is_valid, error_message)
123
  """
124
+ logger.info(f"[MaskValidation] Advanced validation on mask shape: {mask.shape}")
125
+
126
  # Basic validation first
127
  is_valid, msg = validate_mask_for_matanyone_simple(mask)
128
  if not is_valid:
 
135
  # Check foreground ratio
136
  fg_ratio = mask.mean()
137
  if fg_ratio < min_foreground:
138
+ logger.warning(f"[MaskValidation] Foreground area too small ({fg_ratio:.1%} < {min_foreground:.0%})")
139
  return False, f"Foreground area too small ({fg_ratio:.1%} < {min_foreground:.0%})"
140
 
141
+ logger.info(f"[MaskValidation] Advanced validation passed: fg_ratio={fg_ratio:.1%}")
142
  return True, ""
143
 
144
def preprocess_mask(mask: np.ndarray, target_size: Optional[Tuple[int, int]] = None) -> np.ndarray:
    """
    Normalize a mask to a 2-D float32 array in [0, 1] for MatAnyone.

    Args:
        mask: Input mask. Accepts 2-D (H, W); 3-D with singleton axes such as
            (H, W, 1) or (1, H, W) (e.g. channel-first tensors from SAM2,
            already converted to numpy); or 3-D with 3/4 channels, which is
            collapsed to grayscale by averaging the first three channels.
        target_size: Optional (H, W) to resize to (bilinear, via OpenCV).

    Returns:
        Preprocessed mask of shape (H, W), dtype float32, values clipped
        to [0, 1].

    Raises:
        ValueError: If the mask cannot be reduced to exactly 2 dimensions.
    """
    # Same named logger the module configures at import time.
    logger = logging.getLogger("backgroundfx_pro")
    # Lazy %-style args: no formatting cost when INFO is disabled.
    logger.info("[MaskValidation] Preprocessing mask: shape=%s, dtype=%s", mask.shape, mask.dtype)

    # Ensure 2D: drop singleton axes first (handles both (H, W, 1) and the
    # channel-first (1, H, W) layout, which np.squeeze(..., axis=2) would
    # reject), then collapse RGB(A) masks to grayscale by channel mean,
    # matching the 3-channel handling in validate_mask_for_matanyone_simple.
    if mask.ndim == 3:
        mask = np.squeeze(mask)
        if mask.ndim == 3 and mask.shape[2] in (3, 4):
            mask = mask[..., :3].astype(np.float32).mean(axis=2)
        logger.info("[MaskValidation] Reduced 3D mask to 2D: %s", mask.shape)

    if mask.ndim != 2:
        logger.error("[MaskValidation] Preprocessing failed: mask must be 2D, got %s", mask.shape)
        raise ValueError(f"Mask must be 2D, got shape {mask.shape}")

    # Convert to float32 in [0, 1]; a max above 1.0 is taken to mean an
    # 8-bit-style 0-255 value range. Empty masks are passed through without
    # touching max(), which would raise on a zero-size array.
    if mask.dtype != np.float32:
        mask = mask.astype(np.float32)
        logger.info("[MaskValidation] Converted dtype to float32")
    if mask.size and mask.max() > 1.0:
        mask = mask / 255.0
        logger.info("[MaskValidation] Normalized to [0,1] range")

    # Guard against numerical spill outside [0, 1] (e.g. from prior resizes).
    mask = np.clip(mask, 0.0, 1.0)

    # Resize if needed; cv2.resize takes (width, height), hence the swap.
    if target_size is not None and (mask.shape[0] != target_size[0] or mask.shape[1] != target_size[1]):
        import cv2
        H, W = target_size
        mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_LINEAR)
        logger.info("[MaskValidation] Resized mask to %s", target_size)

    logger.info(
        "[MaskValidation] Preprocessed mask: shape=%s, dtype=%s, range=[%.3f, %.3f]",
        mask.shape,
        mask.dtype,
        float(mask.min()) if mask.size else 0.0,
        float(mask.max()) if mask.size else 0.0,
    )
    return mask