MogensR committed on
Commit
b595219
·
1 Parent(s): 0deca70
Files changed (2) hide show
  1. models.py +793 -0
  2. pipeline.py +229 -945
models.py ADDED
@@ -0,0 +1,793 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ BackgroundFX Pro - Model Loading & Utilities
4
+ ===========================================
5
+ Contains all model loading, inference functions, and utility functions
6
+ moved from the main pipeline for better organization.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import sys
13
+ import cv2
14
+ import subprocess
15
+ import inspect
16
+ import logging
17
+ from pathlib import Path
18
+ from typing import Optional, Tuple, Dict, Any, Union
19
+
20
+ import numpy as np
21
+ import yaml
22
+
23
+ # --------------------------------------------------------------------------------------
24
+ # Logging
25
+ # --------------------------------------------------------------------------------------
26
+ logger = logging.getLogger("backgroundfx_pro")
27
+
28
+ # --------------------------------------------------------------------------------------
29
+ # Optional dependencies
30
+ # --------------------------------------------------------------------------------------
31
+ try:
32
+ import mediapipe as mp # type: ignore
33
+ _HAS_MEDIAPIPE = True
34
+ except Exception:
35
+ _HAS_MEDIAPIPE = False
36
+
37
+ # --------------------------------------------------------------------------------------
38
+ # Path setup for third_party repos
39
+ # --------------------------------------------------------------------------------------
40
+ ROOT = Path(__file__).resolve().parent
41
+ TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
42
+ TP_MATANY = Path(os.environ.get("THIRD_PARTY_MATANY_DIR", ROOT / "third_party" / "matanyone")).resolve()
43
+
44
+ def _add_sys_path(p: Path) -> None:
45
+ p_str = str(p)
46
+ if p_str not in sys.path:
47
+ sys.path.insert(0, p_str)
48
+
49
+ _add_sys_path(TP_SAM2)
50
+ _add_sys_path(TP_MATANY)
51
+
52
+ # --------------------------------------------------------------------------------------
53
+ # Basic Utilities
54
+ # --------------------------------------------------------------------------------------
55
+ def _ffmpeg_bin() -> str:
56
+ return os.environ.get("FFMPEG_BIN", "ffmpeg")
57
+
58
def _probe_ffmpeg() -> bool:
    """True if the ffmpeg binary can be executed (``ffmpeg -version`` succeeds)."""
    cmd = [_ffmpeg_bin(), "-version"]
    try:
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except Exception:
        return False
    return True
64
+
65
+ def _has_cuda() -> bool:
66
+ try:
67
+ import torch # type: ignore
68
+ return torch.cuda.is_available()
69
+ except Exception:
70
+ return False
71
+
72
def _pick_device(env_key: str) -> str:
    """Resolve "cuda"/"cpu" from the env var *env_key*; otherwise auto-detect."""
    choice = os.environ.get(env_key, "").strip().lower()
    if choice == "cuda" or choice == "cpu":
        return choice
    # No explicit request — prefer CUDA when torch reports it.
    if _has_cuda():
        return "cuda"
    return "cpu"
77
+
78
+ def _ensure_dir(p: Path) -> None:
79
+ p.mkdir(parents=True, exist_ok=True)
80
+
81
def _cv_read_first_frame(video_path: Union[str, Path]) -> Tuple[Optional[np.ndarray], int, Tuple[int, int]]:
    """Open *video_path* and return ``(first_frame_bgr_or_None, fps, (width, height))``.

    Returns ``(None, 0, (0, 0))`` when the container cannot be opened.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return None, 0, (0, 0)
    # Some containers report 0/NaN fps; fall back to 25.
    fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))
    grabbed, frame = cap.read()
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    cap.release()
    return (frame if grabbed else None), fps, (width, height)
93
+
94
def _save_mask_png(mask: np.ndarray, path: Union[str, Path]) -> str:
    """Write *mask* to *path* as an 8-bit image and return the path as ``str``."""
    if mask.dtype == bool:
        out = mask.astype(np.uint8) * 255
    elif mask.dtype == np.uint8:
        out = mask
    else:
        # Arbitrary numeric dtype: clamp into byte range.
        out = np.clip(mask, 0, 255).astype(np.uint8)
    cv2.imwrite(str(path), out)
    return str(path)
101
+
102
def _resize_keep_ar(image: np.ndarray, target_wh: Tuple[int, int]) -> np.ndarray:
    """Letterbox *image* into ``target_wh`` (width, height), preserving aspect ratio.

    The scaled image is centered on a black canvas; degenerate sizes return
    the input unchanged.
    """
    tw, th = target_wh
    src_h, src_w = image.shape[:2]
    if src_h == 0 or src_w == 0 or tw == 0 or th == 0:
        return image
    scale = min(tw / src_w, th / src_h)
    new_w = max(1, int(round(src_w * scale)))
    new_h = max(1, int(round(src_h * scale)))
    scaled = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.zeros((th, tw, 3), dtype=scaled.dtype)
    off_x = (tw - new_w) // 2
    off_y = (th - new_h) // 2
    canvas[off_y:off_y + new_h, off_x:off_x + new_w] = scaled
    return canvas
115
+
116
def _video_writer(out_path: Path, fps: int, size: Tuple[int, int]) -> cv2.VideoWriter:
    """mp4v ``VideoWriter`` for *out_path*; fps is clamped to at least 1."""
    return cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*"mp4v"), max(1, fps), size)
119
+
120
def _mux_audio(src_video: Union[str, Path], silent_video: Union[str, Path], out_path: Union[str, Path]) -> bool:
    """Copy video from silent_video + audio from src_video into out_path (AAC)."""
    cmd = [
        _ffmpeg_bin(), "-y",
        "-i", str(silent_video),
        "-i", str(src_video),
        "-map", "0:v:0",       # video: the freshly rendered silent track
        "-map", "1:a:0?",      # audio: first stream of the source, if any
        "-c:v", "copy",
        "-c:a", "aac", "-b:a", "192k",
        "-shortest",
        str(out_path),
    ]
    try:
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as e:
        logger.warning(f"Audio mux failed; returning silent video. Reason: {e}")
        return False
    return True
139
+
140
+ # --------------------------------------------------------------------------------------
141
+ # Compositing & Image Processing
142
+ # --------------------------------------------------------------------------------------
143
def _refine_alpha(alpha: np.ndarray, erode_px: int = 1, dilate_px: int = 2, blur_px: float = 1.5) -> np.ndarray:
    """Erode→dilate + gentle blur → float alpha in [0,1].

    Accepts bool / uint8 / float mattes in either [0,1] or [0,255] range.

    Args:
        alpha: Input matte, any numeric dtype.
        erode_px: Elliptical erosion kernel size in px (0 disables).
        dilate_px: Elliptical dilation kernel size in px (0 disables).
        blur_px: Gaussian blur radius in px (0/None disables).

    Returns:
        float32 matte clipped to [0, 1].
    """
    # Bug fix: normalize any 0-255-range input by value, not by dtype — the
    # old code skipped normalization whenever dtype was already float32,
    # so a float32 matte in [0,255] came through 255x too bright.
    a = alpha.astype(np.float32, copy=True)
    if a.max() > 1.0:
        a = a / 255.0

    a_u8 = np.clip(np.round(a * 255.0), 0, 255).astype(np.uint8)
    if erode_px > 0:
        k = max(1, int(erode_px))
        a_u8 = cv2.erode(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
    if dilate_px > 0:
        k = max(1, int(dilate_px))
        a_u8 = cv2.dilate(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
    a = a_u8.astype(np.float32) / 255.0

    if blur_px and blur_px > 0:
        rad = max(1, int(round(blur_px)))
        # GaussianBlur requires odd kernel sizes; `| 1` forces oddness.
        a = cv2.GaussianBlur(a, (rad | 1, rad | 1), 0)

    return np.clip(a, 0.0, 1.0)
166
+
167
+ def _to_linear(rgb: np.ndarray, gamma: float = 2.2) -> np.ndarray:
168
+ x = np.clip(rgb.astype(np.float32) / 255.0, 0.0, 1.0)
169
+ return np.power(x, gamma)
170
+
171
+ def _to_srgb(lin: np.ndarray, gamma: float = 2.2) -> np.ndarray:
172
+ x = np.clip(lin, 0.0, 1.0)
173
+ return np.clip(np.power(x, 1.0 / gamma) * 255.0, 0, 255).astype(np.uint8)
174
+
175
def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount: float = 0.18) -> np.ndarray:
    """Simple light wrap from background into subject edges."""
    r = max(1, int(radius))
    # Blur the *inverse* alpha so background light bleeds across the edge band.
    spill = cv2.GaussianBlur(1.0 - alpha01, (r | 1, r | 1), 0)
    return bg_rgb.astype(np.float32) * spill[..., None] * float(amount)
182
+
183
def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35) -> np.ndarray:
    """Reduce saturation in boundary band (alpha≈0.5) to remove old-background tint."""
    # Bell-shaped weight: 1 where alpha == 0.5, falling to 0 at alpha 0 or 1.
    band = np.clip(1.0 - 2.0 * np.abs(alpha01 - 0.5), 0.0, 1.0)
    hsv = cv2.cvtColor(fg_rgb.astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
    hue, sat, val = cv2.split(hsv)
    sat = sat * (1.0 - amount * band)
    merged = cv2.merge([hue, np.clip(sat, 0, 255), val])
    return cv2.cvtColor(merged.astype(np.uint8), cv2.COLOR_HSV2RGB)
193
+
194
def _composite_frame_pro(fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarray,
                         erode_px: Optional[int] = None, dilate_px: Optional[int] = None,
                         blur_px: Optional[float] = None,
                         lw_radius: Optional[int] = None, lw_amount: Optional[float] = None,
                         despill_amount: Optional[float] = None) -> np.ndarray:
    """Gamma-aware composite + edge refinement + light wrap + boundary de-spill.

    Args:
        fg_rgb: Foreground frame, RGB uint8.
        alpha: Matte in any supported range; refined to float [0,1].
        bg_rgb: Background frame, RGB uint8, same size as *fg_rgb*.
        erode_px / dilate_px / blur_px: Edge-refinement knobs; defaults come
            from EDGE_ERODE / EDGE_DILATE / EDGE_BLUR env vars.
        lw_radius / lw_amount: Light-wrap knobs; defaults from
            LIGHTWRAP_RADIUS / LIGHTWRAP_AMOUNT env vars.
        despill_amount: Boundary de-spill strength; default from DESPILL_AMOUNT.

    Returns:
        Composited RGB uint8 frame.
    """
    # Fix: parameters defaulting to None are Optional, not bare int/float.
    erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
    dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
    blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
    lw_radius = lw_radius if lw_radius is not None else int(os.environ.get("LIGHTWRAP_RADIUS", "5"))
    lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
    despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))

    # Refine matte to float [0,1] via morphology + blur.
    a = _refine_alpha(alpha, erode_px=erode_px, dilate_px=dilate_px, blur_px=blur_px)

    # Edge de-spill: temper saturation where a ≈ 0.5.
    fg_rgb = _despill_edges(fg_rgb, a, amount=despill_amount)

    # Blend in linear light to avoid gamma-darkened edges.
    fg_lin = _to_linear(fg_rgb)
    bg_lin = _to_linear(bg_rgb)

    # Additive light wrap contribution from the background.
    lw = _light_wrap(bg_rgb, a, radius=lw_radius, amount=lw_amount)
    lw_lin = _to_linear(np.clip(lw, 0, 255).astype(np.uint8))

    comp_lin = fg_lin * a[..., None] + bg_lin * (1.0 - a[..., None]) + lw_lin
    return _to_srgb(comp_lin)
223
+
224
+ # --------------------------------------------------------------------------------------
225
+ # SAM2 Integration
226
+ # --------------------------------------------------------------------------------------
227
def _resolve_sam2_cfg(cfg_str: str) -> str:
    """Make the SAM2 config path absolute (prefer inside TP_SAM2)."""
    cfg_path = Path(cfg_str)
    if not cfg_path.is_absolute():
        inside_repo = TP_SAM2 / cfg_path
        if inside_repo.exists():
            return str(inside_repo)
    if cfg_path.exists():
        return str(cfg_path)
    # Last resort: common defaults inside the repo
    defaults = (
        "configs/sam2/sam2_hiera_l.yaml",
        "configs/sam2/sam2_hiera_b.yaml",
        "configs/sam2/sam2_hiera_s.yaml",
    )
    for name in defaults:
        candidate = TP_SAM2 / name
        if candidate.exists():
            return str(candidate)
    return str(cfg_str)  # let build_sam2 raise a clear error
242
+
243
def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
    """If config references 'hieradet', try to find a 'hiera' config."""

    def _trunk_target(doc) -> Optional[str]:
        # Dig model.image_encoder.trunk._target_ (or .target) out of a config.
        model = (doc or {}).get("model", {})
        encoder = model.get("image_encoder") or {}
        trunk = encoder.get("trunk") or {}
        return trunk.get("_target_") or trunk.get("target")

    try:
        with open(cfg_path, "r") as f:
            data = yaml.safe_load(f)
        # NOTE: mirrors original behavior — a None document raises here and
        # falls through to `return None`.
        target = (data.get("model", {}).get("image_encoder") or {}).get("trunk") or {}
        target = target.get("_target_") or target.get("target")
        if isinstance(target, str) and "hieradet" in target:
            # Scan every yaml in the repo for one whose trunk is plain 'hiera'.
            for candidate in TP_SAM2.rglob("*.yaml"):
                try:
                    with open(candidate, "r") as f2:
                        alt_target = _trunk_target(yaml.safe_load(f2))
                    if isinstance(alt_target, str) and ".hiera." in alt_target:
                        logger.info(f"SAM2: switching config from 'hieradet' → 'hiera': {candidate}")
                        return str(candidate)
                except Exception:
                    continue
    except Exception:
        pass
    return None
270
+
271
def load_sam2() -> Tuple[Optional[object], bool, Dict[str, Any]]:
    """Robust SAM2 loader with config resolution and error handling."""
    meta: Dict[str, Any] = {"sam2_import_ok": False, "sam2_init_ok": False}
    try:
        from sam2.build_sam import build_sam2  # type: ignore
        from sam2.sam2_image_predictor import SAM2ImagePredictor  # type: ignore
    except Exception as e:
        logger.warning(f"SAM2 import failed: {e}")
        return None, False, meta
    meta["sam2_import_ok"] = True

    device = _pick_device("SAM2_DEVICE")
    cfg = _resolve_sam2_cfg(os.environ.get("SAM2_MODEL_CFG", "configs/sam2/sam2_hiera_l.yaml"))
    ckpt = os.environ.get("SAM2_CHECKPOINT", "")

    def _construct(cfg_path: str):
        """Call build_sam2 with whatever argument names this fork exposes."""
        accepted = set(inspect.signature(build_sam2).parameters.keys())
        kwargs: Dict[str, Any] = {}
        if "config_file" in accepted:
            kwargs["config_file"] = cfg_path
        elif "model_cfg" in accepted:
            kwargs["model_cfg"] = cfg_path
        if ckpt:
            for key in ("checkpoint", "ckpt_path", "weights"):
                if key in accepted:
                    kwargs[key] = ckpt
                    break
        if "device" in accepted:
            kwargs["device"] = device
        try:
            return build_sam2(**kwargs)
        except TypeError:
            # Keyword convention rejected — retry positionally.
            positional = [cfg_path]
            if ckpt:
                positional.append(ckpt)
            if "device" not in kwargs:
                positional.append(device)
            return build_sam2(*positional)

    try:
        try:
            sam = _construct(cfg)
        except Exception:
            # A 'hieradet' config on a 'hiera'-only fork — look for a sibling.
            alt_cfg = _find_hiera_config_if_hieradet(cfg)
            if not alt_cfg:
                raise
            logger.info(f"SAM2: retrying with alt config: {alt_cfg}")
            sam = _construct(alt_cfg)
            cfg = alt_cfg

        predictor = SAM2ImagePredictor(sam)
        meta["sam2_init_ok"] = True
        meta["sam2_device"] = device
        meta["sam2_cfg"] = cfg
        meta["sam2_ckpt"] = ckpt or "(repo default)"
        return predictor, True, meta
    except Exception as e:
        logger.error(f"SAM2 init failed: {e}")
        return None, False, meta
336
+
337
def run_sam2_mask(predictor: object,
                  first_frame_bgr: np.ndarray,
                  point: Optional[Tuple[int, int]] = None,
                  auto: bool = False) -> Tuple[Optional[np.ndarray], bool]:
    """Return (mask_uint8_0_255, ok)."""
    if predictor is None:
        return None, False
    try:
        rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
        predictor.set_image(rgb)
        h, w = rgb.shape[:2]

        if auto:
            # Near-full-frame box prompt for "auto" mode.
            box = np.array([int(0.05 * w), int(0.05 * h), int(0.95 * w), int(0.95 * h)])
            masks, _, _ = predictor.predict(box=box)
        elif point is not None:
            coords = np.array([[int(point[0]), int(point[1])]], dtype=np.int32)
            labels = np.array([1], dtype=np.int32)  # 1 = positive click
            masks, _, _ = predictor.predict(point_coords=coords, point_labels=labels)
        else:
            # Default: slightly tighter centered box.
            box = np.array([int(0.1 * w), int(0.1 * h), int(0.9 * w), int(0.9 * h)])
            masks, _, _ = predictor.predict(box=box)

        if masks is None or len(masks) == 0:
            return None, False
        return masks[0].astype(np.uint8) * 255, True
    except Exception as e:
        logger.warning(f"SAM2 mask failed: {e}")
        return None, False
370
+
371
def _refine_mask_grabcut(image_bgr: np.ndarray,
                         mask_u8: np.ndarray,
                         iters: Optional[int] = None,
                         trimap_erode: Optional[int] = None,
                         trimap_dilate: Optional[int] = None) -> np.ndarray:
    """Use SAM2 seed as initialization for GrabCut refinement.

    Args:
        image_bgr: Source frame (BGR uint8).
        mask_u8: Seed mask, uint8; thresholded at 127.
        iters: GrabCut iterations; default from REFINE_GRABCUT_ITERS.
        trimap_erode: Erosion px producing the sure-foreground band
            (REFINE_TRIMAP_ERODE).
        trimap_dilate: Erosion px of the *inverse* mask producing sure
            background (REFINE_TRIMAP_DILATE).

    Returns:
        Refined 0/255 uint8 mask; the binarized input on GrabCut failure.
    """
    # Fix: parameters defaulting to None are Optional, not bare int.
    iters = int(os.environ.get("REFINE_GRABCUT_ITERS", "2")) if iters is None else int(iters)
    e = int(os.environ.get("REFINE_TRIMAP_ERODE", "3")) if trimap_erode is None else int(trimap_erode)
    d = int(os.environ.get("REFINE_TRIMAP_DILATE", "6")) if trimap_dilate is None else int(trimap_dilate)

    h, w = mask_u8.shape[:2]
    m = (mask_u8 > 127).astype(np.uint8) * 255

    # Sure-FG: erode the mask inward; sure-BG: erode the inverse mask.
    sure_fg = cv2.erode(m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, e), max(1, e))), iterations=1)
    sure_bg = cv2.erode(255 - m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, d), max(1, d))), iterations=1)

    gc_mask = np.full((h, w), cv2.GC_PR_BGD, dtype=np.uint8)
    gc_mask[sure_bg > 0] = cv2.GC_BGD
    gc_mask[sure_fg > 0] = cv2.GC_FGD

    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(image_bgr, gc_mask, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
        out = np.where((gc_mask == cv2.GC_FGD) | (gc_mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
        out = cv2.medianBlur(out, 5)
        return out
    # Fix: renamed exception var — `as e` shadowed the trimap-erode local `e`.
    except Exception as exc:
        logger.warning(f"GrabCut refinement failed; using original mask. Reason: {exc}")
        return m
401
+
402
+ # --------------------------------------------------------------------------------------
403
+ # MatAnyone Integration
404
+ # --------------------------------------------------------------------------------------
405
def load_matany() -> Tuple[Optional[object], bool, Dict[str, Any]]:
    """MatAnyone loader with disable switch and error handling."""
    meta: Dict[str, Any] = {"matany_import_ok": False, "matany_init_ok": False}

    if os.environ.get("ENABLE_MATANY", "1").strip().lower() in {"0", "false", "off", "no"}:
        logger.info("MatAnyone disabled by ENABLE_MATANY=0.")
        meta["disabled"] = True
        return None, False, meta

    try:
        try:
            from inference_core import InferenceCore  # type: ignore
        except Exception:
            # Some forks nest the module under the package namespace.
            from matanyone.inference.inference_core import InferenceCore  # type: ignore
    except Exception as e:
        logger.warning(f"MatAnyone import failed: {e}")
        return None, False, meta
    meta["matany_import_ok"] = True

    device = _pick_device("MATANY_DEVICE")
    repo_id = os.environ.get("MATANY_REPO_ID", "")
    ckpt = os.environ.get("MATANY_CHECKPOINT", "")

    # Check if this fork needs a prebuilt network
    try:
        params = inspect.signature(InferenceCore).parameters
        if "network" in params and params["network"].default is inspect._empty:
            logger.error(
                "This MatAnyone fork expects `InferenceCore(network=...)`. "
                "Pin a fork/commit that supplies a checkpoint-based constructor, "
                "or set ENABLE_MATANY=0 to skip."
            )
            meta["needs_network_arg"] = True
            return None, False, meta
    except Exception:
        pass

    # Try progressively simpler constructor signatures.
    attempts = (
        {"repo_id": repo_id or None, "checkpoint": ckpt or None, "device": device},
        {"checkpoint": ckpt or None, "device": device},
        {"device": device},
    )
    last_err = None
    for kwargs in attempts:
        try:
            matany = InferenceCore(**kwargs)
        except Exception as e:
            last_err = e
            continue
        meta["matany_init_ok"] = True
        meta["matany_device"] = device
        meta["matany_repo_id"] = repo_id or "(unset)"
        meta["matany_checkpoint"] = ckpt or "(unset)"
        return matany, True, meta

    logger.error(f"MatAnyone init failed with all fallbacks: {last_err}")
    return None, False, meta
463
+
464
def run_matany(matany: object,
               video_path: Union[str, Path],
               first_mask_path: Union[str, Path],
               work_dir: Union[str, Path]) -> Tuple[Optional[str], Optional[str], bool]:
    """Return (foreground_video_path, alpha_video_path, ok)."""
    if matany is None:
        return None, None, False

    def _paths_from_dict(result: dict) -> Optional[Tuple[str, str]]:
        # Forks name the keys differently; try the known variants.
        fg = result.get("foreground") or result.get("fg") or result.get("foreground_path")
        al = result.get("alpha") or result.get("alpha_path")
        if fg and al:
            return str(fg), str(al)
        return None

    try:
        if hasattr(matany, "process_video"):
            out = matany.process_video(input_path=str(video_path), mask_path=str(first_mask_path), output_dir=str(work_dir))
            if isinstance(out, (list, tuple)) and len(out) >= 2:
                return str(out[0]), str(out[1]), True
            if isinstance(out, dict):
                found = _paths_from_dict(out)
                if found:
                    return found[0], found[1], True

        if hasattr(matany, "run"):
            out = matany.run(video_path=str(video_path), seed_mask=str(first_mask_path), out_dir=str(work_dir))
            if isinstance(out, dict):
                found = _paths_from_dict(out)
                if found:
                    return found[0], found[1], True

        logger.error("MatAnyone returned no usable paths.")
        return None, None, False
    except Exception as e:
        logger.warning(f"MatAnyone processing failed: {e}")
        return None, None, False
495
+
496
+ # --------------------------------------------------------------------------------------
497
+ # Fallback Functions
498
+ # --------------------------------------------------------------------------------------
499
def fallback_mask(first_frame_bgr: np.ndarray) -> np.ndarray:
    """Prefer MediaPipe; fallback to GrabCut. Returns uint8 mask 0/255."""
    h, w = first_frame_bgr.shape[:2]

    if _HAS_MEDIAPIPE:
        try:
            selfie = mp.solutions.selfie_segmentation
            with selfie.SelfieSegmentation(model_selection=1) as segmenter:
                res = segmenter.process(cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB))
            binary = (np.clip(res.segmentation_mask, 0, 1) > 0.5).astype(np.uint8) * 255
            return cv2.medianBlur(binary, 5)
        except Exception as e:
            logger.warning(f"MediaPipe fallback failed: {e}")

    # Ultimate fallback: GrabCut
    gc_mask = np.zeros((h, w), np.uint8)
    rect = (int(0.1 * w), int(0.1 * h), int(0.8 * w), int(0.8 * h))
    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(first_frame_bgr, gc_mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
        keep = (gc_mask == cv2.GC_FGD) | (gc_mask == cv2.GC_PR_FGD)
        return np.where(keep, 255, 0).astype(np.uint8)
    except Exception as e:
        logger.warning(f"GrabCut failed: {e}")
        return np.zeros((h, w), dtype=np.uint8)
526
+
527
def composite_video(fg_path: Union[str, Path],
                    alpha_path: Union[str, Path],
                    bg_image_path: Union[str, Path],
                    out_path: Union[str, Path],
                    fps: int,
                    size: Tuple[int, int]) -> bool:
    """Blend MatAnyone FG+ALPHA over background using pro compositor."""
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    if not (fg_cap.isOpened() and al_cap.isOpened()):
        return False

    w, h = size
    bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
    if bg is None:
        # Neutral grey stand-in when the background image is unreadable.
        bg = np.full((h, w, 3), 127, dtype=np.uint8)
    bg_rgb = cv2.cvtColor(_resize_keep_ar(bg, (w, h)), cv2.COLOR_BGR2RGB)

    # With ffmpeg available we render to a temp file and re-encode to H.264.
    post_h264 = _probe_ffmpeg()
    if post_h264:
        tmp_out = Path(str(out_path) + ".tmp.mp4")
        writer = _video_writer(tmp_out, fps, (w, h))
    else:
        writer = _video_writer(Path(out_path), fps, (w, h))

    ok_any = False
    try:
        while True:
            ok_fg, fg = fg_cap.read()
            ok_al, al = al_cap.read()
            if not (ok_fg and ok_al):
                break
            fg = cv2.resize(fg, (w, h), interpolation=cv2.INTER_CUBIC)
            al_gray = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
            comp = _composite_frame_pro(cv2.cvtColor(fg, cv2.COLOR_BGR2RGB), al_gray, bg_rgb)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        fg_cap.release()
        al_cap.release()
        writer.release()

    if post_h264 and ok_any:
        try:
            subprocess.run(
                [_ffmpeg_bin(), "-y",
                 "-i", str(tmp_out),
                 "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
                 str(out_path)],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            )
            tmp_out.unlink(missing_ok=True)
        except Exception as e:
            # Keep the mp4v temp render rather than failing outright.
            logger.warning(f"ffmpeg finalize failed: {e}")
            Path(out_path).unlink(missing_ok=True)
            tmp_out.replace(out_path)

    return ok_any
591
+
592
def fallback_composite(video_path: Union[str, Path],
                       mask_path: Union[str, Path],
                       bg_image_path: Union[str, Path],
                       out_path: Union[str, Path]) -> bool:
    """Static-mask compositing using pro compositor."""
    mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
    cap = cv2.VideoCapture(str(video_path))
    if mask is None or not cap.isOpened():
        return False

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))

    bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
    if bg is None:
        # Neutral grey stand-in when the background image is unreadable.
        bg = np.full((h, w, 3), 127, dtype=np.uint8)

    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    bg_rgb = cv2.cvtColor(_resize_keep_ar(bg, (w, h)), cv2.COLOR_BGR2RGB)

    # With ffmpeg available we render to a temp file and re-encode to H.264.
    use_post_ffmpeg = _probe_ffmpeg()
    if use_post_ffmpeg:
        tmp_out = Path(str(out_path) + ".tmp.mp4")
        writer = _video_writer(tmp_out, fps, (w, h))
    else:
        writer = _video_writer(Path(out_path), fps, (w, h))

    ok_any = False
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            comp = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), mask_resized, bg_rgb)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        cap.release()
        writer.release()

    if use_post_ffmpeg and ok_any:
        try:
            subprocess.run(
                [_ffmpeg_bin(), "-y",
                 "-i", str(tmp_out),
                 "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
                 str(out_path)],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            )
            tmp_out.unlink(missing_ok=True)
        except Exception as e:
            # Keep the mp4v temp render rather than failing outright.
            logger.warning(f"ffmpeg H.264 finalize failed: {e}")
            Path(out_path).unlink(missing_ok=True)
            tmp_out.replace(out_path)

    return ok_any
654
+
655
+ # --------------------------------------------------------------------------------------
656
+ # Stage-A (Transparent Export) Functions
657
+ # --------------------------------------------------------------------------------------
658
+ def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
659
+ """RGB checkerboard for preview when no real alpha is possible."""
660
+ y, x = np.mgrid[0:h, 0:w]
661
+ c = ((x // tile) + (y // tile)) % 2
662
+ a = np.where(c == 0, 200, 150).astype(np.uint8)
663
+ return np.stack([a, a, a], axis=-1)
664
+
665
def _build_stage_a_rgba_vp9_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
    src_audio: Optional[Union[str, Path]] = None,
) -> bool:
    """Merge FG+ALPHA → RGBA WebM (VP9 with alpha)."""
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        cmd = [_ffmpeg_bin(), "-y", "-i", str(fg_path), "-i", str(alpha_path)]
        if src_audio:
            cmd += ["-i", str(src_audio)]
        # Scale both streams to target size/fps, then fold alpha into the FG.
        graph = (
            f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
            f"[0:v]scale={w}:{h},fps={fps}[fg];"
            f"[fg][al]alphamerge[outv]"
        )
        cmd += ["-filter_complex", graph, "-map", "[outv]"]
        if src_audio:
            cmd += ["-map", "2:a:0?", "-c:a", "libopus", "-b:a", "128k"]
        cmd += [
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1", "-shortest", str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}")
        return False
697
+
698
def _build_stage_a_rgba_vp9_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Merge original video + static mask → RGBA WebM (VP9 with alpha)."""
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        graph = (
            f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
            f"[0:v]scale={w}:{h},fps={fps}[fg];"
            f"[fg][al]alphamerge[outv]"
        )
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(video_path),
            "-loop", "1", "-i", str(mask_png),  # loop the still mask for the whole clip
            "-filter_complex", graph,
            "-map", "[outv]",
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1", "-shortest", str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}")
        return False
728
+
729
def _build_stage_a_checkerboard_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: FG+ALPHA over checkerboard → MP4 (no real alpha)."""
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    if not (fg_cap.isOpened() and al_cap.isOpened()):
        return False
    w, h = size
    writer = _video_writer(Path(out_mp4), fps, (w, h))
    board = _checkerboard_bg(w, h)
    wrote_frame = False
    try:
        while True:
            got_fg, fg = fg_cap.read()
            got_al, al = al_cap.read()
            if not (got_fg and got_al):
                break
            fg = cv2.resize(fg, (w, h))
            al = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
            comp = _composite_frame_pro(cv2.cvtColor(fg, cv2.COLOR_BGR2RGB), al, board)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            wrote_frame = True
    finally:
        fg_cap.release()
        al_cap.release()
        writer.release()
    return wrote_frame
761
+
762
def _build_stage_a_checkerboard_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: original video + static mask over checkerboard → MP4."""
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return False
    w, h = size
    mask = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        return False
    mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    writer = _video_writer(Path(out_mp4), fps, (w, h))
    board = _checkerboard_bg(w, h)
    wrote_frame = False
    try:
        while True:
            got, frame = cap.read()
            if not got:
                break
            frame = cv2.resize(frame, (w, h))
            comp = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), mask, board)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            wrote_frame = True
    finally:
        cap.release()
        writer.release()
    return wrote_frame
pipeline.py CHANGED
@@ -1,52 +1,32 @@
1
- # pipeline.py
2
  #!/usr/bin/env python3
3
  """
4
- BackgroundFX Pro - Dynamic SAM2 + MatAnyone Pipeline (pro masking, pro compositing, audio mux)
5
- ==============================================================================================
6
-
7
- What's inside:
8
- - SAM2 (first-frame segmentation) via third_party/sam2 (env-configurable)
9
- - MatAnyone (temporal matting) via third_party/matanyone (env-configurable)
10
- - First-frame mask refinement via GrabCut (optional, default ON)
11
- - Pro compositing: alpha refinement, gamma-aware blending, light wrap, edge de-spill
12
- - Fallbacks: MediaPipe SelfieSegmentation → else OpenCV GrabCut
13
- - H.264 MP4 output (ffmpeg when available; OpenCV fallback)
14
- - Audio mux: original audio copied into final output (AAC) if present
15
- - Stage-A transparent export (VP9 with alpha or checkerboard preview)
16
-
17
- Environment knobs (all optional):
18
- - THIRD_PARTY_SAM2_DIR, THIRD_PARTY_MATANY_DIR
19
- - SAM2_MODEL_CFG, SAM2_CHECKPOINT, SAM2_DEVICE
20
- - MATANY_REPO_ID, MATANY_CHECKPOINT, MATANY_DEVICE, ENABLE_MATANY=1|0
21
- - FFMPEG_BIN
22
- - REFINE_GRABCUT=1 | 0 (enable/disable seed mask GrabCut refinement)
23
- - REFINE_GRABCUT_ITERS=2 (GrabCut iterations)
24
- - REFINE_TRIMAP_ERODE=3 (px for sure-FG erode)
25
- - REFINE_TRIMAP_DILATE=6 (px for sure-BG erode of inverse)
26
- - EDGE_ERODE=1, EDGE_DILATE=2, EDGE_BLUR=1.5
27
- - LIGHTWRAP_RADIUS=5, LIGHTWRAP_AMOUNT=0.18
28
- - DESPILL_AMOUNT=0.35
29
- - RETURN_STAGE_A=0 | 1 (if 1, return Stage-A file instead of final composite)
30
- - STAGEA_VP9_CRF=28 (quality for VP9 alpha export)
31
  """
32
 
33
  from __future__ import annotations
34
 
35
  import os
36
- import sys
37
- import cv2
38
  import time
39
  import tempfile
40
  import logging
41
- import subprocess
42
- import inspect
43
  from pathlib import Path
44
  from typing import Optional, Tuple, Dict, Any, Union
45
 
46
- import numpy as np
47
- import yaml # for SAM2 config introspection
48
-
49
- # Try to apply GPU/perf tuning early if present
 
 
 
 
 
 
 
50
  try:
51
  import perf_tuning # noqa: F401
52
  except Exception:
@@ -63,821 +43,41 @@
63
  logger.addHandler(_h)
64
 
65
  # --------------------------------------------------------------------------------------
66
- # Optional dependency: MediaPipe SelfieSegmentation
67
- # --------------------------------------------------------------------------------------
68
- try:
69
- import mediapipe as mp # type: ignore
70
- _HAS_MEDIAPIPE = True
71
- except Exception:
72
- _HAS_MEDIAPIPE = False
73
-
74
- # --------------------------------------------------------------------------------------
75
- # Path setup for third_party repos (dynamically override-able)
76
- # --------------------------------------------------------------------------------------
77
- ROOT = Path(__file__).resolve().parent
78
- TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
79
- TP_MATANY = Path(os.environ.get("THIRD_PARTY_MATANY_DIR", ROOT / "third_party" / "matanyone")).resolve()
80
-
81
- def _add_sys_path(p: Path) -> None:
82
- p_str = str(p)
83
- if p_str not in sys.path:
84
- sys.path.insert(0, p_str)
85
-
86
- _add_sys_path(TP_SAM2)
87
- _add_sys_path(TP_MATANY)
88
-
89
- # --------------------------------------------------------------------------------------
90
- # Utilities
91
- # --------------------------------------------------------------------------------------
92
- def _ffmpeg_bin() -> str:
93
- return os.environ.get("FFMPEG_BIN", "ffmpeg")
94
-
95
- def _probe_ffmpeg() -> bool:
96
- try:
97
- subprocess.run([_ffmpeg_bin(), "-version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
98
- return True
99
- except Exception:
100
- return False
101
-
102
- def _mux_audio(src_video: Union[str, Path], silent_video: Union[str, Path], out_path: Union[str, Path]) -> bool:
103
- """Copy video from silent_video + audio from src_video into out_path (AAC)."""
104
- try:
105
- cmd = [
106
- _ffmpeg_bin(), "-y",
107
- "-i", str(silent_video),
108
- "-i", str(src_video),
109
- "-map", "0:v:0",
110
- "-map", "1:a:0?",
111
- "-c:v", "copy",
112
- "-c:a", "aac", "-b:a", "192k",
113
- "-shortest",
114
- str(out_path)
115
- ]
116
- subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
117
- return True
118
- except Exception as e:
119
- logger.warning(f"Audio mux failed; returning silent video. Reason: {e}")
120
- return False
121
-
122
- def _has_cuda() -> bool:
123
- try:
124
- import torch # type: ignore
125
- return torch.cuda.is_available()
126
- except Exception:
127
- return False
128
-
129
- def _pick_device(env_key: str) -> str:
130
- requested = os.environ.get(env_key, "").strip().lower()
131
- if requested in {"cuda", "cpu"}:
132
- return requested
133
- return "cuda" if _has_cuda() else "cpu"
134
-
135
- def _ensure_dir(p: Path) -> None:
136
- p.mkdir(parents=True, exist_ok=True)
137
-
138
- def _cv_read_first_frame(video_path: Union[str, Path]) -> Tuple[Optional[np.ndarray], int, Tuple[int, int]]:
139
- cap = cv2.VideoCapture(str(video_path))
140
- if not cap.isOpened():
141
- return None, 0, (0, 0)
142
- fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))
143
- ok, frame = cap.read()
144
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
145
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
146
- cap.release()
147
- if not ok:
148
- return None, fps, (w, h)
149
- return frame, fps, (w, h)
150
-
151
- def _save_mask_png(mask: np.ndarray, path: Union[str, Path]) -> str:
152
- # expects mask as uint8 0..255 or bool
153
- if mask.dtype == bool:
154
- mask = (mask.astype(np.uint8) * 255)
155
- elif mask.dtype != np.uint8:
156
- mask = np.clip(mask, 0, 255).astype(np.uint8)
157
- cv2.imwrite(str(path), mask)
158
- return str(path)
159
-
160
- def _resize_keep_ar(image: np.ndarray, target_wh: Tuple[int, int]) -> np.ndarray:
161
- tw, th = target_wh
162
- h, w = image.shape[:2]
163
- if h == 0 or w == 0 or tw == 0 or th == 0:
164
- return image
165
- scale = min(tw / w, th / h)
166
- nw, nh = max(1, int(round(w * scale))), max(1, int(round(h * scale)))
167
- resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_CUBIC)
168
- canvas = np.zeros((th, tw, 3), dtype=resized.dtype)
169
- x0 = (tw - nw) // 2
170
- y0 = (th - nh) // 2
171
- canvas[y0:y0+nh, x0:x0+nw] = resized
172
- return canvas
173
-
174
- # ---- Edge refinement / compositing helpers ----
175
- def _refine_alpha(alpha: np.ndarray, erode_px: int = 1, dilate_px: int = 2, blur_px: float = 1.5) -> np.ndarray:
176
- """Erode→dilate + gentle blur → float alpha in [0,1]."""
177
- if alpha.dtype != np.float32:
178
- a = alpha.astype(np.float32)
179
- if a.max() > 1.0:
180
- a = a / 255.0
181
- else:
182
- a = alpha.copy()
183
-
184
- a_u8 = np.clip(np.round(a * 255.0), 0, 255).astype(np.uint8)
185
- if erode_px > 0:
186
- k = max(1, int(erode_px))
187
- a_u8 = cv2.erode(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
188
- if dilate_px > 0:
189
- k = max(1, int(dilate_px))
190
- a_u8 = cv2.dilate(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
191
- a = a_u8.astype(np.float32) / 255.0
192
-
193
- if blur_px and blur_px > 0:
194
- rad = max(1, int(round(blur_px)))
195
- a = cv2.GaussianBlur(a, (rad | 1, rad | 1), 0)
196
-
197
- return np.clip(a, 0.0, 1.0)
198
-
199
- def _to_linear(rgb: np.ndarray, gamma: float = 2.2) -> np.ndarray:
200
- x = np.clip(rgb.astype(np.float32) / 255.0, 0.0, 1.0)
201
- return np.power(x, gamma)
202
-
203
- def _to_srgb(lin: np.ndarray, gamma: float = 2.2) -> np.ndarray:
204
- x = np.clip(lin, 0.0, 1.0)
205
- return np.clip(np.power(x, 1.0 / gamma) * 255.0, 0, 255).astype(np.uint8)
206
-
207
- def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount: float = 0.18) -> np.ndarray:
208
- """Simple light wrap from background into subject edges."""
209
- r = max(1, int(radius))
210
- inv = 1.0 - alpha01
211
- inv_blur = cv2.GaussianBlur(inv, (r | 1, r | 1), 0)
212
- lw = (bg_rgb.astype(np.float32) * inv_blur[..., None] * float(amount))
213
- return lw
214
-
215
- def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35) -> np.ndarray:
216
- """
217
- Reduce saturation only in the boundary band (alpha≈0.5) to remove old-background tint.
218
- amount: 0..1 (how strongly to desaturate)
219
- """
220
- w = 1.0 - 2.0 * np.abs(alpha01 - 0.5) # bell-shaped weight
221
- w = np.clip(w, 0.0, 1.0)
222
- hsv = cv2.cvtColor(fg_rgb.astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
223
- H, S, V = cv2.split(hsv)
224
- S = S * (1.0 - amount * w)
225
- hsv2 = cv2.merge([H, np.clip(S, 0, 255), V])
226
- out = cv2.cvtColor(hsv2.astype(np.uint8), cv2.COLOR_HSV2RGB)
227
- return out
228
-
229
- def _composite_frame_pro(fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarray,
230
- erode_px: int = None, dilate_px: int = None, blur_px: float = None,
231
- lw_radius: int = None, lw_amount: float = None,
232
- despill_amount: float = None) -> np.ndarray:
233
- """Gamma-aware composite + edge refinement + light wrap + boundary de-spill."""
234
- erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
235
- dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
236
- blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
237
- lw_radius = lw_radius if lw_radius is not None else int(os.environ.get("LIGHTWRAP_RADIUS", "5"))
238
- lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
239
- despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))
240
-
241
- # refine alpha [0,1]
242
- a = _refine_alpha(alpha, erode_px=erode_px, dilate_px=dilate_px, blur_px=blur_px)
243
-
244
- # edge de-spill: temper saturation where a≈0.5
245
- fg_rgb = _despill_edges(fg_rgb, a, amount=despill_amount)
246
-
247
- # linearize for better blending
248
- fg_lin = _to_linear(fg_rgb)
249
- bg_lin = _to_linear(bg_rgb)
250
-
251
- # light wrap
252
- lw = _light_wrap(bg_rgb, a, radius=lw_radius, amount=lw_amount)
253
- lw_lin = _to_linear(np.clip(lw, 0, 255).astype(np.uint8))
254
-
255
- comp_lin = fg_lin * a[..., None] + bg_lin * (1.0 - a[..., None]) + lw_lin
256
- comp = _to_srgb(comp_lin)
257
- return comp
258
-
259
- def _video_writer(out_path: Path, fps: int, size: Tuple[int, int]) -> cv2.VideoWriter:
260
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
261
- return cv2.VideoWriter(str(out_path), fourcc, max(1, fps), size)
262
-
263
- # --- Stage-A (transparent) builders ----------------------------------------------------
264
- def _build_stage_a_rgba_vp9_from_fg_alpha(
265
- fg_path: Union[str, Path],
266
- alpha_path: Union[str, Path],
267
- out_webm: Union[str, Path],
268
- fps: int,
269
- size: Tuple[int, int],
270
- src_audio: Optional[Union[str, Path]] = None,
271
- ) -> bool:
272
- """Merge FG+ALPHA → RGBA WebM (VP9 with alpha). Optionally mux original audio (Opus)."""
273
- if not _probe_ffmpeg():
274
- return False
275
- w, h = size
276
- try:
277
- cmd = [
278
- _ffmpeg_bin(), "-y",
279
- "-i", str(fg_path), # 0: FG video
280
- "-i", str(alpha_path), # 1: ALPHA video (grayscale)
281
- ]
282
- if src_audio:
283
- cmd += ["-i", str(src_audio)] # 2: original (for audio)
284
- fcx = f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];" \
285
- f"[0:v]scale={w}:{h},fps={fps}[fg];" \
286
- f"[fg][al]alphamerge[outv]"
287
- cmd += ["-filter_complex", fcx, "-map", "[outv]"]
288
- if src_audio:
289
- cmd += ["-map", "2:a:0?", "-c:a", "libopus", "-b:a", "128k"]
290
- cmd += [
291
- "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
292
- "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
293
- "-b:v", "0", "-row-mt", "1",
294
- "-shortest",
295
- str(out_webm),
296
- ]
297
- subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
298
- return True
299
- except Exception as e:
300
- logger.warning(f"Stage-A VP9(alpha) build failed: {e}")
301
- return False
302
-
303
- def _build_stage_a_rgba_vp9_from_mask(
304
- video_path: Union[str, Path],
305
- mask_png: Union[str, Path],
306
- out_webm: Union[str, Path],
307
- fps: int,
308
- size: Tuple[int, int],
309
- ) -> bool:
310
- """Merge original video + static mask → RGBA WebM (VP9 with alpha)."""
311
- if not _probe_ffmpeg():
312
- return False
313
- w, h = size
314
- try:
315
- cmd = [
316
- _ffmpeg_bin(), "-y",
317
- "-i", str(video_path), # 0: original video
318
- "-loop", "1", "-i", str(mask_png), # 1: static PNG mask (grayscale)
319
- "-filter_complex",
320
- f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
321
- f"[0:v]scale={w}:{h},fps={fps}[fg];"
322
- f"[fg][al]alphamerge[outv]",
323
- "-map", "[outv]",
324
- "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
325
- "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
326
- "-b:v", "0", "-row-mt", "1",
327
- "-shortest",
328
- str(out_webm),
329
- ]
330
- subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
331
- return True
332
- except Exception as e:
333
- logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}")
334
- return False
335
-
336
- def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
337
- """RGB checkerboard (for preview when no real alpha is possible)."""
338
- y, x = np.mgrid[0:h, 0:w]
339
- c = ((x // tile) + (y // tile)) % 2
340
- a = np.where(c == 0, 200, 150).astype(np.uint8)
341
- return np.stack([a, a, a], axis=-1)
342
-
343
- def _build_stage_a_checkerboard_from_fg_alpha(
344
- fg_path: Union[str, Path],
345
- alpha_path: Union[str, Path],
346
- out_mp4: Union[str, Path],
347
- fps: int,
348
- size: Tuple[int, int],
349
- ) -> bool:
350
- """Preview: FG+ALPHA over checkerboard → MP4 (no real alpha)."""
351
- fg_cap = cv2.VideoCapture(str(fg_path))
352
- al_cap = cv2.VideoCapture(str(alpha_path))
353
- if not fg_cap.isOpened() or not al_cap.isOpened():
354
- return False
355
- w, h = size
356
- writer = _video_writer(Path(out_mp4), fps, (w, h))
357
- bg = _checkerboard_bg(w, h)
358
- ok_any = False
359
- try:
360
- while True:
361
- okf, fg = fg_cap.read()
362
- oka, al = al_cap.read()
363
- if not okf or not oka:
364
- break
365
- fg = cv2.resize(fg, (w, h))
366
- al = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
367
- comp = _composite_frame_pro(cv2.cvtColor(fg, cv2.COLOR_BGR2RGB), al, bg)
368
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
369
- ok_any = True
370
- finally:
371
- fg_cap.release()
372
- al_cap.release()
373
- writer.release()
374
- return ok_any
375
-
376
- def _build_stage_a_checkerboard_from_mask(
377
- video_path: Union[str, Path],
378
- mask_png: Union[str, Path],
379
- out_mp4: Union[str, Path],
380
- fps: int,
381
- size: Tuple[int, int],
382
- ) -> bool:
383
- """Preview: original video + static mask over checkerboard → MP4."""
384
- cap = cv2.VideoCapture(str(video_path))
385
- if not cap.isOpened():
386
- return False
387
- w, h = size
388
- mask = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
389
- if mask is None:
390
- return False
391
- mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
392
- writer = _video_writer(Path(out_mp4), fps, (w, h))
393
- bg = _checkerboard_bg(w, h)
394
- ok_any = False
395
- try:
396
- while True:
397
- ok, frame = cap.read()
398
- if not ok:
399
- break
400
- frame = cv2.resize(frame, (w, h))
401
- comp = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), mask, bg)
402
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
403
- ok_any = True
404
- finally:
405
- cap.release()
406
- writer.release()
407
- return ok_any
408
-
409
- # --------------------------------------------------------------------------------------
410
- # SAM2 helpers (config resolution & robust loader)
411
  # --------------------------------------------------------------------------------------
412
- def _resolve_sam2_cfg(cfg_str: str) -> str:
413
- """Make the SAM2 config path absolute (prefer inside TP_SAM2)."""
414
- cfg_path = Path(cfg_str)
415
- if not cfg_path.is_absolute():
416
- candidate = TP_SAM2 / cfg_path
417
- if candidate.exists():
418
- return str(candidate)
419
- if cfg_path.exists():
420
- return str(cfg_path)
421
- # Last resort: common defaults inside the repo
422
- for name in ["configs/sam2/sam2_hiera_l.yaml", "configs/sam2/sam2_hiera_b.yaml", "configs/sam2/sam2_hiera_s.yaml"]:
423
- p = TP_SAM2 / name
424
- if p.exists():
425
- return str(p)
426
- return str(cfg_str) # let build_sam2 raise a clear error
427
-
428
- def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
429
- """
430
- If the given config references 'hieradet', try to find a 'hiera' config in the repo and return it.
431
- """
432
  try:
433
- with open(cfg_path, "r") as f:
434
- data = yaml.safe_load(f)
435
- # Look for target under model.image_encoder.trunk._target_ (Hydra style)
436
- target = None
437
- model = data.get("model", {})
438
- enc = (model.get("image_encoder") or {})
439
- trunk = (enc.get("trunk") or {})
440
- target = trunk.get("_target_") or trunk.get("target")
441
- if isinstance(target, str) and "hieradet" in target:
442
- # Search all yaml files under TP_SAM2/configs for those that reference ".hiera."
443
- for y in TP_SAM2.rglob("*.yaml"):
444
- try:
445
- with open(y, "r") as f2:
446
- d2 = yaml.safe_load(f2)
447
- m2 = (d2 or {}).get("model", {})
448
- e2 = (m2.get("image_encoder") or {})
449
- t2 = (e2.get("trunk") or {})
450
- tgt2 = t2.get("_target_") or t2.get("target")
451
- if isinstance(tgt2, str) and ".hiera." in tgt2:
452
- logger.info(f"SAM2: switching config from 'hieradet' → 'hiera': {y}")
453
- return str(y)
454
- except Exception:
455
- continue
456
  except Exception:
457
  pass
458
- return None
459
-
460
- # --------------------------------------------------------------------------------------
461
- # SAM2 Integration (robust to different build_sam2 signatures)
462
- # --------------------------------------------------------------------------------------
463
- def load_sam2() -> Tuple[Optional[object], bool, Dict[str, Any]]:
464
- """
465
- Robust SAM2 loader that adapts to different build_sam2 signatures:
466
- - config_file vs model_cfg
467
- - checkpoint vs ckpt_path vs weights
468
- - optional device kwarg
469
- - absolute config resolution (inside third_party/sam2)
470
- - auto-fix if config references 'hieradet' but repo has 'hiera'
471
- """
472
- meta = {"sam2_import_ok": False, "sam2_init_ok": False}
473
- try:
474
- from sam2.build_sam import build_sam2 # type: ignore
475
- from sam2.sam2_image_predictor import SAM2ImagePredictor # type: ignore
476
- meta["sam2_import_ok"] = True
477
- except Exception as e:
478
- logger.warning(f"SAM2 import failed: {e}")
479
- return None, False, meta
480
-
481
- device = _pick_device("SAM2_DEVICE")
482
- cfg_env = os.environ.get("SAM2_MODEL_CFG", "configs/sam2/sam2_hiera_l.yaml")
483
- cfg = _resolve_sam2_cfg(cfg_env)
484
- ckpt = os.environ.get("SAM2_CHECKPOINT", "")
485
 
486
- def _try_build(cfg_path: str):
487
- params = set(inspect.signature(build_sam2).parameters.keys())
488
- kwargs = {}
489
- # Config arg
490
- if "config_file" in params:
491
- kwargs["config_file"] = cfg_path
492
- elif "model_cfg" in params:
493
- kwargs["model_cfg"] = cfg_path
494
- # Checkpoint arg
495
- if ckpt:
496
- if "checkpoint" in params:
497
- kwargs["checkpoint"] = ckpt
498
- elif "ckpt_path" in params:
499
- kwargs["ckpt_path"] = ckpt
500
- elif "weights" in params:
501
- kwargs["weights"] = ckpt
502
- # Device
503
- if "device" in params:
504
- kwargs["device"] = device
505
- # Try keywords first, then positional fallback
506
  try:
507
- return build_sam2(**kwargs)
508
- except TypeError:
509
- pos = [cfg_path]
510
- if ckpt:
511
- pos.append(ckpt)
512
- if "device" not in kwargs:
513
- pos.append(device)
514
- return build_sam2(*pos)
515
-
516
- try:
517
- try:
518
- sam = _try_build(cfg)
519
- except Exception as e1:
520
- msg = str(e1)
521
- # If the config is using 'hieradet', try to swap to a 'hiera' config
522
- alt_cfg = _find_hiera_config_if_hieradet(cfg)
523
- if alt_cfg:
524
- logger.info(f"SAM2: retrying with alt config: {alt_cfg}")
525
- sam = _try_build(alt_cfg)
526
- cfg = alt_cfg
527
- else:
528
- raise
529
-
530
- predictor = SAM2ImagePredictor(sam)
531
- meta.update({
532
- "sam2_init_ok": True,
533
- "sam2_device": device,
534
- "sam2_cfg": cfg,
535
- "sam2_ckpt": ckpt or "(repo default)"
536
- })
537
- return predictor, True, meta
538
- except Exception as e:
539
- logger.error(f"SAM2 init failed: {e}")
540
- return None, False, meta
541
-
542
- def run_sam2_mask(predictor: object,
543
- first_frame_bgr: np.ndarray,
544
- point: Optional[Tuple[int, int]] = None,
545
- auto: bool = False) -> Tuple[Optional[np.ndarray], bool]:
546
- """Return (mask_uint8_0_255, ok)."""
547
- if predictor is None:
548
- return None, False
549
- try:
550
- rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
551
- predictor.set_image(rgb)
552
-
553
- if auto:
554
- h, w = rgb.shape[:2]
555
- box = np.array([int(0.05*w), int(0.05*h), int(0.95*w), int(0.95*h)])
556
- masks, _, _ = predictor.predict(box=box)
557
- elif point is not None:
558
- x, y = int(point[0]), int(point[1])
559
- pts = np.array([[x, y]], dtype=np.int32)
560
- labels = np.array([1], dtype=np.int32)
561
- masks, _, _ = predictor.predict(point_coords=pts, point_labels=labels)
562
- else:
563
- h, w = rgb.shape[:2]
564
- box = np.array([int(0.1*w), int(0.1*h), int(0.9*w), int(0.9*h)])
565
- masks, _, _ = predictor.predict(box=box)
566
-
567
- if masks is None or len(masks) == 0:
568
- return None, False
569
-
570
- m = masks[0].astype(np.uint8) * 255
571
- return m, True
572
- except Exception as e:
573
- logger.warning(f"SAM2 mask failed: {e}")
574
- return None, False
575
-
576
- # --------------------------------------------------------------------------------------
577
- # First-frame mask refinement (GrabCut with mask init)
578
- # --------------------------------------------------------------------------------------
579
- def _refine_mask_grabcut(image_bgr: np.ndarray,
580
- mask_u8: np.ndarray,
581
- iters: int = None,
582
- trimap_erode: int = None,
583
- trimap_dilate: int = None) -> np.ndarray:
584
- """
585
- Use SAM2 seed as initialization for GrabCut (GC_INIT_WITH_MASK).
586
- - sure FG: eroded mask
587
- - sure BG: eroded inverse
588
- - unknown: rest
589
- Returns refined binary mask (uint8 0/255).
590
- """
591
- iters = int(os.environ.get("REFINE_GRABCUT_ITERS", "2")) if iters is None else int(iters)
592
- e = int(os.environ.get("REFINE_TRIMAP_ERODE", "3")) if trimap_erode is None else int(trimap_erode)
593
- d = int(os.environ.get("REFINE_TRIMAP_DILATE", "6")) if trimap_dilate is None else int(trimap_dilate)
594
-
595
- h, w = mask_u8.shape[:2]
596
- m = (mask_u8 > 127).astype(np.uint8) * 255
597
-
598
- sure_fg = cv2.erode(m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, e), max(1, e))), iterations=1)
599
- sure_bg = cv2.erode(255 - m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, d), max(1, d))), iterations=1)
600
-
601
- gc_mask = np.full((h, w), cv2.GC_PR_BGD, dtype=np.uint8)
602
- gc_mask[sure_bg > 0] = cv2.GC_BGD
603
- gc_mask[sure_fg > 0] = cv2.GC_FGD
604
-
605
- bgdModel = np.zeros((1, 65), np.float64)
606
- fgdModel = np.zeros((1, 65), np.float64)
607
- try:
608
- cv2.grabCut(image_bgr, gc_mask, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
609
- out = np.where((gc_mask == cv2.GC_FGD) | (gc_mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
610
- out = cv2.medianBlur(out, 5)
611
- return out
612
- except Exception as e:
613
- logger.warning(f"GrabCut refinement failed; using original mask. Reason: {e}")
614
- return m
615
-
616
- # --------------------------------------------------------------------------------------
617
- # MatAnyone Integration (robust + disable switch)
618
- # --------------------------------------------------------------------------------------
619
- def load_matany() -> Tuple[Optional[object], bool, Dict[str, Any]]:
620
- """
621
- MatAnyone loader that:
622
- - Skips if ENABLE_MATANY=0
623
- - Detects forks that require a `network` arg and exits cleanly with diagnostics
624
- - Otherwise tries repo/checkpoint style constructors
625
- """
626
- meta = {"matany_import_ok": False, "matany_init_ok": False}
627
-
628
- enable_env = os.environ.get("ENABLE_MATANY", "1").strip().lower()
629
- if enable_env in {"0", "false", "off", "no"}:
630
- logger.info("MatAnyone disabled by ENABLE_MATANY=0.")
631
- meta["disabled"] = True
632
- return None, False, meta
633
-
634
- try:
635
- try:
636
- from inference_core import InferenceCore # type: ignore
637
  except Exception:
638
- from matanyone.inference.inference_core import InferenceCore # type: ignore
639
- meta["matany_import_ok"] = True
640
- except Exception as e:
641
- logger.warning(f"MatAnyone import failed: {e}")
642
- return None, False, meta
643
-
644
- device = _pick_device("MATANY_DEVICE")
645
- repo_id = os.environ.get("MATANY_REPO_ID", "")
646
- ckpt = os.environ.get("MATANY_CHECKPOINT", "")
647
-
648
- # If this fork needs a prebuilt network, tell the user and skip
649
- try:
650
- sig = inspect.signature(InferenceCore)
651
- if "network" in sig.parameters and sig.parameters["network"].default is inspect._empty:
652
- logger.error(
653
- "This MatAnyone fork expects `InferenceCore(network=...)`. "
654
- "Pin a fork/commit that supplies a checkpoint-based constructor, "
655
- "or set ENABLE_MATANY=0 to skip."
656
- )
657
- meta["needs_network_arg"] = True
658
- return None, False, meta
659
- except Exception:
660
- pass
661
 
662
- candidates = [
663
- {"kwargs": {"repo_id": repo_id or None, "checkpoint": ckpt or None, "device": device}},
664
- {"kwargs": {"checkpoint": ckpt or None, "device": device}},
665
- {"args": (), "kwargs": {"device": device}},
666
- ]
667
- last_err = None
668
- for cand in candidates:
669
- try:
670
- matany = InferenceCore(*cand.get("args", ()), **cand.get("kwargs", {}))
671
- meta["matany_init_ok"] = True
672
- meta["matany_device"] = device
673
- meta["matany_repo_id"] = repo_id or "(unset)"
674
- meta["matany_checkpoint"] = ckpt or "(unset)"
675
- return matany, True, meta
676
- except Exception as e:
677
- last_err = e
678
- continue
679
-
680
- logger.error(f"MatAnyone init failed with all fallbacks: {last_err}")
681
- return None, False, meta
682
-
683
- def run_matany(matany: object,
684
- video_path: Union[str, Path],
685
- first_mask_path: Union[str, Path],
686
- work_dir: Union[str, Path]) -> Tuple[Optional[str], Optional[str], bool]:
687
- """Return (foreground_video_path, alpha_video_path, ok)."""
688
- if matany is None:
689
- return None, None, False
690
- video_path = str(video_path)
691
- first_mask_path = str(first_mask_path)
692
- work_dir = str(work_dir)
693
  try:
694
- if hasattr(matany, "process_video"):
695
- out = matany.process_video(input_path=video_path, mask_path=first_mask_path, output_dir=work_dir)
696
- if isinstance(out, (list, tuple)) and len(out) >= 2:
697
- return str(out[0]), str(out[1]), True
698
- if isinstance(out, dict):
699
- fg = out.get("foreground") or out.get("fg") or out.get("foreground_path")
700
- al = out.get("alpha") or out.get("alpha_path")
701
- if fg and al:
702
- return str(fg), str(al), True
703
-
704
- if hasattr(matany, "run"):
705
- out = matany.run(video_path=video_path, seed_mask=first_mask_path, out_dir=work_dir)
706
- if isinstance(out, dict):
707
- fg = out.get("foreground") or out.get("fg") or out.get("foreground_path")
708
- al = out.get("alpha") or out.get("alpha_path")
709
- if fg and al:
710
- return str(fg), str(al), True
711
-
712
- logger.error("MatAnyone returned no usable paths.")
713
- return None, None, False
714
  except Exception as e:
715
- logger.warning(f"MatAnyone processing failed: {e}")
716
- return None, None, False
717
 
718
  # --------------------------------------------------------------------------------------
719
- # Fallbacks
720
- # --------------------------------------------------------------------------------------
721
- def fallback_mask(first_frame_bgr: np.ndarray) -> np.ndarray:
722
- """Prefer MediaPipe; fallback to GrabCut. Returns uint8 mask 0/255."""
723
- h, w = first_frame_bgr.shape[:2]
724
- if _HAS_MEDIAPIPE:
725
- try:
726
- mp_selfie = mp.solutions.selfie_segmentation
727
- with mp_selfie.SelfieSegmentation(model_selection=1) as segmenter:
728
- rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
729
- res = segmenter.process(rgb)
730
- m = (np.clip(res.segmentation_mask, 0, 1) > 0.5).astype(np.uint8) * 255
731
- m = cv2.medianBlur(m, 5)
732
- return m
733
- except Exception as e:
734
- logger.warning(f"MediaPipe fallback failed: {e}")
735
-
736
- mask = np.zeros((h, w), np.uint8)
737
- rect = (int(0.1*w), int(0.1*h), int(0.8*w), int(0.8*h))
738
- bgdModel = np.zeros((1, 65), np.float64)
739
- fgdModel = np.zeros((1, 65), np.float64)
740
- try:
741
- cv2.grabCut(first_frame_bgr, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
742
- mask_bin = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
743
- return mask_bin
744
- except Exception as e:
745
- logger.warning(f"GrabCut failed: {e}")
746
- return np.zeros((h, w), dtype=np.uint8)
747
-
748
- def fallback_composite(video_path: Union[str, Path],
749
- mask_path: Union[str, Path],
750
- bg_image_path: Union[str, Path],
751
- out_path: Union[str, Path]) -> bool:
752
- """Static-mask compositing (uses pro compositor to reduce halos)."""
753
- mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
754
- cap = cv2.VideoCapture(str(video_path))
755
- if mask is None or not cap.isOpened():
756
- return False
757
-
758
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
759
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
760
- fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))
761
-
762
- bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
763
- if bg is None:
764
- bg = np.full((h, w, 3), 127, dtype=np.uint8)
765
-
766
- mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
767
- bg_f = _resize_keep_ar(bg, (w, h))
768
-
769
- if _probe_ffmpeg():
770
- tmp_out = Path(str(out_path) + ".tmp.mp4")
771
- writer = _video_writer(tmp_out, fps, (w, h))
772
- use_post_ffmpeg = True
773
- else:
774
- writer = _video_writer(Path(out_path), fps, (w, h))
775
- use_post_ffmpeg = False
776
-
777
- ok_any = False
778
- try:
779
- while True:
780
- ok, frame = cap.read()
781
- if not ok:
782
- break
783
- comp = _composite_frame_pro(
784
- cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
785
- mask_resized,
786
- cv2.cvtColor(bg_f, cv2.COLOR_BGR2RGB)
787
- )
788
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
789
- ok_any = True
790
- finally:
791
- cap.release()
792
- writer.release()
793
-
794
- if use_post_ffmpeg and ok_any:
795
- try:
796
- cmd = [
797
- _ffmpeg_bin(), "-y",
798
- "-i", str(tmp_out),
799
- "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
800
- str(out_path)
801
- ]
802
- subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
803
- tmp_out.unlink(missing_ok=True)
804
- except Exception as e:
805
- logger.warning(f"ffmpeg H.264 finalize failed: {e}")
806
- Path(out_path).unlink(missing_ok=True)
807
- tmp_out.replace(out_path)
808
-
809
- return ok_any
810
-
811
- # --------------------------------------------------------------------------------------
812
- # Compositing using MatAnyone outputs
813
- # --------------------------------------------------------------------------------------
814
- def composite_video(fg_path: Union[str, Path],
815
- alpha_path: Union[str, Path],
816
- bg_image_path: Union[str, Path],
817
- out_path: Union[str, Path],
818
- fps: int,
819
- size: Tuple[int, int]) -> bool:
820
- """Blend MatAnyone FG+ALPHA over background using pro compositor."""
821
- fg_cap = cv2.VideoCapture(str(fg_path))
822
- al_cap = cv2.VideoCapture(str(alpha_path))
823
- if not fg_cap.isOpened() or not al_cap.isOpened():
824
- return False
825
-
826
- w, h = size
827
- bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
828
- if bg is None:
829
- bg = np.full((h, w, 3), 127, dtype=np.uint8)
830
- bg_f = _resize_keep_ar(bg, (w, h))
831
-
832
- if _probe_ffmpeg():
833
- tmp_out = Path(str(out_path) + ".tmp.mp4")
834
- writer = _video_writer(tmp_out, fps, (w, h))
835
- post_h264 = True
836
- else:
837
- writer = _video_writer(Path(out_path), fps, (w, h))
838
- post_h264 = False
839
-
840
- ok_any = False
841
- try:
842
- while True:
843
- ok_fg, fg = fg_cap.read()
844
- ok_al, al = al_cap.read()
845
- if not ok_fg or not ok_al:
846
- break
847
- fg = cv2.resize(fg, (w, h), interpolation=cv2.INTER_CUBIC)
848
- al_gray = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
849
-
850
- comp = _composite_frame_pro(
851
- cv2.cvtColor(fg, cv2.COLOR_BGR2RGB),
852
- al_gray,
853
- cv2.cvtColor(bg_f, cv2.COLOR_BGR2RGB)
854
- )
855
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
856
- ok_any = True
857
- finally:
858
- fg_cap.release()
859
- al_cap.release()
860
- writer.release()
861
-
862
- if post_h264 and ok_any:
863
- try:
864
- cmd = [
865
- _ffmpeg_bin(), "-y",
866
- "-i", str(tmp_out),
867
- "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
868
- str(out_path)
869
- ]
870
- subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
871
- tmp_out.unlink(missing_ok=True)
872
- except Exception as e:
873
- logger.warning(f"ffmpeg finalize failed: {e}")
874
- Path(out_path).unlink(missing_ok=True)
875
- tmp_out.replace(out_path)
876
-
877
- return ok_any
878
-
879
- # --------------------------------------------------------------------------------------
880
- # High-level process function (for app.py)
881
  # --------------------------------------------------------------------------------------
882
  def process(video_path: Union[str, Path],
883
  bg_image_path: Union[str, Path],
@@ -885,7 +85,14 @@ def process(video_path: Union[str, Path],
885
  point_y: Optional[float] = None,
886
  auto_box: bool = False,
887
  work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
888
- """Orchestrate: SAM2 mask → (optional GrabCut refine) → MatAnyone → Stage-A → composite → mux audio."""
 
 
 
 
 
 
 
889
  t0 = time.time()
890
  diagnostics: Dict[str, Any] = {
891
  "sam2_ok": False,
@@ -897,120 +104,197 @@ def process(video_path: Union[str, Path],
897
  "matany_meta": {},
898
  "device_sam2": None,
899
  "device_matany": None,
 
900
  }
901
 
902
  tmp_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="bfx_"))
903
  _ensure_dir(tmp_root)
904
 
905
- # 0) Basic video info
906
- first_frame, fps, (vw, vh) = _cv_read_first_frame(video_path)
907
- diagnostics["fps"] = int(fps or 25)
908
- diagnostics["resolution"] = [int(vw), int(vh)]
909
- if first_frame is None or vw == 0 or vh == 0:
910
- diagnostics["fallback_used"] = "invalid_video"
911
- return None, diagnostics
912
-
913
- # 1) First-frame mask via SAM2 (or fallback)
914
- mask_png = tmp_root / "seed_mask.png"
915
- predictor, sam2_ok, sam_meta = load_sam2()
916
- diagnostics["sam2_meta"] = sam_meta
917
- diagnostics["device_sam2"] = sam_meta.get("sam2_device") if sam_meta else None
918
-
919
- seed_mask = None
920
- if sam2_ok:
921
- px = int(point_x) if point_x is not None else None
922
- py = int(point_y) if point_y is not None else None
923
- seed_mask, ok_mask = run_sam2_mask(
924
- predictor, first_frame,
925
- point=(px, py) if (px is not None and py is not None) else None,
926
- auto=auto_box
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927
  )
928
- diagnostics["sam2_ok"] = bool(ok_mask)
929
- else:
930
- ok_mask = False
931
-
932
- if not ok_mask or seed_mask is None:
933
- logger.info("SAM2 failed or not available. Using fallback mask.")
934
- seed_mask = fallback_mask(first_frame)
935
- diagnostics["fallback_used"] = "mask_generation"
936
-
937
- # 1b) Optional GrabCut refinement over SAM2 seed
938
- if int(os.environ.get("REFINE_GRABCUT", "1")) == 1:
939
- seed_mask = _refine_mask_grabcut(first_frame, seed_mask)
940
-
941
- _save_mask_png(seed_mask, mask_png)
942
 
943
- # 2) Try MatAnyone
944
- matany, mat_ok, mat_meta = load_matany()
945
- diagnostics["matany_meta"] = mat_meta
946
- diagnostics["device_matany"] = mat_meta.get("matany_device") if mat_meta else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947
 
948
- out_dir = tmp_root / "matany_out"
949
- _ensure_dir(out_dir)
950
- fg_path, al_path = None, None
951
- if mat_ok:
952
- fg_path, al_path, ran = run_matany(matany, video_path, mask_png, out_dir)
953
- diagnostics["matany_ok"] = bool(ran)
954
- else:
955
- ran = False
956
 
957
- # --- Build Stage-A (transparent) file for inspection ---
958
- stageA_path = None
959
- stageA_ok = False
960
- if diagnostics["matany_ok"] and fg_path and al_path:
961
- stageA_path = tmp_root / "stageA_transparent.webm"
962
- if _probe_ffmpeg():
963
- stageA_ok = _build_stage_a_rgba_vp9_from_fg_alpha(
964
- fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh), src_audio=video_path
965
- )
966
- if not stageA_ok:
967
- stageA_path = tmp_root / "stageA_checkerboard.mp4"
968
- stageA_ok = _build_stage_a_checkerboard_from_fg_alpha(
969
- fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh)
970
- )
971
- else:
972
- stageA_path = tmp_root / "stageA_transparent.webm"
973
  if _probe_ffmpeg():
974
- stageA_ok = _build_stage_a_rgba_vp9_from_mask(
975
- video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
976
- )
977
- if not stageA_ok:
978
- stageA_path = tmp_root / "stageA_checkerboard.mp4"
979
- stageA_ok = _build_stage_a_checkerboard_from_mask(
980
- video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
981
- )
982
-
983
- diagnostics["stageA_path"] = str(stageA_path) if stageA_ok else None
984
- diagnostics["stageA_note"] = (
985
- "WebM with real alpha (VP9)" if stageA_ok and str(stageA_path).endswith(".webm")
986
- else ("MP4 checkerboard preview (no real alpha)" if stageA_ok else "Stage-A build failed")
987
- )
988
-
989
- # Optional: return Stage-A instead of final composite
990
- if os.environ.get("RETURN_STAGE_A", "0").strip() == "1" and stageA_ok:
991
- return str(stageA_path), diagnostics
992
-
993
- # 3) Composite to final background
994
- output_path = tmp_root / "output.mp4"
995
- if diagnostics["matany_ok"] and fg_path and al_path:
996
- ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
997
- if not ok_comp:
998
- logger.info("MatAnyone composite failed; falling back to static mask composite.")
999
- fallback_composite(video_path, mask_png, bg_image_path, output_path)
1000
- diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
1001
- else:
1002
- logger.info("MatAnyone not used; doing static mask composite.")
1003
- fallback_composite(video_path, mask_png, bg_image_path, output_path)
1004
- diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") or "composite_static"
1005
-
1006
- diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
1007
 
1008
- # 4) Add audio back from the original input (if present)
1009
- final_path = tmp_root / "output_with_audio.mp4"
1010
- if _probe_ffmpeg():
1011
- mux_ok = _mux_audio(video_path, output_path, final_path)
1012
- if mux_ok:
1013
- return str(final_path), diagnostics
 
 
1014
 
1015
- # Fallback: return the silent video if mux failed or ffmpeg not available
1016
- return str(output_path), diagnostics
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro - Memory-Optimized Pipeline
4
+ ===========================================
5
+ Orchestrates SAM2 → MatAnyone → Compositing with aggressive memory management.
6
+ Models are loaded sequentially and freed immediately after use.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  """
8
 
9
  from __future__ import annotations
10
 
11
  import os
12
+ import gc
 
13
  import time
14
  import tempfile
15
  import logging
 
 
16
  from pathlib import Path
17
  from typing import Optional, Tuple, Dict, Any, Union
18
 
19
+ import torch
20
+ from models import (
21
+ load_sam2, run_sam2_mask, load_matany, run_matany,
22
+ fallback_mask, fallback_composite, composite_video,
23
+ _cv_read_first_frame, _save_mask_png, _ensure_dir, _mux_audio, _probe_ffmpeg,
24
+ _refine_mask_grabcut, _build_stage_a_rgba_vp9_from_fg_alpha,
25
+ _build_stage_a_rgba_vp9_from_mask, _build_stage_a_checkerboard_from_fg_alpha,
26
+ _build_stage_a_checkerboard_from_mask
27
+ )
28
+
29
+ # Try to apply GPU/perf tuning early
30
  try:
31
  import perf_tuning # noqa: F401
32
  except Exception:
 
43
  logger.addHandler(_h)
44
 
45
  # --------------------------------------------------------------------------------------
46
+ # Memory Management Utilities
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # --------------------------------------------------------------------------------------
48
+ def _cleanup_temp_files(tmp_root: Path) -> None:
49
+ """Clean up temporary files aggressively"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  try:
51
+ for pattern in ["*.tmp", "*.temp", "*.bak"]:
52
+ for f in tmp_root.glob(pattern):
53
+ f.unlink(missing_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  except Exception:
55
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ def _log_memory() -> float:
58
+ """Log current GPU memory usage and return allocated GB"""
59
+ if torch.cuda.is_available():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  try:
61
+ allocated = torch.cuda.memory_allocated() / 1e9
62
+ reserved = torch.cuda.memory_reserved() / 1e9
63
+ logger.info(f"GPU memory: {allocated:.1f}GB allocated, {reserved:.1f}GB reserved")
64
+ return allocated
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  except Exception:
66
+ pass
67
+ return 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ def _force_cleanup() -> None:
70
+ """Aggressive memory cleanup"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  try:
72
+ gc.collect()
73
+ if torch.cuda.is_available():
74
+ torch.cuda.empty_cache()
75
+ torch.cuda.synchronize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  except Exception as e:
77
+ logger.warning(f"Cleanup warning: {e}")
 
78
 
79
  # --------------------------------------------------------------------------------------
80
+ # Main Processing Function (Memory-Optimized)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  # --------------------------------------------------------------------------------------
82
  def process(video_path: Union[str, Path],
83
  bg_image_path: Union[str, Path],
 
85
  point_y: Optional[float] = None,
86
  auto_box: bool = False,
87
  work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
88
+ """
89
+ Memory-optimized orchestration: lazy loading, sequential model usage, aggressive cleanup.
90
+
91
+ Flow:
92
+ 1. Load SAM2 → get mask → FREE SAM2 immediately
93
+ 2. Load MatAnyone → process → FREE MatAnyone immediately
94
+ 3. Composite & finalize (CPU-based operations)
95
+ """
96
  t0 = time.time()
97
  diagnostics: Dict[str, Any] = {
98
  "sam2_ok": False,
 
104
  "matany_meta": {},
105
  "device_sam2": None,
106
  "device_matany": None,
107
+ "memory_peak_gb": 0.0,
108
  }
109
 
110
  tmp_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="bfx_"))
111
  _ensure_dir(tmp_root)
112
 
113
+ try:
114
+ # 0) Basic video info
115
+ logger.info("Reading video metadata...")
116
+ first_frame, fps, (vw, vh) = _cv_read_first_frame(video_path)
117
+ diagnostics["fps"] = int(fps or 25)
118
+ diagnostics["resolution"] = [int(vw), int(vh)]
119
+
120
+ if first_frame is None or vw == 0 or vh == 0:
121
+ diagnostics["fallback_used"] = "invalid_video"
122
+ return None, diagnostics
123
+
124
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
125
+
126
+ # 1) PHASE 1: SAM2 Loading & Processing → IMMEDIATE CLEANUP
127
+ logger.info("=== PHASE 1: Loading SAM2 for segmentation ===")
128
+ predictor, sam2_ok, sam_meta = load_sam2()
129
+ diagnostics["sam2_meta"] = sam_meta
130
+ diagnostics["device_sam2"] = sam_meta.get("sam2_device") if sam_meta else None
131
+
132
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
133
+
134
+ seed_mask = None
135
+ mask_png = tmp_root / "seed_mask.png"
136
+
137
+ if sam2_ok and predictor is not None:
138
+ logger.info("Running SAM2 segmentation...")
139
+ px = int(point_x) if point_x is not None else None
140
+ py = int(point_y) if point_y is not None else None
141
+
142
+ seed_mask, ok_mask = run_sam2_mask(
143
+ predictor, first_frame,
144
+ point=(px, py) if (px is not None and py is not None) else None,
145
+ auto=auto_box
146
+ )
147
+ diagnostics["sam2_ok"] = bool(ok_mask)
148
+
149
+ # CRITICAL: Free SAM2 immediately after getting the mask
150
+ logger.info("Freeing SAM2 memory...")
151
+ del predictor
152
+ predictor = None
153
+ _force_cleanup()
154
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
155
+
156
+ else:
157
+ ok_mask = False
158
+ logger.info("SAM2 not available or failed to load")
159
+
160
+ # Fallback mask generation if SAM2 failed
161
+ if not ok_mask or seed_mask is None:
162
+ logger.info("Using fallback mask generation...")
163
+ seed_mask = fallback_mask(first_frame)
164
+ diagnostics["fallback_used"] = "mask_generation"
165
+ _force_cleanup()
166
+
167
+ # Optional GrabCut refinement
168
+ if int(os.environ.get("REFINE_GRABCUT", "1")) == 1:
169
+ logger.info("Refining mask with GrabCut...")
170
+ seed_mask = _refine_mask_grabcut(first_frame, seed_mask)
171
+ _force_cleanup()
172
+
173
+ _save_mask_png(seed_mask, mask_png)
174
+
175
+ # Clean up the first frame from memory
176
+ del first_frame
177
+ _force_cleanup()
178
+ _cleanup_temp_files(tmp_root)
179
+
180
+ # 2) PHASE 2: MatAnyone Loading & Processing → IMMEDIATE CLEANUP
181
+ logger.info("=== PHASE 2: Loading MatAnyone for temporal processing ===")
182
+ matany, mat_ok, mat_meta = load_matany()
183
+ diagnostics["matany_meta"] = mat_meta
184
+ diagnostics["device_matany"] = mat_meta.get("matany_device") if mat_meta else None
185
+
186
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
187
+
188
+ fg_path, al_path = None, None
189
+ out_dir = tmp_root / "matany_out"
190
+ _ensure_dir(out_dir)
191
+
192
+ if mat_ok and matany is not None:
193
+ logger.info("Running MatAnyone processing...")
194
+ fg_path, al_path, ran = run_matany(matany, video_path, mask_png, out_dir)
195
+ diagnostics["matany_ok"] = bool(ran)
196
+
197
+ # CRITICAL: Free MatAnyone immediately after processing
198
+ logger.info("Freeing MatAnyone memory...")
199
+ del matany
200
+ matany = None
201
+ _force_cleanup()
202
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
203
+ else:
204
+ ran = False
205
+ logger.info("MatAnyone not available, disabled, or failed to load")
206
+
207
+ # 3) PHASE 3: Stage-A Creation (lightweight, CPU-based)
208
+ logger.info("=== PHASE 3: Building Stage-A (transparent export) ===")
209
+ stageA_path = None
210
+ stageA_ok = False
211
+
212
+ if diagnostics["matany_ok"] and fg_path and al_path:
213
+ stageA_path = tmp_root / "stageA_transparent.webm"
214
+ if _probe_ffmpeg():
215
+ stageA_ok = _build_stage_a_rgba_vp9_from_fg_alpha(
216
+ fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh), src_audio=video_path
217
+ )
218
+ if not stageA_ok:
219
+ stageA_path = tmp_root / "stageA_checkerboard.mp4"
220
+ stageA_ok = _build_stage_a_checkerboard_from_fg_alpha(
221
+ fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh)
222
+ )
223
+ else:
224
+ stageA_path = tmp_root / "stageA_transparent.webm"
225
+ if _probe_ffmpeg():
226
+ stageA_ok = _build_stage_a_rgba_vp9_from_mask(
227
+ video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
228
+ )
229
+ if not stageA_ok:
230
+ stageA_path = tmp_root / "stageA_checkerboard.mp4"
231
+ stageA_ok = _build_stage_a_checkerboard_from_mask(
232
+ video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
233
+ )
234
+
235
+ diagnostics["stageA_path"] = str(stageA_path) if stageA_ok else None
236
+ diagnostics["stageA_note"] = (
237
+ "WebM with real alpha (VP9)" if stageA_ok and str(stageA_path).endswith(".webm")
238
+ else ("MP4 checkerboard preview (no real alpha)" if stageA_ok else "Stage-A build failed")
239
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
+ # Optional: return Stage-A instead of final composite
242
+ if os.environ.get("RETURN_STAGE_A", "0").strip() == "1" and stageA_ok:
243
+ _force_cleanup()
244
+ _cleanup_temp_files(tmp_root)
245
+ return str(stageA_path), diagnostics
246
+
247
+ # 4) PHASE 4: Final Compositing (CPU-based, memory-efficient)
248
+ logger.info("=== PHASE 4: Creating final composite ===")
249
+ output_path = tmp_root / "output.mp4"
250
+
251
+ if diagnostics["matany_ok"] and fg_path and al_path:
252
+ logger.info("Compositing with MatAnyone outputs...")
253
+ ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
254
+ if not ok_comp:
255
+ logger.info("MatAnyone composite failed; falling back to static mask composite.")
256
+ fallback_composite(video_path, mask_png, bg_image_path, output_path)
257
+ diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
258
+ else:
259
+ logger.info("Using static mask composite...")
260
+ fallback_composite(video_path, mask_png, bg_image_path, output_path)
261
+ diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") or "composite_static"
262
 
263
+ # Clean up intermediate files
264
+ _cleanup_temp_files(tmp_root)
265
+ _force_cleanup()
 
 
 
 
 
266
 
267
+ # 5) PHASE 5: Audio Muxing (final step)
268
+ logger.info("=== PHASE 5: Adding audio track ===")
269
+ final_path = tmp_root / "output_with_audio.mp4"
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  if _probe_ffmpeg():
271
+ mux_ok = _mux_audio(video_path, output_path, final_path)
272
+ if mux_ok:
273
+ # Clean up the silent version
274
+ output_path.unlink(missing_ok=True)
275
+ _force_cleanup()
276
+ diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
277
+ logger.info(f"Processing completed successfully in {diagnostics['elapsed_sec']}s")
278
+ logger.info(f"Peak GPU memory usage: {diagnostics['memory_peak_gb']:.1f}GB")
279
+ return str(final_path), diagnostics
280
+
281
+ # Final cleanup
282
+ _force_cleanup()
283
+ diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
284
+ logger.info(f"Processing completed in {diagnostics['elapsed_sec']}s (no audio)")
285
+ logger.info(f"Peak GPU memory usage: {diagnostics['memory_peak_gb']:.1f}GB")
286
+ return str(output_path), diagnostics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ except Exception as e:
289
+ logger.error(f"Processing failed: {e}")
290
+ import traceback
291
+ logger.error(f"Traceback: {traceback.format_exc()}")
292
+ _force_cleanup()
293
+ diagnostics["error"] = str(e)
294
+ diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
295
+ return None, diagnostics
296
 
297
+ finally:
298
+ # Ensure cleanup even if something goes wrong
299
+ _force_cleanup()
300
+ _cleanup_temp_files(tmp_root)