MogensR committed on
Commit
7aa0137
·
1 Parent(s): 90db7e5
Files changed (2) hide show
  1. models.py +0 -795
  2. models/__init__.py +795 -0
models.py DELETED
@@ -1,795 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- BackgroundFX Pro - Model Loading & Utilities
4
- ===========================================
5
- Contains all model loading, inference functions, and utility functions
6
- moved from the main pipeline for better organization.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import os
12
- import sys
13
- import cv2
14
- import subprocess
15
- import inspect
16
- import logging
17
- from pathlib import Path
18
- from typing import Optional, Tuple, Dict, Any, Union
19
-
20
- import numpy as np
21
- import yaml
22
import torch  # For memory management and CUDA operations
24
-
25
# --------------------------------------------------------------------------------------
# Logging
# --------------------------------------------------------------------------------------
# Shared module logger; this module never configures handlers or levels itself.
logger = logging.getLogger("backgroundfx_pro")
29
-
30
- # --------------------------------------------------------------------------------------
31
- # Optional dependencies
32
- # --------------------------------------------------------------------------------------
33
# MediaPipe is optional: it is only consulted by fallback_mask() below when the
# primary segmentation models are unavailable.
try:
    import mediapipe as mp  # type: ignore
    _HAS_MEDIAPIPE = True
except Exception:
    # Any import-time failure (missing wheel, broken native deps) just disables the fallback.
    _HAS_MEDIAPIPE = False
38
-
39
- # --------------------------------------------------------------------------------------
40
- # Path setup for third_party repos
41
- # --------------------------------------------------------------------------------------
42
# Vendored third-party checkouts; deployments can point elsewhere via env vars.
ROOT = Path(__file__).resolve().parent
TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
TP_MATANY = Path(os.environ.get("THIRD_PARTY_MATANY_DIR", ROOT / "third_party" / "matanyone")).resolve()
45
-
46
- def _add_sys_path(p: Path) -> None:
47
- p_str = str(p)
48
- if p_str not in sys.path:
49
- sys.path.insert(0, p_str)
50
-
51
- _add_sys_path(TP_SAM2)
52
- _add_sys_path(TP_MATANY)
53
-
54
- # --------------------------------------------------------------------------------------
55
- # Basic Utilities
56
- # --------------------------------------------------------------------------------------
57
- def _ffmpeg_bin() -> str:
58
- return os.environ.get("FFMPEG_BIN", "ffmpeg")
59
-
60
def _probe_ffmpeg() -> bool:
    """Return True when the ffmpeg binary can actually run (`ffmpeg -version`)."""
    try:
        subprocess.run(
            [_ffmpeg_bin(), "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except Exception:
        return False
    return True
66
-
67
- def _has_cuda() -> bool:
68
- try:
69
- import torch # type: ignore
70
- return torch.cuda.is_available()
71
- except Exception:
72
- return False
73
-
74
- def _pick_device(env_key: str) -> str:
75
- requested = os.environ.get(env_key, "").strip().lower()
76
- if requested in {"cuda", "cpu"}:
77
- return requested
78
- return "cuda" if _has_cuda() else "cpu"
79
-
80
def _ensure_dir(p: Path) -> None:
    """Create directory *p* (including parents); no-op if it already exists."""
    p.mkdir(parents=True, exist_ok=True)
82
-
83
def _cv_read_first_frame(video_path: Union[str, Path]) -> Tuple[Optional[np.ndarray], int, Tuple[int, int]]:
    """Open *video_path* and return (first_frame_or_None, fps, (width, height)).

    fps falls back to 25 when the container reports nothing; an unopenable
    file yields (None, 0, (0, 0)). The capture is always released.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return None, 0, (0, 0)
    fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))
    grabbed, frame = cap.read()
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    cap.release()
    if grabbed:
        return frame, fps, (width, height)
    return None, fps, (width, height)
95
-
96
def _save_mask_png(mask: np.ndarray, path: Union[str, Path]) -> str:
    """Write *mask* to *path* as an 8-bit grayscale PNG and return the path string.

    Accepts bool, uint8, and float masks. Float masks whose values lie in
    [0, 1] are rescaled to [0, 255] first — the previous code clipped them to
    0/1, producing an effectively black image that downstream `> 127`
    thresholds could never recover.
    """
    if mask.dtype == bool:
        mask = mask.astype(np.uint8) * 255
    elif mask.dtype != np.uint8:
        m = mask.astype(np.float32)
        # Conventional float alpha range is [0, 1]; promote to 8-bit range.
        if m.size and m.max() <= 1.0:
            m = m * 255.0
        mask = np.clip(m, 0, 255).astype(np.uint8)
    cv2.imwrite(str(path), mask)
    return str(path)
103
-
104
def _resize_keep_ar(image: np.ndarray, target_wh: Tuple[int, int]) -> np.ndarray:
    """Resize *image* to fit inside (target_w, target_h) preserving aspect ratio.

    The scaled image is centered (letterboxed) on a zero-filled canvas of
    exactly the target size. Handles both H x W x C and single-channel H x W
    inputs — the previous version always allocated a 3-channel canvas and
    raised on grayscale images. Degenerate input/target sizes are returned
    unchanged.
    """
    tw, th = target_wh
    h, w = image.shape[:2]
    if h == 0 or w == 0 or tw == 0 or th == 0:
        return image
    scale = min(tw / w, th / h)
    nw = max(1, int(round(w * scale)))
    nh = max(1, int(round(h * scale)))
    resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_CUBIC)
    # Match the resized image's channel layout instead of hard-coding 3 channels.
    canvas = np.zeros((th, tw) + resized.shape[2:], dtype=resized.dtype)
    x0 = (tw - nw) // 2
    y0 = (th - nh) // 2
    canvas[y0:y0 + nh, x0:x0 + nw] = resized
    return canvas
117
-
118
def _video_writer(out_path: Path, fps: int, size: Tuple[int, int]) -> cv2.VideoWriter:
    """Open an MP4 ('mp4v') cv2.VideoWriter at *size*; fps is clamped to >= 1."""
    codec = cv2.VideoWriter_fourcc(*"mp4v")
    safe_fps = fps if fps > 1 else 1
    return cv2.VideoWriter(str(out_path), codec, safe_fps, size)
121
-
122
def _mux_audio(src_video: Union[str, Path], silent_video: Union[str, Path], out_path: Union[str, Path]) -> bool:
    """Copy video from silent_video + audio from src_video into out_path (AAC).

    Returns True on success. Any ffmpeg failure is logged as a warning and
    reported as False so the caller can fall back to the silent video.
    """
    try:
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(silent_video),
            "-i", str(src_video),
            "-map", "0:v:0",
            "-map", "1:a:0?",  # trailing '?' makes the audio stream optional
            "-c:v", "copy",    # video is already encoded; never re-encode here
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            str(out_path)
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Audio mux failed; returning silent video. Reason: {e}")
        return False
141
-
142
- # --------------------------------------------------------------------------------------
143
- # Compositing & Image Processing
144
- # --------------------------------------------------------------------------------------
145
def _refine_alpha(alpha: np.ndarray, erode_px: int = 1, dilate_px: int = 2, blur_px: float = 1.5) -> np.ndarray:
    """Clean up an alpha matte: erode → dilate, then a gentle Gaussian blur.

    Args:
        alpha: matte as bool/uint8/float, in either a [0, 1] or [0, 255] range.
        erode_px / dilate_px: morphology kernel sizes in pixels (<= 0 disables).
        blur_px: Gaussian blur radius (<= 0 disables).

    Returns:
        float32 alpha clipped to [0, 1].

    Fix over the original: normalization now keys on the value range, not the
    dtype — a float32 input already in [0, 255] was previously left unscaled
    and saturated to solid white.
    """
    a = alpha.astype(np.float32)
    if a.size and a.max() > 1.0:
        a = a / 255.0

    a_u8 = np.clip(np.round(a * 255.0), 0, 255).astype(np.uint8)
    if erode_px > 0:
        k = max(1, int(erode_px))
        a_u8 = cv2.erode(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
    if dilate_px > 0:
        k = max(1, int(dilate_px))
        a_u8 = cv2.dilate(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
    a = a_u8.astype(np.float32) / 255.0

    if blur_px and blur_px > 0:
        rad = max(1, int(round(blur_px)))
        # GaussianBlur needs an odd kernel size; `rad | 1` bumps even radii by one.
        a = cv2.GaussianBlur(a, (rad | 1, rad | 1), 0)

    return np.clip(a, 0.0, 1.0)
168
-
169
- def _to_linear(rgb: np.ndarray, gamma: float = 2.2) -> np.ndarray:
170
- x = np.clip(rgb.astype(np.float32) / 255.0, 0.0, 1.0)
171
- return np.power(x, gamma)
172
-
173
- def _to_srgb(lin: np.ndarray, gamma: float = 2.2) -> np.ndarray:
174
- x = np.clip(lin, 0.0, 1.0)
175
- return np.clip(np.power(x, 1.0 / gamma) * 255.0, 0, 255).astype(np.uint8)
176
-
177
def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount: float = 0.18) -> np.ndarray:
    """Additive 'light wrap': blurred background light bleeding into subject edges."""
    r = max(1, int(radius))
    # Blurring the inverted alpha spreads background weight across the boundary.
    bg_weight = cv2.GaussianBlur(1.0 - alpha01, (r | 1, r | 1), 0)
    return bg_rgb.astype(np.float32) * bg_weight[..., None] * float(amount)
184
-
185
def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35) -> np.ndarray:
    """Desaturate the alpha≈0.5 boundary band to remove old-background tint.

    The weight is bell-shaped: it peaks where alpha == 0.5 and vanishes at 0
    and 1, so only genuinely mixed edge pixels lose saturation.
    """
    band = np.clip(1.0 - 2.0 * np.abs(alpha01 - 0.5), 0.0, 1.0)
    hsv = cv2.cvtColor(fg_rgb.astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
    hue, sat, val = cv2.split(hsv)
    sat = sat * (1.0 - amount * band)
    merged = cv2.merge([hue, np.clip(sat, 0, 255), val])
    return cv2.cvtColor(merged.astype(np.uint8), cv2.COLOR_HSV2RGB)
195
-
196
def _composite_frame_pro(fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarray,
                         erode_px: Optional[int] = None, dilate_px: Optional[int] = None, blur_px: Optional[float] = None,
                         lw_radius: Optional[int] = None, lw_amount: Optional[float] = None,
                         despill_amount: Optional[float] = None) -> np.ndarray:
    """Gamma-aware composite + edge refinement + light wrap + boundary de-spill.

    Any parameter left as None is resolved from its environment variable
    (EDGE_ERODE, EDGE_DILATE, EDGE_BLUR, LIGHTWRAP_RADIUS, LIGHTWRAP_AMOUNT,
    DESPILL_AMOUNT), allowing per-deployment tuning without code changes.
    Returns an RGB uint8 frame the size of the inputs.
    """
    erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
    dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
    blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
    lw_radius = lw_radius if lw_radius is not None else int(os.environ.get("LIGHTWRAP_RADIUS", "5"))
    lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
    despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))

    # refine alpha [0,1]
    a = _refine_alpha(alpha, erode_px=erode_px, dilate_px=dilate_px, blur_px=blur_px)

    # edge de-spill: temper saturation where a≈0.5 (must happen before linearizing,
    # since it operates in 8-bit HSV space)
    fg_rgb = _despill_edges(fg_rgb, a, amount=despill_amount)

    # linearize for better blending
    fg_lin = _to_linear(fg_rgb)
    bg_lin = _to_linear(bg_rgb)

    # light wrap: additive background bleed along the subject edge
    lw = _light_wrap(bg_rgb, a, radius=lw_radius, amount=lw_amount)
    lw_lin = _to_linear(np.clip(lw, 0, 255).astype(np.uint8))

    # standard over-composite in linear light, plus the light-wrap term
    comp_lin = fg_lin * a[..., None] + bg_lin * (1.0 - a[..., None]) + lw_lin
    comp = _to_srgb(comp_lin)
    return comp
225
-
226
- # --------------------------------------------------------------------------------------
227
- # SAM2 Integration
228
- # --------------------------------------------------------------------------------------
229
def _resolve_sam2_cfg(cfg_str: str) -> str:
    """Resolve a SAM2 config reference to an absolute path when possible.

    Resolution order: relative to the vendored SAM2 checkout, then the path
    as given, then well-known default configs inside the checkout. Falls back
    to the raw string so build_sam2 can raise a clear error.
    """
    cfg_path = Path(cfg_str)
    if not cfg_path.is_absolute():
        in_repo = TP_SAM2 / cfg_path
        if in_repo.exists():
            return str(in_repo)
    if cfg_path.exists():
        return str(cfg_path)
    defaults = (
        "configs/sam2/sam2_hiera_l.yaml",
        "configs/sam2/sam2_hiera_b.yaml",
        "configs/sam2/sam2_hiera_s.yaml",
    )
    for name in defaults:
        candidate = TP_SAM2 / name
        if candidate.exists():
            return str(candidate)
    return str(cfg_str)  # let build_sam2 raise a clear error
244
-
245
def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
    """If config references 'hieradet', try to find a 'hiera' config.

    Reads the image-encoder trunk target from *cfg_path*; when it points at a
    'hieradet' class, scans every YAML under the SAM2 checkout for one whose
    trunk target lives in a `.hiera.` module and returns the first match.
    Returns None when no substitution applies. All parse/IO errors are
    deliberately swallowed — this is a best-effort rescue path.
    """
    try:
        with open(cfg_path, "r") as f:
            data = yaml.safe_load(f)
        target = None
        model = data.get("model", {})
        enc = (model.get("image_encoder") or {})
        trunk = (enc.get("trunk") or {})
        # Hydra-style configs use `_target_`; tolerate a plain `target` key too.
        target = trunk.get("_target_") or trunk.get("target")
        if isinstance(target, str) and "hieradet" in target:
            for y in TP_SAM2.rglob("*.yaml"):
                try:
                    with open(y, "r") as f2:
                        d2 = yaml.safe_load(f2)
                    m2 = (d2 or {}).get("model", {})
                    e2 = (m2.get("image_encoder") or {})
                    t2 = (e2.get("trunk") or {})
                    tgt2 = t2.get("_target_") or t2.get("target")
                    if isinstance(tgt2, str) and ".hiera." in tgt2:
                        logger.info(f"SAM2: switching config from 'hieradet' → 'hiera': {y}")
                        return str(y)
                except Exception:
                    # Unparseable candidate YAML — keep scanning.
                    continue
    except Exception:
        pass
    return None
272
-
273
def load_sam2() -> Tuple[Optional[object], bool, Dict[str, Any]]:
    """Robust SAM2 loader with config resolution and error handling.

    Returns (predictor_or_None, ok, meta). `meta` records how far loading got
    (import vs. init) plus the device/config/checkpoint actually used.
    Configuration comes from the SAM2_DEVICE, SAM2_MODEL_CFG and
    SAM2_CHECKPOINT environment variables.
    """
    meta = {"sam2_import_ok": False, "sam2_init_ok": False}
    try:
        from sam2.build_sam import build_sam2  # type: ignore
        from sam2.sam2_image_predictor import SAM2ImagePredictor  # type: ignore
        meta["sam2_import_ok"] = True
    except Exception as e:
        logger.warning(f"SAM2 import failed: {e}")
        return None, False, meta

    device = _pick_device("SAM2_DEVICE")
    cfg_env = os.environ.get("SAM2_MODEL_CFG", "configs/sam2/sam2_hiera_l.yaml")
    cfg = _resolve_sam2_cfg(cfg_env)
    ckpt = os.environ.get("SAM2_CHECKPOINT", "")

    def _try_build(cfg_path: str):
        # Different SAM2 forks name the constructor parameters differently;
        # inspect the signature and pass whichever keywords it accepts.
        params = set(inspect.signature(build_sam2).parameters.keys())
        kwargs = {}
        if "config_file" in params:
            kwargs["config_file"] = cfg_path
        elif "model_cfg" in params:
            kwargs["model_cfg"] = cfg_path
        if ckpt:
            if "checkpoint" in params:
                kwargs["checkpoint"] = ckpt
            elif "ckpt_path" in params:
                kwargs["ckpt_path"] = ckpt
            elif "weights" in params:
                kwargs["weights"] = ckpt
        if "device" in params:
            kwargs["device"] = device
        try:
            return build_sam2(**kwargs)
        except TypeError:
            # Keyword call rejected — retry positionally: (cfg, [ckpt], [device]).
            pos = [cfg_path]
            if ckpt:
                pos.append(ckpt)
            if "device" not in kwargs:
                pos.append(device)
            return build_sam2(*pos)

    try:
        try:
            sam = _try_build(cfg)
        except Exception as e1:
            # A 'hieradet' config can fail on forks that only ship 'hiera';
            # retry once with an equivalent 'hiera' config if one can be found.
            alt_cfg = _find_hiera_config_if_hieradet(cfg)
            if alt_cfg:
                logger.info(f"SAM2: retrying with alt config: {alt_cfg}")
                sam = _try_build(alt_cfg)
                cfg = alt_cfg
            else:
                raise

        predictor = SAM2ImagePredictor(sam)
        meta.update({
            "sam2_init_ok": True,
            "sam2_device": device,
            "sam2_cfg": cfg,
            "sam2_ckpt": ckpt or "(repo default)"
        })
        return predictor, True, meta
    except Exception as e:
        logger.error(f"SAM2 init failed: {e}")
        return None, False, meta
338
-
339
def run_sam2_mask(predictor: object,
                  first_frame_bgr: np.ndarray,
                  point: Optional[Tuple[int, int]] = None,
                  auto: bool = False) -> Tuple[Optional[np.ndarray], bool]:
    """Run one SAM2 prediction on the first frame.

    Prompt selection: `auto` uses a near-full-frame box (5% inset), an
    explicit `point` uses a single positive click, otherwise a centered 80%
    box is used.

    Returns (mask_uint8_0_255, ok); (None, False) on any failure.
    """
    if predictor is None:
        return None, False
    try:
        rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
        predictor.set_image(rgb)

        if auto:
            h, w = rgb.shape[:2]
            box = np.array([int(0.05 * w), int(0.05 * h), int(0.95 * w), int(0.95 * h)])
            masks, _, _ = predictor.predict(box=box)
        elif point is not None:
            x, y = int(point[0]), int(point[1])
            pts = np.array([[x, y]], dtype=np.int32)
            labels = np.array([1], dtype=np.int32)
            masks, _, _ = predictor.predict(point_coords=pts, point_labels=labels)
        else:
            h, w = rgb.shape[:2]
            box = np.array([int(0.1 * w), int(0.1 * h), int(0.9 * w), int(0.9 * h)])
            masks, _, _ = predictor.predict(box=box)

        if masks is None or len(masks) == 0:
            return None, False

        # Threshold instead of `astype(np.uint8) * 255`: the old expression
        # wrapped around (255 * 255 → 1) when the predictor returned 0/255
        # uint8 masks, and truncated float probability masks to 0/1 before
        # scaling. `> 0.5` is correct for bool, 0/1, 0/255 and float masks.
        m = (np.asarray(masks[0]) > 0.5).astype(np.uint8) * 255
        return m, True
    except Exception as e:
        logger.warning(f"SAM2 mask failed: {e}")
        return None, False
372
-
373
def _refine_mask_grabcut(image_bgr: np.ndarray,
                         mask_u8: np.ndarray,
                         iters: Optional[int] = None,
                         trimap_erode: Optional[int] = None,
                         trimap_dilate: Optional[int] = None) -> np.ndarray:
    """Use a seed mask (e.g. from SAM2) as initialization for GrabCut refinement.

    A trimap is built by eroding the binarized mask (sure foreground) and
    eroding its complement (sure background); everything else is left as
    "probable background" for GrabCut to decide. None parameters default from
    REFINE_GRABCUT_ITERS / REFINE_TRIMAP_ERODE / REFINE_TRIMAP_DILATE env vars.

    Returns a 0/255 uint8 mask; on GrabCut failure, the binarized input mask.

    Fix over the original: the exception variable in the fallback path was
    named `e`, shadowing the trimap-erode size local `e` — locals renamed to
    remove the shadowing hazard (log output is unchanged).
    """
    iters = int(os.environ.get("REFINE_GRABCUT_ITERS", "2")) if iters is None else int(iters)
    erode_k = int(os.environ.get("REFINE_TRIMAP_ERODE", "3")) if trimap_erode is None else int(trimap_erode)
    dilate_k = int(os.environ.get("REFINE_TRIMAP_DILATE", "6")) if trimap_dilate is None else int(trimap_dilate)

    h, w = mask_u8.shape[:2]
    m = (mask_u8 > 127).astype(np.uint8) * 255

    sure_fg = cv2.erode(m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, erode_k), max(1, erode_k))), iterations=1)
    # Eroding the complement pulls "sure background" away from the subject edge.
    sure_bg = cv2.erode(255 - m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, dilate_k), max(1, dilate_k))), iterations=1)

    gc_mask = np.full((h, w), cv2.GC_PR_BGD, dtype=np.uint8)
    gc_mask[sure_bg > 0] = cv2.GC_BGD
    gc_mask[sure_fg > 0] = cv2.GC_FGD

    # GrabCut requires these 1x65 model buffers even when starting from a mask.
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(image_bgr, gc_mask, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
        out = np.where((gc_mask == cv2.GC_FGD) | (gc_mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
        out = cv2.medianBlur(out, 5)
        return out
    except Exception as exc:
        logger.warning(f"GrabCut refinement failed; using original mask. Reason: {exc}")
        return m
403
-
404
- # --------------------------------------------------------------------------------------
405
- # MatAnyone Integration
406
- # --------------------------------------------------------------------------------------
407
def load_matany() -> Tuple[Optional[object], bool, Dict[str, Any]]:
    """MatAnyone loader with disable switch and error handling.

    Returns (core_or_None, ok, meta). Honors ENABLE_MATANY=0/false/off/no as a
    hard disable; device/repo/checkpoint come from MATANY_DEVICE,
    MATANY_REPO_ID and MATANY_CHECKPOINT environment variables. Because
    different MatAnyone forks expose different constructors, several argument
    combinations are attempted in order.
    """
    meta = {"matany_import_ok": False, "matany_init_ok": False}

    enable_env = os.environ.get("ENABLE_MATANY", "1").strip().lower()
    if enable_env in {"0", "false", "off", "no"}:
        logger.info("MatAnyone disabled by ENABLE_MATANY=0.")
        meta["disabled"] = True
        return None, False, meta

    try:
        # Some forks expose InferenceCore at top level, others under the package.
        try:
            from inference_core import InferenceCore  # type: ignore
        except Exception:
            from matanyone.inference.inference_core import InferenceCore  # type: ignore
        meta["matany_import_ok"] = True
    except Exception as e:
        logger.warning(f"MatAnyone import failed: {e}")
        return None, False, meta

    device = _pick_device("MATANY_DEVICE")
    repo_id = os.environ.get("MATANY_REPO_ID", "")
    ckpt = os.environ.get("MATANY_CHECKPOINT", "")

    # Check if this fork needs a prebuilt network: a required `network`
    # parameter means we cannot construct the core from a checkpoint here.
    try:
        sig = inspect.signature(InferenceCore)
        if "network" in sig.parameters and sig.parameters["network"].default is inspect._empty:
            logger.error(
                "This MatAnyone fork expects `InferenceCore(network=...)`. "
                "Pin a fork/commit that supplies a checkpoint-based constructor, "
                "or set ENABLE_MATANY=0 to skip."
            )
            meta["needs_network_arg"] = True
            return None, False, meta
    except Exception:
        pass

    # Constructor variants, most specific first; first one that works wins.
    candidates = [
        {"kwargs": {"repo_id": repo_id or None, "checkpoint": ckpt or None, "device": device}},
        {"kwargs": {"checkpoint": ckpt or None, "device": device}},
        {"args": (), "kwargs": {"device": device}},
    ]
    last_err = None
    for cand in candidates:
        try:
            matany = InferenceCore(*cand.get("args", ()), **cand.get("kwargs", {}))
            meta["matany_init_ok"] = True
            meta["matany_device"] = device
            meta["matany_repo_id"] = repo_id or "(unset)"
            meta["matany_checkpoint"] = ckpt or "(unset)"
            return matany, True, meta
        except Exception as e:
            last_err = e
            continue

    logger.error(f"MatAnyone init failed with all fallbacks: {last_err}")
    return None, False, meta
465
-
466
def run_matany(matany: object,
               video_path: Union[str, Path],
               first_mask_path: Union[str, Path],
               work_dir: Union[str, Path]) -> Tuple[Optional[str], Optional[str], bool]:
    """Run MatAnyone on a video seeded with a first-frame mask.

    Returns (foreground_video_path, alpha_video_path, ok). Tries the two
    entry points seen across forks (`process_video`, then `run`) and accepts
    either tuple/list or dict results, probing the dict key spellings each
    fork uses. (None, None, False) when nothing usable comes back.
    """
    if matany is None:
        return None, None, False
    try:
        if hasattr(matany, "process_video"):
            out = matany.process_video(input_path=str(video_path), mask_path=str(first_mask_path), output_dir=str(work_dir))
            if isinstance(out, (list, tuple)) and len(out) >= 2:
                # Positional convention: (foreground, alpha, ...)
                return str(out[0]), str(out[1]), True
            if isinstance(out, dict):
                fg = out.get("foreground") or out.get("fg") or out.get("foreground_path")
                al = out.get("alpha") or out.get("alpha_path")
                if fg and al:
                    return str(fg), str(al), True

        if hasattr(matany, "run"):
            out = matany.run(video_path=str(video_path), seed_mask=str(first_mask_path), out_dir=str(work_dir))
            if isinstance(out, dict):
                fg = out.get("foreground") or out.get("fg") or out.get("foreground_path")
                al = out.get("alpha") or out.get("alpha_path")
                if fg and al:
                    return str(fg), str(al), True

        logger.error("MatAnyone returned no usable paths.")
        return None, None, False
    except Exception as e:
        logger.warning(f"MatAnyone processing failed: {e}")
        return None, None, False
497
-
498
- # --------------------------------------------------------------------------------------
499
- # Fallback Functions
500
- # --------------------------------------------------------------------------------------
501
def fallback_mask(first_frame_bgr: np.ndarray) -> np.ndarray:
    """Prefer MediaPipe; fallback to GrabCut. Returns uint8 mask 0/255.

    Used when SAM2 is unavailable or failed. The final safety net is an
    all-zero mask so callers always receive a valid array of frame size.
    """
    h, w = first_frame_bgr.shape[:2]
    if _HAS_MEDIAPIPE:
        try:
            mp_selfie = mp.solutions.selfie_segmentation
            # model_selection=1 is the landscape-oriented selfie model.
            with mp_selfie.SelfieSegmentation(model_selection=1) as segmenter:
                rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
                res = segmenter.process(rgb)
                m = (np.clip(res.segmentation_mask, 0, 1) > 0.5).astype(np.uint8) * 255
                m = cv2.medianBlur(m, 5)  # knock out salt-and-pepper speckle
                return m
        except Exception as e:
            logger.warning(f"MediaPipe fallback failed: {e}")

    # Ultimate fallback: GrabCut seeded with a centered 80% rectangle.
    mask = np.zeros((h, w), np.uint8)
    rect = (int(0.1*w), int(0.1*h), int(0.8*w), int(0.8*h))
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(first_frame_bgr, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
        mask_bin = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
        return mask_bin
    except Exception as e:
        logger.warning(f"GrabCut failed: {e}")
        return np.zeros((h, w), dtype=np.uint8)
528
-
529
def composite_video(fg_path: Union[str, Path],
                    alpha_path: Union[str, Path],
                    bg_image_path: Union[str, Path],
                    out_path: Union[str, Path],
                    fps: int,
                    size: Tuple[int, int]) -> bool:
    """Blend MatAnyone FG+ALPHA over background using pro compositor.

    Reads the foreground and alpha videos in lockstep, composites each frame
    over the (letterboxed) background image, and writes *out_path*. When
    ffmpeg is available the result is re-encoded to H.264/yuv420p for broad
    playback compatibility; otherwise the raw mp4v file is kept.
    Returns True if at least one frame was written.
    """
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    if not fg_cap.isOpened() or not al_cap.isOpened():
        return False

    w, h = size
    bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
    if bg is None:
        # Missing/broken background image: fall back to flat mid-gray.
        bg = np.full((h, w, 3), 127, dtype=np.uint8)
    bg_f = _resize_keep_ar(bg, (w, h))

    if _probe_ffmpeg():
        # Write mp4v to a temp file first; finalize to H.264 below.
        tmp_out = Path(str(out_path) + ".tmp.mp4")
        writer = _video_writer(tmp_out, fps, (w, h))
        post_h264 = True
    else:
        writer = _video_writer(Path(out_path), fps, (w, h))
        post_h264 = False

    ok_any = False
    try:
        while True:
            ok_fg, fg = fg_cap.read()
            ok_al, al = al_cap.read()
            if not ok_fg or not ok_al:
                # Stop at the shorter of the two streams.
                break
            fg = cv2.resize(fg, (w, h), interpolation=cv2.INTER_CUBIC)
            al_gray = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)

            # Compositor works in RGB; cv2 delivers BGR.
            comp = _composite_frame_pro(
                cv2.cvtColor(fg, cv2.COLOR_BGR2RGB),
                al_gray,
                cv2.cvtColor(bg_f, cv2.COLOR_BGR2RGB)
            )
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        fg_cap.release()
        al_cap.release()
        writer.release()

    if post_h264 and ok_any:
        try:
            cmd = [
                _ffmpeg_bin(), "-y",
                "-i", str(tmp_out),
                "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
                str(out_path)
            ]
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            tmp_out.unlink(missing_ok=True)
        except Exception as e:
            # Re-encode failed: keep the mp4v temp file as the final output.
            logger.warning(f"ffmpeg finalize failed: {e}")
            Path(out_path).unlink(missing_ok=True)
            tmp_out.replace(out_path)

    return ok_any
593
-
594
def fallback_composite(video_path: Union[str, Path],
                       mask_path: Union[str, Path],
                       bg_image_path: Union[str, Path],
                       out_path: Union[str, Path]) -> bool:
    """Static-mask compositing using pro compositor.

    Applies one fixed mask to every frame of *video_path* — used when
    MatAnyone (per-frame alpha) is unavailable. Output is H.264-finalized
    when ffmpeg is present, raw mp4v otherwise. Returns True if at least one
    frame was written.
    """
    mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
    cap = cv2.VideoCapture(str(video_path))
    if mask is None or not cap.isOpened():
        return False

    # Output geometry/rate come from the source video itself.
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))

    bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
    if bg is None:
        # Missing/broken background image: fall back to flat mid-gray.
        bg = np.full((h, w, 3), 127, dtype=np.uint8)

    # NEAREST keeps the mask binary; the compositor does its own feathering.
    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    bg_f = _resize_keep_ar(bg, (w, h))

    if _probe_ffmpeg():
        # Write mp4v to a temp file first; finalize to H.264 below.
        tmp_out = Path(str(out_path) + ".tmp.mp4")
        writer = _video_writer(tmp_out, fps, (w, h))
        use_post_ffmpeg = True
    else:
        writer = _video_writer(Path(out_path), fps, (w, h))
        use_post_ffmpeg = False

    ok_any = False
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            comp = _composite_frame_pro(
                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
                mask_resized,
                cv2.cvtColor(bg_f, cv2.COLOR_BGR2RGB)
            )
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        cap.release()
        writer.release()

    if use_post_ffmpeg and ok_any:
        try:
            cmd = [
                _ffmpeg_bin(), "-y",
                "-i", str(tmp_out),
                "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
                str(out_path)
            ]
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            tmp_out.unlink(missing_ok=True)
        except Exception as e:
            # Re-encode failed: keep the mp4v temp file as the final output.
            logger.warning(f"ffmpeg H.264 finalize failed: {e}")
            Path(out_path).unlink(missing_ok=True)
            tmp_out.replace(out_path)

    return ok_any
656
-
657
- # --------------------------------------------------------------------------------------
658
- # Stage-A (Transparent Export) Functions
659
- # --------------------------------------------------------------------------------------
660
- def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
661
- """RGB checkerboard for preview when no real alpha is possible."""
662
- y, x = np.mgrid[0:h, 0:w]
663
- c = ((x // tile) + (y // tile)) % 2
664
- a = np.where(c == 0, 200, 150).astype(np.uint8)
665
- return np.stack([a, a, a], axis=-1)
666
-
667
def _build_stage_a_rgba_vp9_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
    src_audio: Optional[Union[str, Path]] = None,
) -> bool:
    """Merge FG+ALPHA → RGBA WebM (VP9 with alpha).

    Scales both streams to *size*/*fps*, merges the alpha stream into the
    foreground via ffmpeg's `alphamerge`, and encodes VP9 with the yuva420p
    pixel format (true alpha channel). Optionally muxes audio from
    *src_audio* as Opus. Requires ffmpeg; returns False when it is missing or
    the command fails.
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        cmd = [_ffmpeg_bin(), "-y", "-i", str(fg_path), "-i", str(alpha_path)]
        if src_audio:
            cmd += ["-i", str(src_audio)]
        # [1:v] = alpha video → grayscale; [0:v] = foreground; alphamerge combines them.
        fcx = f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];" \
              f"[0:v]scale={w}:{h},fps={fps}[fg];" \
              f"[fg][al]alphamerge[outv]"
        cmd += ["-filter_complex", fcx, "-map", "[outv]"]
        if src_audio:
            cmd += ["-map", "2:a:0?", "-c:a", "libopus", "-b:a", "128k"]
        cmd += [
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1", "-shortest", str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}")
        return False
699
-
700
def _build_stage_a_rgba_vp9_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Merge original video + static mask → RGBA WebM (VP9 with alpha).

    Like _build_stage_a_rgba_vp9_from_fg_alpha but the alpha comes from one
    still PNG looped for the whole clip (`-loop 1`). Requires ffmpeg;
    returns False when it is missing or the command fails.
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(video_path),
            "-loop", "1", "-i", str(mask_png),
            "-filter_complex",
            # [1:v] = looped mask → grayscale alpha; [0:v] = source video.
            f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
            f"[0:v]scale={w}:{h},fps={fps}[fg];"
            f"[fg][al]alphamerge[outv]",
            "-map", "[outv]",
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1", "-shortest", str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}")
        return False
730
-
731
def _build_stage_a_checkerboard_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: FG+ALPHA over checkerboard → MP4 (no real alpha channel)."""
    fg_reader = cv2.VideoCapture(str(fg_path))
    alpha_reader = cv2.VideoCapture(str(alpha_path))
    if not (fg_reader.isOpened() and alpha_reader.isOpened()):
        return False
    w, h = size
    writer = _video_writer(Path(out_mp4), fps, (w, h))
    board = _checkerboard_bg(w, h)
    wrote_any = False
    try:
        while True:
            got_fg, fg_frame = fg_reader.read()
            got_al, alpha_frame = alpha_reader.read()
            if not (got_fg and got_al):
                break
            fg_frame = cv2.resize(fg_frame, (w, h))
            alpha_gray = cv2.cvtColor(cv2.resize(alpha_frame, (w, h)), cv2.COLOR_BGR2GRAY)
            composed = _composite_frame_pro(cv2.cvtColor(fg_frame, cv2.COLOR_BGR2RGB), alpha_gray, board)
            writer.write(cv2.cvtColor(composed, cv2.COLOR_RGB2BGR))
            wrote_any = True
    finally:
        fg_reader.release()
        alpha_reader.release()
        writer.release()
    return wrote_any
763
-
764
def _build_stage_a_checkerboard_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: original video + static mask over checkerboard → MP4."""
    reader = cv2.VideoCapture(str(video_path))
    if not reader.isOpened():
        return False
    w, h = size
    static_mask = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
    if static_mask is None:
        return False
    static_mask = cv2.resize(static_mask, (w, h), interpolation=cv2.INTER_NEAREST)
    writer = _video_writer(Path(out_mp4), fps, (w, h))
    board = _checkerboard_bg(w, h)
    wrote_any = False
    try:
        while True:
            got, frame = reader.read()
            if not got:
                break
            frame = cv2.resize(frame, (w, h))
            composed = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), static_mask, board)
            writer.write(cv2.cvtColor(composed, cv2.COLOR_RGB2BGR))
            wrote_any = True
    finally:
        reader.release()
        writer.release()
    return wrote_any
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/__init__.py CHANGED
@@ -0,0 +1,795 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ BackgroundFX Pro - Model Loading & Utilities
4
+ ===========================================
5
+ Contains all model loading, inference functions, and utility functions
6
+ moved from the main pipeline for better organization.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
# Standard library
import inspect
import logging
import os
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union

# Third-party
import cv2
import numpy as np
import torch  # For memory management and CUDA operations
import yaml
24
+
25
+ # --------------------------------------------------------------------------------------
26
+ # Logging
27
+ # --------------------------------------------------------------------------------------
28
+ logger = logging.getLogger("backgroundfx_pro")
29
+
30
+ # --------------------------------------------------------------------------------------
31
+ # Optional dependencies
32
+ # --------------------------------------------------------------------------------------
33
+ try:
34
+ import mediapipe as mp # type: ignore
35
+ _HAS_MEDIAPIPE = True
36
+ except Exception:
37
+ _HAS_MEDIAPIPE = False
38
+
39
+ # --------------------------------------------------------------------------------------
40
+ # Path setup for third_party repos
41
+ # --------------------------------------------------------------------------------------
42
+ ROOT = Path(__file__).resolve().parent
43
+ TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
44
+ TP_MATANY = Path(os.environ.get("THIRD_PARTY_MATANY_DIR", ROOT / "third_party" / "matanyone")).resolve()
45
+
46
+ def _add_sys_path(p: Path) -> None:
47
+ p_str = str(p)
48
+ if p_str not in sys.path:
49
+ sys.path.insert(0, p_str)
50
+
51
+ _add_sys_path(TP_SAM2)
52
+ _add_sys_path(TP_MATANY)
53
+
54
+ # --------------------------------------------------------------------------------------
55
+ # Basic Utilities
56
+ # --------------------------------------------------------------------------------------
57
+ def _ffmpeg_bin() -> str:
58
+ return os.environ.get("FFMPEG_BIN", "ffmpeg")
59
+
60
def _probe_ffmpeg() -> bool:
    """Return True iff the configured ffmpeg binary runs (`ffmpeg -version`)."""
    try:
        subprocess.run(
            [_ffmpeg_bin(), "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except Exception:
        return False
    return True
66
+
67
+ def _has_cuda() -> bool:
68
+ try:
69
+ import torch # type: ignore
70
+ return torch.cuda.is_available()
71
+ except Exception:
72
+ return False
73
+
74
+ def _pick_device(env_key: str) -> str:
75
+ requested = os.environ.get(env_key, "").strip().lower()
76
+ if requested in {"cuda", "cpu"}:
77
+ return requested
78
+ return "cuda" if _has_cuda() else "cpu"
79
+
80
+ def _ensure_dir(p: Path) -> None:
81
+ p.mkdir(parents=True, exist_ok=True)
82
+
83
def _cv_read_first_frame(video_path: Union[str, Path]) -> Tuple[Optional[np.ndarray], int, Tuple[int, int]]:
    """Read the first frame of a video plus its (fps, (width, height)).

    Returns (None, 0, (0, 0)) when the file cannot be opened, and
    (None, fps, (w, h)) when it opens but no frame can be decoded.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return None, 0, (0, 0)
    fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))  # 25 fps default when metadata is 0
    got, frame = cap.read()
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    cap.release()
    if not got:
        return None, fps, (width, height)
    return frame, fps, (width, height)
95
+
96
def _save_mask_png(mask: np.ndarray, path: Union[str, Path]) -> str:
    """Persist *mask* as an 8-bit PNG at *path*; returns the path as str."""
    if mask.dtype == bool:
        out = mask.astype(np.uint8) * 255
    elif mask.dtype != np.uint8:
        out = np.clip(mask, 0, 255).astype(np.uint8)
    else:
        out = mask
    cv2.imwrite(str(path), out)
    return str(path)
103
+
104
def _resize_keep_ar(image: np.ndarray, target_wh: Tuple[int, int]) -> np.ndarray:
    """Letterbox *image* into a (target_w, target_h) canvas, preserving aspect.

    The image is scaled to fit inside the target and centered on a black
    canvas. Degenerate sizes (zero width/height on either side) return the
    input unchanged.

    Fix: the canvas previously was hard-coded to 3 channels, so a grayscale
    (H, W) input crashed on the paste; the canvas now mirrors the input's
    channel layout.
    """
    tw, th = target_wh
    h, w = image.shape[:2]
    if h == 0 or w == 0 or tw == 0 or th == 0:
        return image
    scale = min(tw / w, th / h)
    nw, nh = max(1, int(round(w * scale))), max(1, int(round(h * scale)))
    resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_CUBIC)
    # Match the (resized) input's dimensionality: (H, W) or (H, W, C).
    canvas_shape = (th, tw) if resized.ndim == 2 else (th, tw, resized.shape[2])
    canvas = np.zeros(canvas_shape, dtype=resized.dtype)
    x0 = (tw - nw) // 2
    y0 = (th - nh) // 2
    canvas[y0:y0 + nh, x0:x0 + nw] = resized
    return canvas
117
+
118
def _video_writer(out_path: Path, fps: int, size: Tuple[int, int]) -> cv2.VideoWriter:
    """Open an mp4v-encoded VideoWriter at *out_path* (fps clamped to >= 1)."""
    return cv2.VideoWriter(
        str(out_path),
        cv2.VideoWriter_fourcc(*"mp4v"),
        max(1, fps),
        size,
    )
121
+
122
def _mux_audio(src_video: Union[str, Path], silent_video: Union[str, Path], out_path: Union[str, Path]) -> bool:
    """Copy video from silent_video + audio from src_video into out_path (AAC).

    Returns False (and logs a warning) on any failure so callers can fall
    back to the silent render.
    """
    cmd = [
        _ffmpeg_bin(), "-y",
        "-i", str(silent_video),
        "-i", str(src_video),
        "-map", "0:v:0",
        "-map", "1:a:0?",   # '?' → tolerate a source with no audio stream
        "-c:v", "copy",
        "-c:a", "aac", "-b:a", "192k",
        "-shortest",
        str(out_path)
    ]
    try:
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as e:
        logger.warning(f"Audio mux failed; returning silent video. Reason: {e}")
        return False
    return True
141
+
142
+ # --------------------------------------------------------------------------------------
143
+ # Compositing & Image Processing
144
+ # --------------------------------------------------------------------------------------
145
def _refine_alpha(alpha: np.ndarray, erode_px: int = 1, dilate_px: int = 2, blur_px: float = 1.5) -> np.ndarray:
    """Erode→dilate + gentle blur → float alpha in [0, 1].

    Accepts bool, uint8 (0-255), or float input. Any input whose max exceeds
    1.0 is treated as 0-255 scaled and normalized.

    Fix: float32 input already in the 0-255 range previously skipped
    normalization (only non-float32 inputs were rescaled), which saturated
    the matte to all-255 after the round-trip through uint8. Normalization
    now applies uniformly; in-range float mattes are unaffected.
    """
    a = alpha.astype(np.float32) if alpha.dtype != np.float32 else alpha.copy()
    if a.max() > 1.0:
        a = a / 255.0

    # Morphology runs on a uint8 view; erode first to cut halo, then dilate
    # to recover silhouette coverage.
    a_u8 = np.clip(np.round(a * 255.0), 0, 255).astype(np.uint8)
    if erode_px > 0:
        k = max(1, int(erode_px))
        a_u8 = cv2.erode(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
    if dilate_px > 0:
        k = max(1, int(dilate_px))
        a_u8 = cv2.dilate(a_u8, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)), iterations=1)
    a = a_u8.astype(np.float32) / 255.0

    if blur_px and blur_px > 0:
        rad = max(1, int(round(blur_px)))
        # GaussianBlur requires odd kernel sizes; `| 1` bumps even radii.
        a = cv2.GaussianBlur(a, (rad | 1, rad | 1), 0)

    return np.clip(a, 0.0, 1.0)
168
+
169
+ def _to_linear(rgb: np.ndarray, gamma: float = 2.2) -> np.ndarray:
170
+ x = np.clip(rgb.astype(np.float32) / 255.0, 0.0, 1.0)
171
+ return np.power(x, gamma)
172
+
173
+ def _to_srgb(lin: np.ndarray, gamma: float = 2.2) -> np.ndarray:
174
+ x = np.clip(lin, 0.0, 1.0)
175
+ return np.clip(np.power(x, 1.0 / gamma) * 255.0, 0, 255).astype(np.uint8)
176
+
177
def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount: float = 0.18) -> np.ndarray:
    """Simple light wrap: bleed blurred background light across subject edges.

    Returns a float32 additive term scaled by *amount* (not a final image).
    """
    r = max(1, int(radius))
    # Blur the inverse matte so background brightness leaks softly inward.
    inv_blur = cv2.GaussianBlur(1.0 - alpha01, (r | 1, r | 1), 0)
    return bg_rgb.astype(np.float32) * inv_blur[..., None] * float(amount)
184
+
185
def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35) -> np.ndarray:
    """Reduce saturation in the boundary band (alpha ≈ 0.5) to remove old-background tint."""
    # Bell-shaped weight: 1 where alpha == 0.5, 0 where alpha is 0 or 1.
    weight = np.clip(1.0 - 2.0 * np.abs(alpha01 - 0.5), 0.0, 1.0)
    hsv = cv2.cvtColor(fg_rgb.astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
    hue, sat, val = cv2.split(hsv)
    # Desaturate proportionally to the boundary weight.
    sat = sat * (1.0 - amount * weight)
    merged = cv2.merge([hue, np.clip(sat, 0, 255), val])
    return cv2.cvtColor(merged.astype(np.uint8), cv2.COLOR_HSV2RGB)
195
+
196
def _composite_frame_pro(fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarray,
                         erode_px: Optional[int] = None, dilate_px: Optional[int] = None,
                         blur_px: Optional[float] = None,
                         lw_radius: Optional[int] = None, lw_amount: Optional[float] = None,
                         despill_amount: Optional[float] = None) -> np.ndarray:
    """Gamma-aware composite + edge refinement + light wrap + boundary de-spill.

    Args:
        fg_rgb: Foreground frame, RGB uint8.
        alpha: Matte (uint8 0-255, bool, or float); refined via `_refine_alpha`.
        bg_rgb: Background frame, RGB uint8, same size as *fg_rgb*.
        erode_px/dilate_px/blur_px: Edge-refinement knobs; default from the
            EDGE_ERODE / EDGE_DILATE / EDGE_BLUR environment variables.
        lw_radius/lw_amount: Light-wrap knobs (LIGHTWRAP_RADIUS / LIGHTWRAP_AMOUNT).
        despill_amount: Boundary de-spill strength (DESPILL_AMOUNT).

    Returns:
        Composited RGB uint8 frame.

    Note: annotations were `int = None` etc.; they are now `Optional[...]`
    per PEP 484 — runtime behavior is unchanged.
    """
    erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
    dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
    blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
    lw_radius = lw_radius if lw_radius is not None else int(os.environ.get("LIGHTWRAP_RADIUS", "5"))
    lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
    despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))

    # refine alpha [0,1]
    a = _refine_alpha(alpha, erode_px=erode_px, dilate_px=dilate_px, blur_px=blur_px)

    # edge de-spill: temper saturation where a≈0.5
    fg_rgb = _despill_edges(fg_rgb, a, amount=despill_amount)

    # linearize for better blending (blend in linear light, not gamma space)
    fg_lin = _to_linear(fg_rgb)
    bg_lin = _to_linear(bg_rgb)

    # light wrap (additive term; _to_srgb clips any overshoot)
    lw = _light_wrap(bg_rgb, a, radius=lw_radius, amount=lw_amount)
    lw_lin = _to_linear(np.clip(lw, 0, 255).astype(np.uint8))

    comp_lin = fg_lin * a[..., None] + bg_lin * (1.0 - a[..., None]) + lw_lin
    comp = _to_srgb(comp_lin)
    return comp
225
+
226
+ # --------------------------------------------------------------------------------------
227
+ # SAM2 Integration
228
+ # --------------------------------------------------------------------------------------
229
def _resolve_sam2_cfg(cfg_str: str) -> str:
    """Make the SAM2 config path absolute (prefer inside TP_SAM2).

    Resolution order: relative path under TP_SAM2, then the path as given,
    then known default configs in the repo, then the input unchanged (so a
    bad path surfaces as a clear error from build_sam2).
    """
    cfg_path = Path(cfg_str)
    if not cfg_path.is_absolute():
        candidate = TP_SAM2 / cfg_path
        if candidate.exists():
            return str(candidate)
    if cfg_path.exists():
        return str(cfg_path)
    # Last resort: common defaults inside the repo
    for default_name in ("configs/sam2/sam2_hiera_l.yaml",
                         "configs/sam2/sam2_hiera_b.yaml",
                         "configs/sam2/sam2_hiera_s.yaml"):
        default_path = TP_SAM2 / default_name
        if default_path.exists():
            return str(default_path)
    return str(cfg_str)  # let build_sam2 raise a clear error
244
+
245
def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
    """If config references 'hieradet', try to find a 'hiera' config.

    Reads the YAML at *cfg_path*; when its image-encoder trunk target string
    contains 'hieradet', scans every YAML under TP_SAM2 and returns the first
    whose trunk target contains '.hiera.'. Returns None when no swap is
    needed or nothing suitable exists. All parse/IO errors are swallowed —
    this is a best-effort compatibility shim, not a hard requirement.
    """
    try:
        with open(cfg_path, "r") as f:
            data = yaml.safe_load(f)
        target = None
        model = data.get("model", {})
        enc = (model.get("image_encoder") or {})
        trunk = (enc.get("trunk") or {})
        # Hydra-style configs may use either "_target_" or "target".
        target = trunk.get("_target_") or trunk.get("target")
        if isinstance(target, str) and "hieradet" in target:
            # Scan the whole repo for a config using the plain 'hiera' trunk.
            for y in TP_SAM2.rglob("*.yaml"):
                try:
                    with open(y, "r") as f2:
                        d2 = yaml.safe_load(f2)
                    m2 = (d2 or {}).get("model", {})
                    e2 = (m2.get("image_encoder") or {})
                    t2 = (e2.get("trunk") or {})
                    tgt2 = t2.get("_target_") or t2.get("target")
                    if isinstance(tgt2, str) and ".hiera." in tgt2:
                        logger.info(f"SAM2: switching config from 'hieradet' → 'hiera': {y}")
                        return str(y)
                except Exception:
                    # Unparseable candidate — keep scanning.
                    continue
    except Exception:
        pass
    return None
272
+
273
def load_sam2() -> Tuple[Optional[object], bool, Dict[str, Any]]:
    """Robust SAM2 loader with config resolution and error handling.

    Returns (predictor_or_None, ok, meta). *meta* records which stage
    succeeded (import vs. init) plus the device/config/checkpoint used.
    Configuration comes from SAM2_DEVICE, SAM2_MODEL_CFG and SAM2_CHECKPOINT
    environment variables. Never raises — failures log and return (None,
    False, meta).
    """
    meta = {"sam2_import_ok": False, "sam2_init_ok": False}
    try:
        from sam2.build_sam import build_sam2  # type: ignore
        from sam2.sam2_image_predictor import SAM2ImagePredictor  # type: ignore
        meta["sam2_import_ok"] = True
    except Exception as e:
        logger.warning(f"SAM2 import failed: {e}")
        return None, False, meta

    device = _pick_device("SAM2_DEVICE")
    cfg_env = os.environ.get("SAM2_MODEL_CFG", "configs/sam2/sam2_hiera_l.yaml")
    cfg = _resolve_sam2_cfg(cfg_env)
    ckpt = os.environ.get("SAM2_CHECKPOINT", "")

    def _try_build(cfg_path: str):
        # Different SAM2 forks expose different build_sam2 signatures; sniff
        # the parameter names and pass whatever this fork accepts.
        params = set(inspect.signature(build_sam2).parameters.keys())
        kwargs = {}
        if "config_file" in params:
            kwargs["config_file"] = cfg_path
        elif "model_cfg" in params:
            kwargs["model_cfg"] = cfg_path
        if ckpt:
            if "checkpoint" in params:
                kwargs["checkpoint"] = ckpt
            elif "ckpt_path" in params:
                kwargs["ckpt_path"] = ckpt
            elif "weights" in params:
                kwargs["weights"] = ckpt
        if "device" in params:
            kwargs["device"] = device
        try:
            return build_sam2(**kwargs)
        except TypeError:
            # Keyword call rejected — retry positionally (cfg[, ckpt][, device]).
            pos = [cfg_path]
            if ckpt:
                pos.append(ckpt)
            if "device" not in kwargs:
                pos.append(device)
            return build_sam2(*pos)

    try:
        try:
            sam = _try_build(cfg)
        except Exception as e1:
            # Some checkpoints need the 'hiera' (not 'hieradet') trunk config.
            alt_cfg = _find_hiera_config_if_hieradet(cfg)
            if alt_cfg:
                logger.info(f"SAM2: retrying with alt config: {alt_cfg}")
                sam = _try_build(alt_cfg)
                cfg = alt_cfg
            else:
                raise

        predictor = SAM2ImagePredictor(sam)
        meta.update({
            "sam2_init_ok": True,
            "sam2_device": device,
            "sam2_cfg": cfg,
            "sam2_ckpt": ckpt or "(repo default)"
        })
        return predictor, True, meta
    except Exception as e:
        logger.error(f"SAM2 init failed: {e}")
        return None, False, meta
338
+
339
def run_sam2_mask(predictor: object,
                  first_frame_bgr: np.ndarray,
                  point: Optional[Tuple[int, int]] = None,
                  auto: bool = False) -> Tuple[Optional[np.ndarray], bool]:
    """Return (mask_uint8_0_255, ok).

    Prompting strategy: *auto* → a near-full-frame box (5% inset); an
    explicit *point* → a single positive point prompt; otherwise a default
    box with a 10% inset. Only the first returned mask is used. Failures
    log a warning and return (None, False).
    """
    if predictor is None:
        return None, False
    try:
        rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
        predictor.set_image(rgb)

        if auto:
            h, w = rgb.shape[:2]
            box = np.array([int(0.05*w), int(0.05*h), int(0.95*w), int(0.95*h)])
            masks, _, _ = predictor.predict(box=box)
        elif point is not None:
            x, y = int(point[0]), int(point[1])
            pts = np.array([[x, y]], dtype=np.int32)
            labels = np.array([1], dtype=np.int32)  # 1 = positive (foreground) click
            masks, _, _ = predictor.predict(point_coords=pts, point_labels=labels)
        else:
            h, w = rgb.shape[:2]
            box = np.array([int(0.1*w), int(0.1*h), int(0.9*w), int(0.9*h)])
            masks, _, _ = predictor.predict(box=box)

        if masks is None or len(masks) == 0:
            return None, False

        # NOTE(review): assumes masks[0] is boolean/0-1 valued so the uint8
        # cast scales cleanly to 0/255 — confirm against the SAM2 fork in use.
        m = masks[0].astype(np.uint8) * 255
        return m, True
    except Exception as e:
        logger.warning(f"SAM2 mask failed: {e}")
        return None, False
372
+
373
def _refine_mask_grabcut(image_bgr: np.ndarray,
                         mask_u8: np.ndarray,
                         iters: Optional[int] = None,
                         trimap_erode: Optional[int] = None,
                         trimap_dilate: Optional[int] = None) -> np.ndarray:
    """Use SAM2 seed as initialization for GrabCut refinement.

    Builds a trimap from the binarized seed: an eroded core becomes
    definite-foreground, an eroded *inverse* becomes definite-background,
    and everything else stays probable-background for GrabCut to resolve.
    Knobs default from REFINE_GRABCUT_ITERS / REFINE_TRIMAP_ERODE /
    REFINE_TRIMAP_DILATE. On failure the binarized input is returned.
    """
    iters = int(os.environ.get("REFINE_GRABCUT_ITERS", "2")) if iters is None else int(iters)
    e = int(os.environ.get("REFINE_TRIMAP_ERODE", "3")) if trimap_erode is None else int(trimap_erode)
    d = int(os.environ.get("REFINE_TRIMAP_DILATE", "6")) if trimap_dilate is None else int(trimap_dilate)

    h, w = mask_u8.shape[:2]
    m = (mask_u8 > 127).astype(np.uint8) * 255  # binarize the seed

    # Core foreground: erode inward by e px. Certain background: erode the
    # inverted mask by d px (i.e. keep only pixels well outside the subject).
    sure_fg = cv2.erode(m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, e), max(1, e))), iterations=1)
    sure_bg = cv2.erode(255 - m, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (max(1, d), max(1, d))), iterations=1)

    gc_mask = np.full((h, w), cv2.GC_PR_BGD, dtype=np.uint8)
    gc_mask[sure_bg > 0] = cv2.GC_BGD
    gc_mask[sure_fg > 0] = cv2.GC_FGD

    # GrabCut's GMM model buffers (fixed 1x65 shape required by OpenCV).
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(image_bgr, gc_mask, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
        out = np.where((gc_mask == cv2.GC_FGD) | (gc_mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
        out = cv2.medianBlur(out, 5)  # knock out speckles
        return out
    except Exception as e:
        logger.warning(f"GrabCut refinement failed; using original mask. Reason: {e}")
        return m
403
+
404
+ # --------------------------------------------------------------------------------------
405
+ # MatAnyone Integration
406
+ # --------------------------------------------------------------------------------------
407
def load_matany() -> Tuple[Optional[object], bool, Dict[str, Any]]:
    """MatAnyone loader with disable switch and error handling.

    Returns (core_or_None, ok, meta). Respects ENABLE_MATANY (0/false/off/no
    disables), MATANY_DEVICE, MATANY_REPO_ID and MATANY_CHECKPOINT. Tries two
    import locations and several constructor signatures, since MatAnyone
    forks differ. Never raises.
    """
    meta = {"matany_import_ok": False, "matany_init_ok": False}

    enable_env = os.environ.get("ENABLE_MATANY", "1").strip().lower()
    if enable_env in {"0", "false", "off", "no"}:
        logger.info("MatAnyone disabled by ENABLE_MATANY=0.")
        meta["disabled"] = True
        return None, False, meta

    try:
        # Some forks expose InferenceCore at top level, others nest it.
        try:
            from inference_core import InferenceCore  # type: ignore
        except Exception:
            from matanyone.inference.inference_core import InferenceCore  # type: ignore
        meta["matany_import_ok"] = True
    except Exception as e:
        logger.warning(f"MatAnyone import failed: {e}")
        return None, False, meta

    device = _pick_device("MATANY_DEVICE")
    repo_id = os.environ.get("MATANY_REPO_ID", "")
    ckpt = os.environ.get("MATANY_CHECKPOINT", "")

    # Check if this fork needs a prebuilt network: a required `network`
    # parameter means we cannot construct it from a checkpoint path here.
    try:
        sig = inspect.signature(InferenceCore)
        if "network" in sig.parameters and sig.parameters["network"].default is inspect._empty:
            logger.error(
                "This MatAnyone fork expects `InferenceCore(network=...)`. "
                "Pin a fork/commit that supplies a checkpoint-based constructor, "
                "or set ENABLE_MATANY=0 to skip."
            )
            meta["needs_network_arg"] = True
            return None, False, meta
    except Exception:
        pass

    # Constructor candidates, richest first; first one that works wins.
    candidates = [
        {"kwargs": {"repo_id": repo_id or None, "checkpoint": ckpt or None, "device": device}},
        {"kwargs": {"checkpoint": ckpt or None, "device": device}},
        {"args": (), "kwargs": {"device": device}},
    ]
    last_err = None
    for cand in candidates:
        try:
            matany = InferenceCore(*cand.get("args", ()), **cand.get("kwargs", {}))
            meta["matany_init_ok"] = True
            meta["matany_device"] = device
            meta["matany_repo_id"] = repo_id or "(unset)"
            meta["matany_checkpoint"] = ckpt or "(unset)"
            return matany, True, meta
        except Exception as e:
            last_err = e
            continue

    logger.error(f"MatAnyone init failed with all fallbacks: {last_err}")
    return None, False, meta
465
+
466
def run_matany(matany: object,
               video_path: Union[str, Path],
               first_mask_path: Union[str, Path],
               work_dir: Union[str, Path]) -> Tuple[Optional[str], Optional[str], bool]:
    """Return (foreground_video_path, alpha_video_path, ok).

    Adapts to two fork APIs: `process_video(...)` (preferred) and `run(...)`.
    Each may return a (fg, alpha) tuple/list or a dict with foreground/alpha
    keys; anything else is treated as failure. Never raises.
    """
    if matany is None:
        return None, None, False
    try:
        if hasattr(matany, "process_video"):
            out = matany.process_video(input_path=str(video_path), mask_path=str(first_mask_path), output_dir=str(work_dir))
            if isinstance(out, (list, tuple)) and len(out) >= 2:
                return str(out[0]), str(out[1]), True
            if isinstance(out, dict):
                # Key names vary across forks — accept the known aliases.
                fg = out.get("foreground") or out.get("fg") or out.get("foreground_path")
                al = out.get("alpha") or out.get("alpha_path")
                if fg and al:
                    return str(fg), str(al), True

        if hasattr(matany, "run"):
            out = matany.run(video_path=str(video_path), seed_mask=str(first_mask_path), out_dir=str(work_dir))
            if isinstance(out, dict):
                fg = out.get("foreground") or out.get("fg") or out.get("foreground_path")
                al = out.get("alpha") or out.get("alpha_path")
                if fg and al:
                    return str(fg), str(al), True

        logger.error("MatAnyone returned no usable paths.")
        return None, None, False
    except Exception as e:
        logger.warning(f"MatAnyone processing failed: {e}")
        return None, None, False
497
+
498
+ # --------------------------------------------------------------------------------------
499
+ # Fallback Functions
500
+ # --------------------------------------------------------------------------------------
501
def fallback_mask(first_frame_bgr: np.ndarray) -> np.ndarray:
    """Prefer MediaPipe; fallback to GrabCut. Returns uint8 mask 0/255.

    MediaPipe selfie segmentation (model_selection=1) is tried first when
    available; otherwise rectangle-initialized GrabCut over the central 80%
    of the frame. On total failure an all-zero mask is returned.
    """
    h, w = first_frame_bgr.shape[:2]
    if _HAS_MEDIAPIPE:
        try:
            mp_selfie = mp.solutions.selfie_segmentation
            with mp_selfie.SelfieSegmentation(model_selection=1) as segmenter:
                rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
                res = segmenter.process(rgb)
                # Threshold the soft probability map at 0.5 and despeckle.
                m = (np.clip(res.segmentation_mask, 0, 1) > 0.5).astype(np.uint8) * 255
                m = cv2.medianBlur(m, 5)
                return m
        except Exception as e:
            logger.warning(f"MediaPipe fallback failed: {e}")

    # Ultimate fallback: GrabCut
    mask = np.zeros((h, w), np.uint8)
    rect = (int(0.1*w), int(0.1*h), int(0.8*w), int(0.8*h))  # x, y, width, height
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(first_frame_bgr, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
        mask_bin = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
        return mask_bin
    except Exception as e:
        logger.warning(f"GrabCut failed: {e}")
        return np.zeros((h, w), dtype=np.uint8)
528
+
529
def composite_video(fg_path: Union[str, Path],
                    alpha_path: Union[str, Path],
                    bg_image_path: Union[str, Path],
                    out_path: Union[str, Path],
                    fps: int,
                    size: Tuple[int, int]) -> bool:
    """Blend MatAnyone FG+ALPHA over background using pro compositor.

    Reads the foreground and alpha videos in lockstep, composites each frame
    over the (letterboxed) background, and writes *out_path*. When ffmpeg is
    available, frames go to a temporary mp4v file that is re-encoded to
    H.264 afterwards; otherwise the mp4v output is final. Returns True iff
    at least one frame was written.
    """
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    if not fg_cap.isOpened() or not al_cap.isOpened():
        return False

    w, h = size
    bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
    if bg is None:
        bg = np.full((h, w, 3), 127, dtype=np.uint8)  # neutral gray fallback
    bg_f = _resize_keep_ar(bg, (w, h))

    if _probe_ffmpeg():
        tmp_out = Path(str(out_path) + ".tmp.mp4")
        writer = _video_writer(tmp_out, fps, (w, h))
        post_h264 = True
    else:
        writer = _video_writer(Path(out_path), fps, (w, h))
        post_h264 = False

    ok_any = False
    try:
        while True:
            ok_fg, fg = fg_cap.read()
            ok_al, al = al_cap.read()
            if not ok_fg or not ok_al:
                break  # stop at the shorter of the two streams
            fg = cv2.resize(fg, (w, h), interpolation=cv2.INTER_CUBIC)
            al_gray = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)

            # Compositor works in RGB; VideoWriter needs BGR back.
            comp = _composite_frame_pro(
                cv2.cvtColor(fg, cv2.COLOR_BGR2RGB),
                al_gray,
                cv2.cvtColor(bg_f, cv2.COLOR_BGR2RGB)
            )
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        fg_cap.release()
        al_cap.release()
        writer.release()

    if post_h264 and ok_any:
        try:
            cmd = [
                _ffmpeg_bin(), "-y",
                "-i", str(tmp_out),
                "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
                str(out_path)
            ]
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            tmp_out.unlink(missing_ok=True)
        except Exception as e:
            # Re-encode failed: promote the mp4v temp file to the output path.
            logger.warning(f"ffmpeg finalize failed: {e}")
            Path(out_path).unlink(missing_ok=True)
            tmp_out.replace(out_path)

    return ok_any
593
+
594
def fallback_composite(video_path: Union[str, Path],
                       mask_path: Union[str, Path],
                       bg_image_path: Union[str, Path],
                       out_path: Union[str, Path]) -> bool:
    """Static-mask compositing using pro compositor.

    Applies one fixed mask (PNG at *mask_path*) to every frame of
    *video_path*, compositing over *bg_image_path*. Output size/fps come
    from the source video. Uses the same mp4v-then-H.264 finalize strategy
    as `composite_video`. Returns True iff at least one frame was written.
    """
    mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
    cap = cv2.VideoCapture(str(video_path))
    if mask is None or not cap.isOpened():
        return False

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    fps = int(round(cap.get(cv2.CAP_PROP_FPS) or 25))

    bg = cv2.imread(str(bg_image_path), cv2.IMREAD_COLOR)
    if bg is None:
        bg = np.full((h, w, 3), 127, dtype=np.uint8)  # neutral gray fallback

    # Nearest-neighbor keeps the mask binary (no gray interpolation fringe).
    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    bg_f = _resize_keep_ar(bg, (w, h))

    if _probe_ffmpeg():
        tmp_out = Path(str(out_path) + ".tmp.mp4")
        writer = _video_writer(tmp_out, fps, (w, h))
        use_post_ffmpeg = True
    else:
        writer = _video_writer(Path(out_path), fps, (w, h))
        use_post_ffmpeg = False

    ok_any = False
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            comp = _composite_frame_pro(
                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
                mask_resized,
                cv2.cvtColor(bg_f, cv2.COLOR_BGR2RGB)
            )
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        cap.release()
        writer.release()

    if use_post_ffmpeg and ok_any:
        try:
            cmd = [
                _ffmpeg_bin(), "-y",
                "-i", str(tmp_out),
                "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart",
                str(out_path)
            ]
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            tmp_out.unlink(missing_ok=True)
        except Exception as e:
            # Finalize failed: fall back to the raw mp4v render.
            logger.warning(f"ffmpeg H.264 finalize failed: {e}")
            Path(out_path).unlink(missing_ok=True)
            tmp_out.replace(out_path)

    return ok_any
656
+
657
+ # --------------------------------------------------------------------------------------
658
+ # Stage-A (Transparent Export) Functions
659
+ # --------------------------------------------------------------------------------------
660
+ def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
661
+ """RGB checkerboard for preview when no real alpha is possible."""
662
+ y, x = np.mgrid[0:h, 0:w]
663
+ c = ((x // tile) + (y // tile)) % 2
664
+ a = np.where(c == 0, 200, 150).astype(np.uint8)
665
+ return np.stack([a, a, a], axis=-1)
666
+
667
def _build_stage_a_rgba_vp9_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
    src_audio: Optional[Union[str, Path]] = None,
) -> bool:
    """Merge FG+ALPHA → RGBA WebM (VP9 with alpha).

    Builds one ffmpeg invocation: the alpha stream is converted to gray and
    merged onto the foreground via `alphamerge`, then encoded as VP9 with
    yuva420p (alpha-capable). Optional *src_audio* is muxed as Opus. CRF is
    tunable via STAGEA_VP9_CRF. Requires ffmpeg; returns False otherwise or
    on any encode failure.
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        cmd = [_ffmpeg_bin(), "-y", "-i", str(fg_path), "-i", str(alpha_path)]
        if src_audio:
            cmd += ["-i", str(src_audio)]
        # [1:v] alpha → gray; [0:v] foreground; merge into one RGBA stream.
        fcx = f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];" \
              f"[0:v]scale={w}:{h},fps={fps}[fg];" \
              f"[fg][al]alphamerge[outv]"
        cmd += ["-filter_complex", fcx, "-map", "[outv]"]
        if src_audio:
            cmd += ["-map", "2:a:0?", "-c:a", "libopus", "-b:a", "128k"]
        cmd += [
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1", "-shortest", str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}")
        return False
699
+
700
def _build_stage_a_rgba_vp9_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Merge original video + static mask → RGBA WebM (VP9 with alpha).

    Same encode pipeline as `_build_stage_a_rgba_vp9_from_fg_alpha`, but the
    alpha source is a single PNG looped for the full duration (`-loop 1` +
    `-shortest`). Requires ffmpeg; returns False otherwise or on failure.
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(video_path),
            "-loop", "1", "-i", str(mask_png),   # loop the still mask as a stream
            "-filter_complex",
            f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
            f"[0:v]scale={w}:{h},fps={fps}[fg];"
            f"[fg][al]alphamerge[outv]",
            "-map", "[outv]",
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1", "-shortest", str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}")
        return False
730
+
731
def _build_stage_a_checkerboard_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: FG+ALPHA over checkerboard → MP4 (no real alpha).

    Pure-OpenCV fallback when an alpha-capable container cannot be produced:
    each frame is composited over a static checkerboard so transparency is
    visually apparent. Returns True iff at least one frame was written.
    """
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    if not fg_cap.isOpened() or not al_cap.isOpened():
        return False
    w, h = size
    writer = _video_writer(Path(out_mp4), fps, (w, h))
    bg = _checkerboard_bg(w, h)
    ok_any = False
    try:
        while True:
            okf, fg = fg_cap.read()
            oka, al = al_cap.read()
            if not okf or not oka:
                break  # stop at the shorter stream
            fg = cv2.resize(fg, (w, h))
            al = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
            comp = _composite_frame_pro(cv2.cvtColor(fg, cv2.COLOR_BGR2RGB), al, bg)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        fg_cap.release()
        al_cap.release()
        writer.release()
    return ok_any
763
+
764
def _build_stage_a_checkerboard_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: original video + static mask over checkerboard → MP4.

    Static-mask variant of the checkerboard preview: one PNG mask applied to
    every frame. Returns False when the video or mask cannot be read; True
    iff at least one frame was written.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        return False
    w, h = size
    mask = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        return False
    # Nearest-neighbor keeps mask edges hard (no interpolation fringe).
    mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    writer = _video_writer(Path(out_mp4), fps, (w, h))
    bg = _checkerboard_bg(w, h)
    ok_any = False
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame = cv2.resize(frame, (w, h))
            comp = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), mask, bg)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        cap.release()
        writer.release()
    return ok_any