Spaces:

MogensR
/

VideoBackgroundReplacer2

Paused

App Files Files Community

MogensR commited on Sep 14, 2025

Commit

c137c1a

1 Parent(s): 2769fce

lunch done

Browse files

Files changed (1) hide show

models/matanyone_loader.py +106 -60

models/matanyone_loader.py CHANGED Viewed

@@ -1,72 +1,118 @@
-# models/matanyone_loader.py
-import os, logging, torch, gc
-import numpy as np
-from typing import Optional, Tuple
-log = logging.getLogger("matany_loader")
-def _import_inference_core():
-    try:
-        # Check the actual import path from pq-yang/MatAnyone repo
-        from matanyone.inference_core import InferenceCore
-        return InferenceCore
-    except Exception as e:
-        log.error("MatAnyone import failed (vendoring/repo path?): %s", e)
-        return None
-def _to_chw01(img):
-    # img: HWC uint8 or float01 -> CHW float01
-    if img.dtype != np.float32:
-        img = img.astype("float32")/255.0
-    return np.transpose(img, (2,0,1))
-def _to_1hw01(mask):
-    # mask: HxW [0,1]
-    m = mask.astype("float32")
-    return m[None, ...]
 class MatAnyoneSession:
-    def __init__(self, device: torch.device, precision: str = "fp16"):
-        self.device = device
-        self.precision = precision
         self.core = None
-    def load(self, ckpt_path: Optional[str] = None, repo_id: Optional[str] = None, filename: Optional[str] = None):
-        InferenceCore = _import_inference_core()
-        if InferenceCore is None:
-            raise RuntimeError("MatAnyone not importable")
-        if ckpt_path is None and repo_id and filename:
-            from huggingface_hub import hf_hub_download
-            ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=os.environ.get("HF_HOME"))
-        # init model
-        self.core = InferenceCore(ckpt_path, device=str(self.device))
-        return self
-    @torch.inference_mode()
-    def step(self, image_rgb, seed_mask: Optional[np.ndarray]=None):
-        """
-        image_rgb: HxWx3 uint8/float01
-        seed_mask: HxW float01 for first frame, else None
-        returns alpha HxW float01
-        """
-        assert self.core is not None, "MatAnyone not loaded"
-        img = _to_chw01(image_rgb)    # CHW
-        if seed_mask is not None:
-            mask = _to_1hw01(seed_mask)  # 1HW
-            alpha = self.core.step(img, mask)
-        else:
-            alpha = self.core.step(img, None)
-        # ensure HxW
-        if isinstance(alpha, np.ndarray):
-            return alpha.astype("float32")
-        if torch.is_tensor(alpha):
-            return alpha.detach().float().cpu().numpy()
-        raise RuntimeError("MatAnyone returned unknown alpha type")
-    def reset(self):
-        if self.core and hasattr(self.core, "reset"):
-            self.core.reset()
-        torch.cuda.empty_cache()
-        gc.collect()

+#!/usr/bin/env python3
+"""
+MatAnyone Loader (compact)
+- Uses top-level wrapper: `from matanyone import InferenceCore`
+- Constructor takes a model/repo id string (e.g. "PeiqingYang/MatAnyone")
+- Normalizes inputs: image -> CHW float32 [0,1], mask -> 1HW float32 [0,1]
+"""
+from __future__ import annotations
+import os, logging, time
+from typing import Iterable, Optional
+import numpy as np
+import torch
+logger = logging.getLogger("backgroundfx_pro")
+# ---------- tiny helpers ----------
+def _to_chw_float01(x: np.ndarray | torch.Tensor) -> torch.Tensor:
+    if isinstance(x, np.ndarray):
+        t = torch.from_numpy(x)
+    else:
+        t = x
+    if t.ndim == 3 and t.shape[-1] in (1, 3, 4):  # HWC
+        t = t.permute(2, 0, 1)                     # -> CHW
+    elif t.ndim == 2:                              # HW -> 1HW
+        t = t.unsqueeze(0)
+    elif t.ndim != 3:
+        raise ValueError(f"image: bad shape {tuple(t.shape)}")
+    t = t.contiguous().to(torch.float32)
+    with torch.no_grad():
+        if t.numel() and (torch.nanmax(t) > 1.0 or torch.nanmin(t) < 0.0):
+            t = t / 255.0
+        t.clamp_(0.0, 1.0)
+    return t
+def _to_1hw_float01(m: np.ndarray | torch.Tensor) -> torch.Tensor:
+    if isinstance(m, np.ndarray):
+        t = torch.from_numpy(m)
+    else:
+        t = m
+    if t.ndim == 2:              # HW
+        t = t.unsqueeze(0)       # -> 1HW
+    elif t.ndim == 3:
+        if t.shape[0] in (1, 3): # CHW
+            t = t[:1, ...]
+        elif t.shape[-1] in (1, 3):  # HWC
+            t = t[..., 0]
+            t = t.unsqueeze(0)
+        else:
+            raise ValueError(f"mask: bad shape {tuple(t.shape)}")
+    else:
+        raise ValueError(f"mask: bad shape {tuple(t.shape)}")
+    t = t.contiguous().to(torch.float32)
+    with torch.no_grad():
+        if t.numel() and (torch.nanmax(t) > 1.0 or torch.nanmin(t) < 0.0):
+            t = t / 255.0
+        t.clamp_(0.0, 1.0)
+    return t
+# ---------- session ----------
 class MatAnyoneSession:
+    def __init__(self, device: Optional[str] = None, repo_id: Optional[str] = None) -> None:
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.repo_id = repo_id or os.getenv("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
         self.core = None
+        self.loaded = False
+    def load(self) -> bool:
+        t0 = time.time()
+        try:
+            # ✅ top-level wrapper (accepts model/repo id string)
+            from matanyone import InferenceCore
+            logger.info("[MatA] init: repo_id=%s device=%s", self.repo_id, self.device)
+            self.core = InferenceCore(self.repo_id)
+            self.loaded = True
+            logger.info("[MatA] init OK (%.2fs)", time.time() - t0)
+            return True
+        except TypeError as e:
+            logger.error("MatAnyone constructor mismatch: %s (fork expects network=...)", e)
+        except Exception as e:
+            logger.error("MatAnyone init error: %s", e)
+        self.loaded = False
+        return False
+    def step(self, image: np.ndarray | torch.Tensor, seed_mask: np.ndarray | torch.Tensor) -> np.ndarray:
+        if not self.loaded or self.core is None:
+            raise RuntimeError("MatAnyone not loaded")
+        img = _to_chw_float01(image).to(self.device, non_blocking=True)
+        msk = _to_1hw_float01(seed_mask).to(self.device, non_blocking=True)
+        out = self.core.step(img, msk)
+        alpha = out[0] if isinstance(out, (tuple, list)) else out
+        if not isinstance(alpha, torch.Tensor):
+            alpha = torch.as_tensor(alpha)
+        if alpha.ndim == 3 and alpha.shape[0] == 1:
+            alpha = alpha[0]
+        if alpha.ndim != 2:
+            raise ValueError(f"alpha: bad shape {tuple(alpha.shape)}")
+        return alpha.detach().to("cpu", torch.float32).clamp_(0.0, 1.0).contiguous().numpy()
+    def process_video(self, frames: Iterable[np.ndarray | torch.Tensor], seed_mask_hw, every: int = 50):
+        for i, f in enumerate(frames, 1):
+            yield self.step(f, seed_mask_hw)
+            if every and (i % every == 0):
+                logger.info("[MatA] processed %d frames", i)
+    def close(self) -> None:
+        self.core = None
+        self.loaded = False
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+# ---------- factory ----------
+def get_matanyone_session(enable: bool = True) -> Optional[MatAnyoneSession]:
+    if not enable:
+        logger.info("[MatA] disabled.")
+        return None
+    s = MatAnyoneSession()
+    return s if s.load() else None