Update models/loaders/sam2_loader.py
Browse files

models/loaders/sam2_loader.py  CHANGED  (+96 −184)
@@ -1,12 +1,6 @@
 #!/usr/bin/env python3
 """
-SAM2 Loader + Guarded Predictor Adapter (VRAM-friendly, shape-safe)
-
-- Loads a SAM2 image predictor on the desired device.
-- set_image(): accepts RGB/BGR, uint8/float; optional model-only downscale to save VRAM.
-- predict(): forwards prompts, upsamples masks back to original size, normalizes outputs.
-- Uses torch.inference_mode + optional autocast on CUDA.
-- Returns shapes compatible with utils.cv_processing.segment_person_hq logic.
+SAM2 Loader + Guarded Predictor Adapter (VRAM-friendly, shape-safe, thread-safe, PyTorch2-ready)
 """
 
 from __future__ import annotations
@@ -20,12 +14,10 @@
 import numpy as np
 import torch
 import cv2
+import threading
 
 logger = logging.getLogger(__name__)
 
-
-# -------------------------- helpers --------------------------
-
 def _select_device(pref: str) -> str:
     pref = (pref or "").lower()
     if pref.startswith("cuda"):
@@ -34,21 +26,12 @@ def _select_device(pref: str) -> str:
         return "cpu"
     return "cuda" if torch.cuda.is_available() else "cpu"
 
-
 def _ensure_rgb_uint8(img: np.ndarray, force_bgr_to_rgb: bool = False) -> np.ndarray:
-    """
-    Accept BGR/RGB, 3ch/4ch, uint8/float; return RGB uint8 [H,W,3].
-    We DO NOT blindly swap channels; cv_processing already feeds RGB.
-    Set force_bgr_to_rgb=True only if you know inputs are BGR.
-    """
     if img is None:
         raise ValueError("set_image received None image")
-
     arr = np.asarray(img)
     if arr.ndim != 3 or arr.shape[2] < 3:
         raise ValueError(f"Expected HxWxC image with C>=3, got shape={arr.shape}")
-
-    # If float, clamp + scale to uint8
     if np.issubdtype(arr.dtype, np.floating):
         arr = np.clip(arr, 0.0, 1.0)
         arr = (arr * 255.0 + 0.5).astype(np.uint8)
@@ -57,17 +40,12 @@ def _ensure_rgb_uint8(img: np.ndarray, force_bgr_to_rgb: bool = False) -> np.ndarray:
         arr = (arr / 257).astype(np.uint8)
     else:
         arr = arr.astype(np.uint8)
-
-    # If 4-channel, drop alpha
     if arr.shape[2] == 4:
         arr = arr[:, :, :3]
-
     if force_bgr_to_rgb:
         arr = cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
-
     return arr
 
-
 def _compute_scaled_size(h: int, w: int, max_edge: int, target_pixels: int) -> Tuple[int, int, float]:
     if h <= 0 or w <= 0:
         return h, w, 1.0
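For reference, a minimal sketch of the dtype contract these helpers enforce, assuming `_ensure_rgb_uint8` is imported from this module (the `/257` branch above suggests a uint16 input path; that association is an inference, not shown in the hunk):

import numpy as np

f32 = np.random.rand(8, 8, 3).astype(np.float32)           # floats are treated as [0, 1]
u16 = (np.random.rand(8, 8, 4) * 65535).astype(np.uint16)  # 4-channel input; alpha is dropped

assert _ensure_rgb_uint8(f32).dtype == np.uint8            # scaled by 255 and rounded
assert _ensure_rgb_uint8(u16).shape == (8, 8, 3)           # rescaled, RGBA -> RGB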
@@ -78,17 +56,12 @@ def _compute_scaled_size(h: int, w: int, max_edge: int, target_pixels: int) -> Tuple[int, int, float]:
     nw = max(1, int(round(w * s)))
     return nh, nw, s
 
-
 def _ladder(nh: int, nw: int) -> List[Tuple[int, int]]:
-    """
-    Progressive smaller sizes for OOM fallback.
-    """
     sizes = [(nh, nw)]
     sizes.append((max(1, int(nh * 0.85)), max(1, int(nw * 0.85))))
     sizes.append((max(1, int(nh * 0.70)), max(1, int(nw * 0.70))))
     sizes.append((max(1, int(nh * 0.50)), max(1, int(nw * 0.50))))
     sizes.append((max(1, int(nh * 0.35)), max(1, int(nw * 0.35))))
-    # de-duplicate and keep order
     uniq = []
     seen = set()
     for s in sizes:
@@ -96,11 +69,7 @@ def _ladder(nh: int, nw: int) -> List[Tuple[int, int]]:
             uniq.append(s); seen.add(s)
     return uniq
 
-
 def _upsample_stack(masks: np.ndarray, out_hw: Tuple[int, int]) -> np.ndarray:
-    """
-    masks: (N,h,w) float → bilinear → (N,H,W) float [0..1]
-    """
     if masks.ndim != 3:
         masks = np.asarray(masks)
         if masks.ndim == 2:
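As a concrete example of the OOM retry ladder, a sketch of what `_ladder` yields for a 768x1024 working size (the 0.85/0.70/0.50/0.35 factors from the hunk above, de-duplicated in order):

print(_ladder(768, 1024))
# [(768, 1024), (652, 870), (537, 716), (384, 512), (268, 358)]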
@@ -108,7 +77,6 @@ def _upsample_stack(masks: np.ndarray, out_hw: Tuple[int, int]) -> np.ndarray:
         elif masks.ndim == 4 and masks.shape[1] == 1:
             masks = masks[:, 0, :, :]
         else:
-            # try to squeeze to N,H,W
             masks = np.squeeze(masks)
             if masks.ndim == 2:
                 masks = masks[None, ...]
@@ -121,14 +89,12 @@ def _upsample_stack(masks: np.ndarray, out_hw: Tuple[int, int]) -> np.ndarray:
         out[i] = cv2.resize(masks[i].astype(np.float32), (W, H), interpolation=cv2.INTER_LINEAR)
     return np.clip(out, 0.0, 1.0)
 
-
 def _normalize_masks_dtype(x: np.ndarray) -> np.ndarray:
     x = np.asarray(x)
     if x.dtype == np.uint8:
         return (x.astype(np.float32) / 255.0)
     return x.astype(np.float32, copy=False)
 
-
 # -------------------------- adapter --------------------------
 
 class _SAM2Adapter:
@@ -138,22 +104,16 @@ class _SAM2Adapter:
     - model-only downscale on set_image
     - OOM-aware predict with retry at smaller sizes
     - upsample masks back to original size
+    - now thread-safe
     """
     def __init__(self, predictor, device: str):
         self.pred = predictor
         self.device = device
-
-        # original image size (for upsample)
         self.orig_hw: Tuple[int, int] = (0, 0)
-
-        # env tunables
         self.max_edge = int(os.environ.get("SAM2_MAX_EDGE", "1024"))
         self.target_pixels = int(os.environ.get("SAM2_TARGET_PIXELS", "900000"))
         self.force_bgr_to_rgb = os.environ.get("SAM2_ASSUME_BGR", "0") == "1"
-
-        # precision
         self.use_autocast = (device == "cuda")
-        # prefer bf16 if available, else fp16; it's only a hint for the internal ops
         self.autocast_dtype = None
         if self.use_autocast:
             try:
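The three environment knobs read in __init__ above can be tuned per deployment; a sketch, set before the adapter is constructed:

import os

os.environ["SAM2_MAX_EDGE"] = "768"          # cap on the longer edge of the working image
os.environ["SAM2_TARGET_PIXELS"] = "500000"  # cap on total working pixels (~0.5 MP)
os.environ["SAM2_ASSUME_BGR"] = "1"          # only if the input frames really are BGR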
@@ -164,138 +124,103 @@ def __init__(self, predictor, device: str):
                 self.autocast_dtype = torch.float16 if cc[0] >= 7 else None
             except Exception:
                 self.autocast_dtype = None
-
-        # cached current working image (RGB uint8) and its size
         self._current_rgb: Optional[np.ndarray] = None
         self._current_hw: Tuple[int, int] = (0, 0)
-
-        # --- API mirror ---
+        self._lock = threading.Lock()
 
     def set_image(self, image: np.ndarray):
-        ...
-        else:
-            self._current_rgb = rgb
-            self._current_hw = (H, W)
-
-        # prime embeddings on predictor
-        self.pred.set_image(self._current_rgb)
+        with self._lock:
+            rgb = _ensure_rgb_uint8(image, force_bgr_to_rgb=self.force_bgr_to_rgb)
+            H, W = rgb.shape[:2]
+            self.orig_hw = (H, W)
+            nh, nw, s = _compute_scaled_size(H, W, self.max_edge, self.target_pixels)
+            if s < 1.0:
+                work = cv2.resize(rgb, (nw, nh), interpolation=cv2.INTER_AREA)
+                self._current_rgb = work
+                self._current_hw = (nh, nw)
+            else:
+                self._current_rgb = rgb
+                self._current_hw = (H, W)
+            self.pred.set_image(self._current_rgb)
 
     def predict(self, **kwargs) -> Dict[str, Any]:
-        ...
-                        lg = _upsample_stack(lg, (H, W))
-                    elif lg.ndim == 4 and lg.shape[1] == 1:
-                        lg = _upsample_stack(lg[:, 0, :, :], (H, W))
-                    out_dict["logits"] = lg.astype(np.float32, copy=False)
-                return out_dict
-            except torch.cuda.OutOfMemoryError as e:
-                last_exc = e
-                logger.warning(f"SAM2 OOM at {th}x{tw}; retrying smaller. {e}")
-                torch.cuda.empty_cache()
-                continue
-            except Exception as e:
-                last_exc = e
-                logger.debug(traceback.format_exc())
-                logger.warning(f"SAM2 predict failed at {th}x{tw}; retrying smaller. {e}")
-                torch.cuda.empty_cache()
-                continue
-
-        # All attempts failed → safe fallback (full mask)
-        logger.warning(f"SAM2 calls failed; returning fallback. {last_exc}")
-        return {
-            "masks": np.ones((1, H, W), dtype=np.float32),
-            "scores": np.array([0.5], dtype=np.float32),
-        }
+        with self._lock:
+            if self._current_rgb is None or self.orig_hw == (0, 0):
+                raise RuntimeError("SAM2Adapter.predict called before set_image()")
+            H, W = self.orig_hw
+            nh, nw = self._current_hw
+            sizes = _ladder(nh, nw)
+            last_exc: Optional[BaseException] = None
+            for (th, tw) in sizes:
+                try:
+                    if (th, tw) != (nh, nw):
+                        small = cv2.resize(self._current_rgb, (tw, th), interpolation=cv2.INTER_AREA)
+                        self.pred.set_image(small)
+                    class _NoOp:
+                        def __enter__(self): return None
+                        def __exit__(self, *a): return False
+                    # -------- PyTorch 2.x autocast signature --------
+                    if self.use_autocast and self.autocast_dtype is not None:
+                        amp_ctx = torch.autocast(device_type="cuda", dtype=self.autocast_dtype)
+                    else:
+                        amp_ctx = _NoOp()
+                    with torch.inference_mode():
+                        with amp_ctx:
+                            out = self.pred.predict(**kwargs)
+                    # normalize outputs to dict
+                    masks = None
+                    scores = None
+                    logits = None
+                    if isinstance(out, dict):
+                        masks = out.get("masks", None)
+                        scores = out.get("scores", None)
+                        logits = out.get("logits", None)
+                    elif isinstance(out, (tuple, list)):
+                        if len(out) >= 1: masks = out[0]
+                        if len(out) >= 2: scores = out[1]
+                        if len(out) >= 3: logits = out[2]
+                    else:
+                        masks = out
+                    if masks is None:
+                        raise RuntimeError("SAM2 returned no masks")
+                    masks = np.asarray(masks)
+                    if masks.ndim == 2:
+                        masks = masks[None, ...]
+                    elif masks.ndim == 4 and masks.shape[1] == 1:
+                        masks = masks[:, 0, :, :]
+                    masks = _normalize_masks_dtype(masks)
+                    masks_up = _upsample_stack(masks, (H, W))
+                    if scores is None:
+                        scores = np.ones((masks_up.shape[0],), dtype=np.float32) * 0.5
+                    else:
+                        scores = np.asarray(scores).astype(np.float32, copy=False).reshape(-1)
+                    out_dict = {"masks": masks_up, "scores": scores}
+                    if logits is not None:
+                        lg = np.asarray(logits)
+                        if lg.ndim == 3:
+                            lg = _upsample_stack(lg, (H, W))
+                        elif lg.ndim == 4 and lg.shape[1] == 1:
+                            lg = _upsample_stack(lg[:, 0, :, :], (H, W))
+                        out_dict["logits"] = lg.astype(np.float32, copy=False)
+                    return out_dict
+                except torch.cuda.OutOfMemoryError as e:
+                    last_exc = e
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+                    logger.warning(f"SAM2 OOM at {th}x{tw}; retrying smaller. {e}")
+                    continue
+                except Exception as e:
+                    last_exc = e
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+                    logger.debug(traceback.format_exc())
+                    logger.warning(f"SAM2 predict failed at {th}x{tw}; retrying smaller. {e}")
+                    continue
+            logger.warning(f"SAM2 calls failed; returning fallback. {last_exc}")
+            return {
+                "masks": np.ones((1, H, W), dtype=np.float32),
+                "scores": np.array([0.5], dtype=np.float32),
+            }
 
 # -------------------------- Loader --------------------------
 
 class SAM2Loader:
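A minimal usage sketch of the adapter defined above; `adapter` is assumed to come from SAM2Loader, and the prompt kwargs are forwarded verbatim to SAM2ImagePredictor.predict (a single foreground click shown here):

import numpy as np

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)           # RGB uint8 frame
adapter.set_image(frame)                                    # may downscale internally
out = adapter.predict(
    point_coords=np.array([[960, 540]], dtype=np.float32),  # (x, y) in original pixels
    point_labels=np.array([1], dtype=np.int32),             # 1 = foreground
)
masks, scores = out["masks"], out["scores"]                 # masks: (N, 1080, 1920) float32 in [0, 1]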
@@ -306,7 +231,7 @@ def __init__(self, device: str = "cuda", cache_dir: str = "./checkpoints/sam2_ca
         self.cache_dir = cache_dir
         os.makedirs(self.cache_dir, exist_ok=True)
 
-        #
+        # HuggingFace Hub on Spaces: avoid symlink errors
         os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1")
         os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0")
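Both are setdefault calls, so a deployment can still override them; a sketch, run before SAM2Loader is constructed:

import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # opt back in to hf_transfer downloads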
@@ -325,20 +250,15 @@ def load(self, model_size: str = "auto") -> Optional[Any]:
         """
         if model_size == "auto":
             model_size = self._determine_optimal_size()
-
         model_map = {
             "tiny": "facebook/sam2.1-hiera-tiny",
             "small": "facebook/sam2.1-hiera-small",
             "base": "facebook/sam2.1-hiera-base-plus",
             "large": "facebook/sam2.1-hiera-large",
         }
-
         self.model_id = model_map.get(model_size, model_map["tiny"])
         logger.info(f"Loading SAM2 model: {self.model_id} (device={self.device})")
-
-        # Try the official loader
         strategies = [("official", self._load_official), ("fallback", self._load_fallback)]
-
         for name, fn in strategies:
             try:
                 t0 = time.time()
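A sketch of driving the loader with an explicit size (names from model_map above); load() returns None only when every strategy fails:

loader = SAM2Loader(device="cuda")        # cache_dir keeps its default from __init__
pred = loader.load(model_size="small")    # "tiny" | "small" | "base" | "large" | "auto"
if pred is None:
    raise RuntimeError("SAM2 unavailable")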
@@ -353,7 +273,6 @@ def load(self, model_size: str = "auto") -> Optional[Any]:
             except Exception as e:
                 logger.error(f"SAM2 {name} strategy failed: {e}")
                 logger.debug(traceback.format_exc())
-
         logger.error("All SAM2 loading strategies failed")
         return None
 
@@ -374,26 +293,21 @@ def _determine_optimal_size(self) -> str:
     def _load_official(self) -> Optional[Any]:
         """Load using official SAM2 API"""
         from sam2.sam2_image_predictor import SAM2ImagePredictor
-
         predictor = SAM2ImagePredictor.from_pretrained(
             self.model_id,
             cache_dir=self.cache_dir,
             local_files_only=False,
             trust_remote_code=True,
         )
-
-        # Move internal model to device if present
         if hasattr(predictor, "model"):
             predictor.model = predictor.model.to(self.device)
             predictor.model.eval()
         if hasattr(predictor, "device"):
             predictor.device = self.device
-
         return predictor
 
     def _load_fallback(self) -> Optional[Any]:
         """Create a tiny fallback predictor"""
-
         class FallbackSAM2:
             def __init__(self, device):
                 self.device = device
@@ -405,16 +319,15 @@ def predict(self, **kwargs):
                     h, w = self._img.shape[:2]
                 else:
                     h, w = 512, 512
+                # Return a full-ones mask; downstream code handles this case.
                 return {
                     "masks": np.ones((1, h, w), dtype=np.float32),
                     "scores": np.array([0.5], dtype=np.float32),
                 }
-
         logger.warning("Using fallback SAM2 (no real segmentation)")
         return FallbackSAM2(self.device)
 
     def cleanup(self):
-        """Clean up resources"""
         self.adapter = None
         if self.model is not None:
             try:
@@ -426,7 +339,6 @@ def cleanup(self):
             torch.cuda.empty_cache()
 
     def get_info(self) -> Dict[str, Any]:
-        """Get loader information"""
         return {
             "loaded": self.adapter is not None,
             "model_id": self.model_id,
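One caveat on the new lock: it serializes individual calls, not a set_image() + predict() pair, so threads that pair the two should still hold an outer lock of their own. A sketch:

import threading

pair_lock = threading.Lock()

def segment(adapter, frame, **prompts):
    # Keep another thread from swapping the cached image between the two calls.
    with pair_lock:
        adapter.set_image(frame)
        return adapter.predict(**prompts)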