Zhen Ye Claude Opus 4.6 committed on
Commit
1c6c619
·
1 Parent(s): 5749bd6

feat: add num_maskmem parameter to /benchmark endpoint

Browse files

Thread num_maskmem override through the full pipeline so benchmark
runs can test reduced memory bank sizes (e.g. 3 instead of default 7).
Patches the SAM2 video predictor's maskmem_tpos_enc at runtime.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

app.py CHANGED
@@ -32,6 +32,7 @@ import uuid
32
  from contextlib import asynccontextmanager
33
  from datetime import timedelta
34
  from pathlib import Path
 
35
 
36
  import cv2
37
  import numpy as np
@@ -857,6 +858,7 @@ async def benchmark_endpoint(
857
  queries: str = Form("person,car,truck"),
858
  segmenter: str = Form("gsam2_large"),
859
  step: int = Form(20),
 
860
  ):
861
  """Run instrumented GSAM2 pipeline and return latency breakdown JSON.
862
 
@@ -898,6 +900,7 @@ async def benchmark_endpoint(
898
  enable_gpt=False,
899
  _perf_metrics=metrics,
900
  _perf_lock=lock,
 
901
  )
902
 
903
  # Read frame count and fps from output video
@@ -915,6 +918,7 @@ async def benchmark_endpoint(
915
  "total_frames": total_frames,
916
  "fps": fps,
917
  "num_gpus": num_gpus,
 
918
  "metrics": metrics,
919
  })
920
 
 
32
  from contextlib import asynccontextmanager
33
  from datetime import timedelta
34
  from pathlib import Path
35
+ from typing import Optional
36
 
37
  import cv2
38
  import numpy as np
 
858
  queries: str = Form("person,car,truck"),
859
  segmenter: str = Form("gsam2_large"),
860
  step: int = Form(20),
861
+ num_maskmem: Optional[int] = Form(None),
862
  ):
863
  """Run instrumented GSAM2 pipeline and return latency breakdown JSON.
864
 
 
900
  enable_gpt=False,
901
  _perf_metrics=metrics,
902
  _perf_lock=lock,
903
+ num_maskmem=num_maskmem,
904
  )
905
 
906
  # Read frame count and fps from output video
 
918
  "total_frames": total_frames,
919
  "fps": fps,
920
  "num_gpus": num_gpus,
921
+ "num_maskmem": num_maskmem if num_maskmem is not None else 7,
922
  "metrics": metrics,
923
  })
924
 
inference.py CHANGED
@@ -1631,6 +1631,7 @@ def run_grounded_sam2_tracking(
1631
  first_frame_gpt_results: Optional[Dict[str, Any]] = None,
1632
  _perf_metrics: Optional[Dict[str, float]] = None,
1633
  _perf_lock=None,
 
1634
  ) -> str:
1635
  """Run Grounded-SAM-2 video tracking pipeline.
1636
 
@@ -1679,7 +1680,8 @@ def run_grounded_sam2_tracking(
1679
  if num_gpus <= 1:
1680
  # ---------- Single-GPU fallback ----------
1681
  device_str = "cuda:0" if torch.cuda.is_available() else "cpu"
1682
- segmenter = load_segmenter_on_device(active_segmenter, device_str)
 
1683
  _check_cancellation(job_id)
1684
 
1685
  if _perf_metrics is not None:
@@ -1710,11 +1712,13 @@ def run_grounded_sam2_tracking(
1710
  # Phase 1: Load one segmenter per GPU (parallel)
1711
  segmenters = []
1712
  with ThreadPoolExecutor(max_workers=num_gpus) as pool:
 
1713
  futs = [
1714
  pool.submit(
1715
  load_segmenter_on_device,
1716
  active_segmenter,
1717
  f"cuda:{i}",
 
1718
  )
1719
  for i in range(num_gpus)
1720
  ]
 
1631
  first_frame_gpt_results: Optional[Dict[str, Any]] = None,
1632
  _perf_metrics: Optional[Dict[str, float]] = None,
1633
  _perf_lock=None,
1634
+ num_maskmem: Optional[int] = None,
1635
  ) -> str:
1636
  """Run Grounded-SAM-2 video tracking pipeline.
1637
 
 
1680
  if num_gpus <= 1:
1681
  # ---------- Single-GPU fallback ----------
1682
  device_str = "cuda:0" if torch.cuda.is_available() else "cpu"
1683
+ _seg_kw = {"num_maskmem": num_maskmem} if num_maskmem is not None else {}
1684
+ segmenter = load_segmenter_on_device(active_segmenter, device_str, **_seg_kw)
1685
  _check_cancellation(job_id)
1686
 
1687
  if _perf_metrics is not None:
 
1712
  # Phase 1: Load one segmenter per GPU (parallel)
1713
  segmenters = []
1714
  with ThreadPoolExecutor(max_workers=num_gpus) as pool:
1715
+ _seg_kw_multi = {"num_maskmem": num_maskmem} if num_maskmem is not None else {}
1716
  futs = [
1717
  pool.submit(
1718
  load_segmenter_on_device,
1719
  active_segmenter,
1720
  f"cuda:{i}",
1721
+ **_seg_kw_multi,
1722
  )
1723
  for i in range(num_gpus)
1724
  ]
models/segmenters/grounded_sam2.py CHANGED
@@ -318,10 +318,12 @@ class GroundedSAM2Segmenter(Segmenter):
318
  device: Optional[str] = None,
319
  step: int = 20,
320
  iou_threshold: float = 0.5,
 
321
  ):
322
  self.model_size = model_size
323
  self.step = step
324
  self.iou_threshold = iou_threshold
 
325
  self.name = f"gsam2_{model_size}"
326
 
327
  if device:
@@ -370,6 +372,11 @@ class GroundedSAM2Segmenter(Segmenter):
370
  sam2_image_model = build_sam2_hf(hf_id, device=self.device)
371
  self._image_predictor = SAM2ImagePredictor(sam2_image_model)
372
 
 
 
 
 
 
373
  # Reuse existing Grounding DINO detector from our codebase
374
  from models.detectors.grounding_dino import GroundingDinoDetector
375
 
@@ -378,6 +385,42 @@ class GroundedSAM2Segmenter(Segmenter):
378
  self._models_loaded = True
379
  logging.info("Grounded-SAM-2 models loaded successfully.")
380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  # -- Single-frame interface (Segmenter.predict) -------------------------
382
 
383
  def predict(
 
318
  device: Optional[str] = None,
319
  step: int = 20,
320
  iou_threshold: float = 0.5,
321
+ num_maskmem: Optional[int] = None,
322
  ):
323
  self.model_size = model_size
324
  self.step = step
325
  self.iou_threshold = iou_threshold
326
+ self.num_maskmem = num_maskmem # None = use default (7)
327
  self.name = f"gsam2_{model_size}"
328
 
329
  if device:
 
372
  sam2_image_model = build_sam2_hf(hf_id, device=self.device)
373
  self._image_predictor = SAM2ImagePredictor(sam2_image_model)
374
 
375
+ # Override num_maskmem if requested
376
+ if self.num_maskmem is not None:
377
+ self._patch_num_maskmem(self._video_predictor, self.num_maskmem)
378
+ logging.info("Patched video predictor num_maskmem → %d", self.num_maskmem)
379
+
380
  # Reuse existing Grounding DINO detector from our codebase
381
  from models.detectors.grounding_dino import GroundingDinoDetector
382
 
 
385
  self._models_loaded = True
386
  logging.info("Grounded-SAM-2 models loaded successfully.")
387
 
388
+ @staticmethod
389
+ def _patch_num_maskmem(predictor, num_maskmem: int):
390
+ """Override num_maskmem on a loaded SAM2 video predictor at runtime.
391
+
392
+ Slices the temporal positional encoding parameter to match the new
393
+ memory size so the model runs without shape mismatches.
394
+ """
395
+ import torch.nn as nn
396
+
397
+ # The underlying model may be predictor itself or predictor.model
398
+ model = getattr(predictor, "model", predictor)
399
+ old = getattr(model, "num_maskmem", None)
400
+ if old is None:
401
+ logging.warning("Cannot patch num_maskmem: attribute not found on model")
402
+ return
403
+ if num_maskmem == old:
404
+ return
405
+ model.num_maskmem = num_maskmem
406
+ # Slice or pad maskmem_tpos_enc (shape: [num_maskmem, 1, 1, mem_dim])
407
+ if hasattr(model, "maskmem_tpos_enc") and model.maskmem_tpos_enc is not None:
408
+ old_enc = model.maskmem_tpos_enc
409
+ if num_maskmem <= old_enc.shape[0]:
410
+ model.maskmem_tpos_enc = nn.Parameter(
411
+ old_enc[:num_maskmem].clone()
412
+ )
413
+ else:
414
+ # Pad with zeros for the extra slots
415
+ pad = torch.zeros(
416
+ num_maskmem - old_enc.shape[0], *old_enc.shape[1:],
417
+ device=old_enc.device, dtype=old_enc.dtype,
418
+ )
419
+ model.maskmem_tpos_enc = nn.Parameter(
420
+ torch.cat([old_enc, pad], dim=0)
421
+ )
422
+ logging.info("num_maskmem changed from %d to %d", old, num_maskmem)
423
+
424
  # -- Single-frame interface (Segmenter.predict) -------------------------
425
 
426
  def predict(
models/segmenters/model_loader.py CHANGED
@@ -46,6 +46,6 @@ def load_segmenter(name: Optional[str] = None) -> Segmenter:
46
  return _get_cached_segmenter(segmenter_name)
47
 
48
 
49
- def load_segmenter_on_device(name: str, device: str) -> Segmenter:
50
  """Create a new segmenter instance on the specified device (no caching)."""
51
- return _create_segmenter(name, device=device)
 
46
  return _get_cached_segmenter(segmenter_name)
47
 
48
 
49
+ def load_segmenter_on_device(name: str, device: str, **kwargs) -> Segmenter:
50
  """Create a new segmenter instance on the specified device (no caching)."""
51
+ return _create_segmenter(name, device=device, **kwargs)