Spaces:
Paused
Paused
Zhen Ye Claude Opus 4.6 committed on
Commit ·
4c36e1e
1
Parent(s): 63684e4
fix: add runtime fallback for torch.compile inductor/triton failures
Browse files
torch.compile wrapping succeeds immediately but actual compilation
happens lazily on first forward pass. Now catches runtime errors on
the first propagate_in_video yield, reverts all compiled forwards
to eager originals, and retries the propagation transparently.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
models/segmenters/grounded_sam2.py
CHANGED
|
@@ -389,7 +389,9 @@ class GroundedSAM2Segmenter(Segmenter):
|
|
| 389 |
"""Compile SAM2 sub-modules with torch.compile (max-autotune).
|
| 390 |
|
| 391 |
Compiles 5 components matching Facebook's official VOS recipe.
|
| 392 |
-
|
|
|
|
|
|
|
| 393 |
"""
|
| 394 |
vp = self._video_predictor
|
| 395 |
components = [
|
|
@@ -399,20 +401,35 @@ class GroundedSAM2Segmenter(Segmenter):
|
|
| 399 |
("sam_prompt_encoder", dict(mode="max-autotune", fullgraph=True, dynamic=False)),
|
| 400 |
("sam_mask_decoder", dict(mode="max-autotune", fullgraph=True, dynamic=False)),
|
| 401 |
]
|
|
|
|
| 402 |
compiled = []
|
| 403 |
for attr, kwargs in components:
|
| 404 |
module = getattr(vp, attr, None)
|
| 405 |
if module is None:
|
| 406 |
continue
|
| 407 |
try:
|
|
|
|
| 408 |
module.forward = torch.compile(module.forward, **kwargs)
|
| 409 |
compiled.append(attr)
|
| 410 |
except Exception as e:
|
| 411 |
-
logging.warning("torch.compile failed for %s: %s", attr, e)
|
| 412 |
if compiled:
|
| 413 |
logging.info("torch.compile applied to: %s", ", ".join(compiled))
|
|
|
|
| 414 |
else:
|
| 415 |
logging.info("torch.compile not available, using eager mode.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
# -- Single-frame interface (Segmenter.predict) -------------------------
|
| 418 |
|
|
@@ -596,9 +613,32 @@ class GroundedSAM2Segmenter(Segmenter):
|
|
| 596 |
class_names_list: List[str] = []
|
| 597 |
cursor = 0
|
| 598 |
|
| 599 |
-
|
|
|
|
|
|
|
| 600 |
inference_state, max_frame_num_to_track=step, start_frame_idx=start_idx,
|
| 601 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
bool_masks = (out_mask_logits[:, 0] > 0.0) # (N, H, W) GPU async
|
| 603 |
n = bool_masks.shape[0]
|
| 604 |
|
|
|
|
| 389 |
"""Compile SAM2 sub-modules with torch.compile (max-autotune).
|
| 390 |
|
| 391 |
Compiles 5 components matching Facebook's official VOS recipe.
|
| 392 |
+
torch.compile wraps succeed immediately; actual Triton/inductor
|
| 393 |
+
compilation happens lazily on first forward pass. We store
|
| 394 |
+
original forwards so propagate_segment can fall back on error.
|
| 395 |
"""
|
| 396 |
vp = self._video_predictor
|
| 397 |
components = [
|
|
|
|
| 401 |
("sam_prompt_encoder", dict(mode="max-autotune", fullgraph=True, dynamic=False)),
|
| 402 |
("sam_mask_decoder", dict(mode="max-autotune", fullgraph=True, dynamic=False)),
|
| 403 |
]
|
| 404 |
+
self._original_forwards: Dict[str, Any] = {}
|
| 405 |
compiled = []
|
| 406 |
for attr, kwargs in components:
|
| 407 |
module = getattr(vp, attr, None)
|
| 408 |
if module is None:
|
| 409 |
continue
|
| 410 |
try:
|
| 411 |
+
self._original_forwards[attr] = module.forward
|
| 412 |
module.forward = torch.compile(module.forward, **kwargs)
|
| 413 |
compiled.append(attr)
|
| 414 |
except Exception as e:
|
| 415 |
+
logging.warning("torch.compile wrapping failed for %s: %s", attr, e)
|
| 416 |
if compiled:
|
| 417 |
logging.info("torch.compile applied to: %s", ", ".join(compiled))
|
| 418 |
+
self._torch_compiled = True
|
| 419 |
else:
|
| 420 |
logging.info("torch.compile not available, using eager mode.")
|
| 421 |
+
self._torch_compiled = False
|
| 422 |
+
|
| 423 |
+
def _revert_torch_compile(self):
|
| 424 |
+
"""Revert compiled forwards back to eager originals."""
|
| 425 |
+
vp = self._video_predictor
|
| 426 |
+
for attr, orig_fwd in self._original_forwards.items():
|
| 427 |
+
module = getattr(vp, attr, None)
|
| 428 |
+
if module is not None:
|
| 429 |
+
module.forward = orig_fwd
|
| 430 |
+
self._original_forwards.clear()
|
| 431 |
+
self._torch_compiled = False
|
| 432 |
+
logging.warning("Reverted torch.compile — falling back to eager mode.")
|
| 433 |
|
| 434 |
# -- Single-frame interface (Segmenter.predict) -------------------------
|
| 435 |
|
|
|
|
| 613 |
class_names_list: List[str] = []
|
| 614 |
cursor = 0
|
| 615 |
|
| 616 |
+
# Wrap generator to catch torch.compile runtime failures on first frame.
|
| 617 |
+
# If inductor/triton fails, revert to eager and restart propagation.
|
| 618 |
+
_generator = self._video_predictor.propagate_in_video(
|
| 619 |
inference_state, max_frame_num_to_track=step, start_frame_idx=start_idx,
|
| 620 |
+
)
|
| 621 |
+
if getattr(self, '_torch_compiled', False) and not getattr(self, '_compile_verified', False):
|
| 622 |
+
try:
|
| 623 |
+
_first = next(_generator)
|
| 624 |
+
except Exception as e:
|
| 625 |
+
logging.warning("torch.compile runtime error, reverting to eager: %s", e)
|
| 626 |
+
self._revert_torch_compile()
|
| 627 |
+
# Re-init propagation with eager forwards
|
| 628 |
+
self._video_predictor.reset_state(inference_state)
|
| 629 |
+
for obj_id, obj_info in mask_dict.labels.items():
|
| 630 |
+
self._video_predictor.add_new_mask(
|
| 631 |
+
inference_state, start_idx, obj_id, obj_info.mask,
|
| 632 |
+
)
|
| 633 |
+
_generator = self._video_predictor.propagate_in_video(
|
| 634 |
+
inference_state, max_frame_num_to_track=step, start_frame_idx=start_idx,
|
| 635 |
+
)
|
| 636 |
+
_first = next(_generator)
|
| 637 |
+
import itertools
|
| 638 |
+
self._compile_verified = True
|
| 639 |
+
_generator = itertools.chain([_first], _generator)
|
| 640 |
+
|
| 641 |
+
for out_frame_idx, out_obj_ids, out_mask_logits in _generator:
|
| 642 |
bool_masks = (out_mask_logits[:, 0] > 0.0) # (N, H, W) GPU async
|
| 643 |
n = bool_masks.shape[0]
|
| 644 |
|