Spaces:

contacthamza91
/

SAM_three_UI

Sleeping

App Files Files Community

AI Agent committed on Mar 28

Commit

d8802bd

1 Parent(s): 273261c

Add roi_align + layer_norm interceptors to fix HalfTensor/FloatTensor mismatch in geometry encoder

Browse files

Files changed (1) hide show

app.py +23 -0

app.py CHANGED Viewed

@@ -46,6 +46,29 @@ if torch.cuda.is_available():
         return orig_conv2d(input, weight, bias, stride, padding, dilation, groups)
     F.conv2d = patched_conv2d
 # ── Ensure SAM 3 Checkpoint is downloaded ────────────────────────
 # (HuggingFace Spaces can use the hf_hub_download mechanism)
 from huggingface_hub import hf_hub_download

         return orig_conv2d(input, weight, bias, stride, padding, dilation, groups)
     F.conv2d = patched_conv2d
+    # 3. Patch torchvision.ops.roi_align — Meta's geometry_encoders.py
+    #    calls boxes_xyxy.float() which creates float32 while img_feats is float16.
+    try:
+        import torchvision.ops
+        orig_roi_align = torchvision.ops.roi_align
+        def patched_roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1, aligned=False):
+            if isinstance(boxes, torch.Tensor) and input.is_floating_point() and boxes.dtype != input.dtype:
+                boxes = boxes.to(input.dtype)
+            elif isinstance(boxes, list):
+                boxes = [b.to(input.dtype) if isinstance(b, torch.Tensor) and b.dtype != input.dtype else b for b in boxes]
+            return orig_roi_align(input, boxes, output_size, spatial_scale, sampling_ratio, aligned)
+        torchvision.ops.roi_align = patched_roi_align
+    except ImportError:
+        pass
+    # 4. Patch F.layer_norm — a common ViT dtype mismatch point (group_norm is not patched here)
+    orig_layer_norm = F.layer_norm
+    def patched_layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
+        if weight is not None and input.is_floating_point() and input.dtype != weight.dtype:
+            input = input.to(weight.dtype)
+        return orig_layer_norm(input, normalized_shape, weight, bias, eps)
+    F.layer_norm = patched_layer_norm
 # ── Ensure SAM 3 Checkpoint is downloaded ────────────────────────
 # (HuggingFace Spaces can use the hf_hub_download mechanism)
 from huggingface_hub import hf_hub_download