Spaces:

contacthamza91
/

SAM_three_UI

Sleeping

AI Agent committed on Mar 28

Commit

32691c0

1 Parent(s): a998b70

Cast model explicitly to bfloat16 on T4 to override AMP dtype mismatch

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,17 +31,6 @@ if not torch.cuda.is_available():
                 return __orig_fn(*args, **kwargs)
             setattr(torch, name, patched_fn)
-# Intercept Meta's hardcoded BFloat16 autocast (T4 Turing GPUs don't support BFloat16 hardware math)
-original_autocast = torch.autocast
-class PatchedAutocast(original_autocast):
-    def __init__(self, device_type, dtype=None, *args, **kwargs):
-        if dtype == torch.bfloat16 and torch.cuda.is_available() and not torch.cuda.is_bf16_supported():
-            dtype = torch.float16  # Fallback to fp16, supported perfectly by T4 Turing NVidia cards
-        if device_type == 'cuda' and not torch.cuda.is_available():
-            device_type = 'cpu'
-        super().__init__(device_type, dtype, *args, **kwargs)
-torch.autocast = PatchedAutocast
 # ── SAM 3 Imports ────────────────────────────────────────────────
 try:
     from sam3.model_builder import build_sam3_image_model
@@ -75,8 +64,13 @@ if model_installed:
     model.load_state_dict(image_state_dict, strict=False)
     model.to(device)
-    if not torch.cuda.is_available():
         model.to(torch.float32)  # Force upcast from checkpoint's native bfloat16 to float32 for CPU inference
     processor = Sam3Processor(model)
     if not torch.cuda.is_available():
         processor.device = "cpu"

                 return __orig_fn(*args, **kwargs)
             setattr(torch, name, patched_fn)
 # ── SAM 3 Imports ────────────────────────────────────────────────
 try:
     from sam3.model_builder import build_sam3_image_model
     model.load_state_dict(image_state_dict, strict=False)
     model.to(device)
+    # BFloat16 alignment for T4 / Turing GPUs
+    if torch.cuda.is_available():
+        model.to(torch.bfloat16)
+    else:
         model.to(torch.float32)  # Force upcast from checkpoint's native bfloat16 to float32 for CPU inference
     processor = Sam3Processor(model)
     if not torch.cuda.is_available():
         processor.device = "cpu"