Spaces:

jboth
/

sam3d-objects-fixed

Paused

App Files Files Community

jboth committed 25 days ago

Commit

ccebd17

verified ·

1 Parent(s): 37b8f65

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +36 -9

app.py CHANGED Viewed

@@ -76,6 +76,42 @@ patch = SAM3D_PATH / "patching" / "hydra"
 if patch.exists():
     subprocess.run(["bash", str(patch)], capture_output=True, cwd=str(SAM3D_PATH))
 sys.path.insert(0, str(SAM3D_PATH))
 sys.path.insert(0, str(SAM3D_PATH / "notebook"))
@@ -291,15 +327,6 @@ def test_sam3d_only(image: np.ndarray):
         print(f"  Mask created: {mask.sum()} pixels ({time.time()-t0:.0f}s)")
         from inference import Inference
-        # SAM3D's inference_pipeline.py auto-detects H200 and sets ATTN_BACKEND=flash_attn
-        # We must override BACK to sdpa since flash_attn is not available
-        import sam3d_objects.model.backbone.tdfy_dit.modules.attention as _attn_mod
-        import sam3d_objects.model.backbone.tdfy_dit.modules.sparse as _sparse_mod
-        _attn_mod.BACKEND = "sdpa"
-        _sparse_mod.ATTN = "sdpa"
-        os.environ["ATTN_BACKEND"] = "sdpa"
-        os.environ["SPARSE_ATTN_BACKEND"] = "sdpa"
-        print(f"  Attention backends forced to sdpa")
         print(f"  Loading SAM3D... VRAM: {torch.cuda.memory_allocated()/1e9:.1f}GB")
         sam3d = Inference(CONFIG_PATH, compile=False)
         print(f"  SAM3D loaded ({time.time()-t0:.0f}s, VRAM: {torch.cuda.memory_allocated()/1e9:.1f}GB)")

 if patch.exists():
     subprocess.run(["bash", str(patch)], capture_output=True, cwd=str(SAM3D_PATH))
+# CRITICAL PATCH: Prevent SAM3D from overriding ATTN_BACKEND to flash_attn
+# inference_pipeline.py auto-detects H200/A100 and forces flash_attn,
+# but we don't have the real flash_attn package.
+ip_file = SAM3D_PATH / "sam3d_objects" / "pipeline" / "inference_pipeline.py"
+if ip_file.exists():
+    ip_src = ip_file.read_text()
+    # Replace the set_attention_backend function to respect our env vars
+    old_fn = """def set_attention_backend():
+    if torch.cuda.is_available():
+        gpu_name = torch.cuda.get_device_name(0)
+    else:
+        gpu_name = "CPU"
+    logger.info(f"GPU name is {gpu_name}")
+    if "A100" in gpu_name or "H100" in gpu_name or "H200" in gpu_name:
+        # logger.info("Use flash_attn")
+        os.environ["ATTN_BACKEND"] = "flash_attn"
+        os.environ["SPARSE_ATTN_BACKEND"] = "flash_attn""""
+    new_fn = """def set_attention_backend():
+    if torch.cuda.is_available():
+        gpu_name = torch.cuda.get_device_name(0)
+    else:
+        gpu_name = "CPU"
+    logger.info(f"GPU name is {gpu_name}")
+    # PATCHED: Always use sdpa backend (flash_attn not available on ZeroGPU)
+    logger.info("Using sdpa backend (patched for ZeroGPU)")
+    os.environ.setdefault("ATTN_BACKEND", "sdpa")
+    os.environ.setdefault("SPARSE_ATTN_BACKEND", "sdpa")""""
+    if old_fn in ip_src:
+        ip_src = ip_src.replace(old_fn, new_fn)
+        ip_file.write_text(ip_src)
+        print("PATCHED: inference_pipeline.py - forced sdpa backend")
+    else:
+        print("WARNING: Could not patch inference_pipeline.py")
 sys.path.insert(0, str(SAM3D_PATH))
 sys.path.insert(0, str(SAM3D_PATH / "notebook"))
         print(f"  Mask created: {mask.sum()} pixels ({time.time()-t0:.0f}s)")
         from inference import Inference
         print(f"  Loading SAM3D... VRAM: {torch.cuda.memory_allocated()/1e9:.1f}GB")
         sam3d = Inference(CONFIG_PATH, compile=False)
         print(f"  SAM3D loaded ({time.time()-t0:.0f}s, VRAM: {torch.cuda.memory_allocated()/1e9:.1f}GB)")