dev-bjoern committed on
Commit
98cf79e
·
1 Parent(s): 0e828b5

Fix SAM 3D Objects import: use notebook/inference.py

Browse files
Files changed (1) hide show
  1. app.py +39 -21
app.py CHANGED
@@ -30,9 +30,10 @@ if not SAM3D_PATH.exists():
30
  "https://github.com/facebookresearch/sam-3d-objects.git",
31
  str(SAM3D_PATH)
32
  ], check=True)
33
- sys.path.insert(0, str(SAM3D_PATH))
34
 
 
35
  sys.path.insert(0, str(SAM3D_PATH))
 
36
 
37
  # Global models
38
  SAM3D_MODEL = None
@@ -66,15 +67,21 @@ def load_sam3d():
66
  import torch
67
  print("Loading SAM 3D Objects model...")
68
 
 
69
  checkpoint_dir = snapshot_download(
70
  repo_id="facebook/sam-3d-objects",
71
  token=os.environ.get("HF_TOKEN")
72
  )
73
 
74
- from sam_3d_objects import Sam3dObjects
 
75
 
76
- device = "cuda" if torch.cuda.is_available() else "cpu"
77
- SAM3D_MODEL = Sam3dObjects.from_pretrained(checkpoint_dir, device=device)
 
 
 
 
78
 
79
  print("✓ SAM 3D Objects loaded")
80
  return SAM3D_MODEL
@@ -101,7 +108,7 @@ def reconstruct_objects(image: np.ndarray):
101
 
102
  # Load models
103
  generator = load_sam2()
104
- sam3d = load_sam3d()
105
 
106
  # Convert to PIL if needed
107
  if isinstance(image, np.ndarray):
@@ -110,9 +117,9 @@ def reconstruct_objects(image: np.ndarray):
110
  pil_image = image
111
  image = np.array(pil_image)
112
 
113
- # Auto-detect all objects
114
  print("Detecting objects...")
115
- masks = generator.generate(pil_image)
116
 
117
  if not masks or len(masks) == 0:
118
  return None, image, "⚠️ No objects detected"
@@ -125,26 +132,37 @@ def reconstruct_objects(image: np.ndarray):
125
  preview = image.copy()
126
  preview[best_mask] = (preview[best_mask] * 0.5 + np.array([0, 255, 0]) * 0.5).astype(np.uint8)
127
 
128
- # Run 3D reconstruction on largest object
 
 
 
129
  print("Reconstructing 3D...")
130
- mask_uint8 = best_mask.astype(np.uint8)
131
- outputs = sam3d.predict(image, mask_uint8)
132
 
133
- if outputs is None:
134
  return None, preview, "⚠️ 3D reconstruction failed"
135
 
136
- # Export as GLB
137
  output_dir = tempfile.mkdtemp()
138
- glb_path = f"{output_dir}/object_{uuid.uuid4().hex[:8]}.glb"
139
-
140
- # Get vertices from gaussian splat
141
- vertices = outputs.get_xyz().cpu().numpy()
142
 
143
- # Export as point cloud GLB
144
- cloud = trimesh.PointCloud(vertices)
145
- cloud.export(glb_path, file_type='glb')
146
-
147
- return glb_path, preview, f"✓ Detected {len(masks)} objects, reconstructed largest ({len(vertices)} points)"
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  except Exception as e:
150
  import traceback
 
30
  "https://github.com/facebookresearch/sam-3d-objects.git",
31
  str(SAM3D_PATH)
32
  ], check=True)
 
33
 
34
+ # Add both repo root and notebook folder to path
35
  sys.path.insert(0, str(SAM3D_PATH))
36
+ sys.path.insert(0, str(SAM3D_PATH / "notebook"))
37
 
38
  # Global models
39
  SAM3D_MODEL = None
 
67
  import torch
68
  print("Loading SAM 3D Objects model...")
69
 
70
+ # Download checkpoints
71
  checkpoint_dir = snapshot_download(
72
  repo_id="facebook/sam-3d-objects",
73
  token=os.environ.get("HF_TOKEN")
74
  )
75
 
76
+ # Import from notebook/inference.py
77
+ from inference import Inference
78
 
79
+ # Config path in the repo
80
+ config_path = str(SAM3D_PATH / "sam3d_objects" / "configs" / "default.yaml")
81
+
82
+ SAM3D_MODEL = Inference(config_path, compile=False)
83
+ # Point to downloaded checkpoints
84
+ SAM3D_MODEL.checkpoint_dir = checkpoint_dir
85
 
86
  print("✓ SAM 3D Objects loaded")
87
  return SAM3D_MODEL
 
108
 
109
  # Load models
110
  generator = load_sam2()
111
+ inference = load_sam3d()
112
 
113
  # Convert to PIL if needed
114
  if isinstance(image, np.ndarray):
 
117
  pil_image = image
118
  image = np.array(pil_image)
119
 
120
+ # Auto-detect all objects with SAM2
121
  print("Detecting objects...")
122
+ masks = generator.generate(image)
123
 
124
  if not masks or len(masks) == 0:
125
  return None, image, "⚠️ No objects detected"
 
132
  preview = image.copy()
133
  preview[best_mask] = (preview[best_mask] * 0.5 + np.array([0, 255, 0]) * 0.5).astype(np.uint8)
134
 
135
+ # Convert mask to PIL
136
+ mask_pil = PILImage.fromarray((best_mask * 255).astype(np.uint8))
137
+
138
+ # Run 3D reconstruction
139
  print("Reconstructing 3D...")
140
+ result = inference(image=pil_image, mask=mask_pil)
 
141
 
142
+ if result is None:
143
  return None, preview, "⚠️ 3D reconstruction failed"
144
 
145
+ # Export as PLY (gaussian splat format)
146
  output_dir = tempfile.mkdtemp()
147
+ ply_path = f"{output_dir}/object_{uuid.uuid4().hex[:8]}.ply"
 
 
 
148
 
149
+ # Save the gaussian splat
150
+ if hasattr(result, 'save_ply'):
151
+ result.save_ply(ply_path)
152
+ elif 'gaussians' in result:
153
+ result['gaussians'].save_ply(ply_path)
154
+ else:
155
+ # Try to extract vertices and save as point cloud
156
+ vertices = result.get('xyz', result.get('points', None))
157
+ if vertices is not None:
158
+ if torch.is_tensor(vertices):
159
+ vertices = vertices.cpu().numpy()
160
+ cloud = trimesh.PointCloud(vertices)
161
+ cloud.export(ply_path)
162
+ else:
163
+ return None, preview, "⚠️ Could not extract 3D data"
164
+
165
+ return ply_path, preview, f"✓ Detected {len(masks)} objects, reconstructed largest"
166
 
167
  except Exception as e:
168
  import traceback