Spaces:

dev-bjoern
/

sam3d-objects-mcp

Running on Zero

App Files Files Community

dev-bjoern committed on Dec 8, 2025

Commit

af69327

1 Parent(s): 696b8f4

Add SAM3 for auto-segmentation, GLB export

Browse files

Files changed (2) hide show

app.py +125 -24
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 SAM 3D Objects MCP Server
-Image + Mask → 3D Object (PLY)
 """
 import os
 import sys
@@ -33,16 +33,41 @@ if not SAM3D_PATH.exists():
 # Add to path
 sys.path.insert(0, str(SAM3D_PATH))
-# Global model
-MODEL = None
-def load_model():
     """Load SAM 3D Objects model"""
-    global MODEL
-    if MODEL is not None:
-        return MODEL
     import torch
     print("Loading SAM 3D Objects model...")
@@ -57,10 +82,57 @@ def load_model():
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    MODEL = Sam3dObjects.from_pretrained(checkpoint_dir, device=device)
-    print("✓ Model loaded")
-    return MODEL
 @spaces.GPU(duration=120)
@@ -73,17 +145,17 @@ def reconstruct_object(image: np.ndarray, mask: np.ndarray) -> tuple:
         mask: Binary mask indicating object region
     Returns:
-        tuple: (ply_path, status)
     """
     if image is None:
         return None, "❌ No image provided"
     if mask is None:
-        return None, "❌ No mask provided"
     try:
         import torch
         import trimesh
-        model = load_model()
         # Process image
         if isinstance(image, Image.Image):
@@ -104,14 +176,20 @@ def reconstruct_object(image: np.ndarray, mask: np.ndarray) -> tuple:
         if outputs is None:
             return None, "⚠️ Reconstruction failed"
-        # Export as PLY
         output_dir = tempfile.mkdtemp()
-        ply_path = f"{output_dir}/object_{uuid.uuid4().hex[:8]}.ply"
-        # Save gaussian splat as PLY
-        outputs.save_ply(ply_path)
-        return ply_path, "✓ Object reconstructed"
     except Exception as e:
         import traceback
@@ -121,19 +199,42 @@ def reconstruct_object(image: np.ndarray, mask: np.ndarray) -> tuple:
 # Gradio Interface
 with gr.Blocks(title="SAM 3D Objects MCP") as demo:
-    gr.Markdown("# 📦 SAM 3D Objects MCP Server\n**Image + Mask → 3D Object (PLY)**")
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(label="Input Image", type="numpy")
-            input_mask = gr.Image(label="Object Mask", type="numpy")
-            btn = gr.Button("🎯 Reconstruct", variant="primary")
         with gr.Column():
-            output_file = gr.File(label="3D Object (PLY)")
             status = gr.Textbox(label="Status")
-    btn.click(reconstruct_object, inputs=[input_image, input_mask], outputs=[output_file, status])
     gr.Markdown("""
     ---

 """
 SAM 3D Objects MCP Server
+Image + Click → 3D Object (GLB)
 """
 import os
 import sys
 # Add to path
 sys.path.insert(0, str(SAM3D_PATH))
+# Global models
+SAM3D_MODEL = None
+SAM_PREDICTOR = None
+def load_sam_model():
+    """Load SAM3 model for segmentation"""
+    global SAM_PREDICTOR
+    if SAM_PREDICTOR is not None:
+        return SAM_PREDICTOR
+    import torch
+    from sam3 import SAM3ImagePredictor
+    print("Loading SAM3 model...")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    SAM_PREDICTOR = SAM3ImagePredictor.from_pretrained(
+        "facebook/sam3-hiera-large",
+        device=device,
+        token=os.environ.get("HF_TOKEN")
+    )
+    print("✓ SAM3 model loaded")
+    return SAM_PREDICTOR
+def load_sam3d_model():
     """Load SAM 3D Objects model"""
+    global SAM3D_MODEL
+    if SAM3D_MODEL is not None:
+        return SAM3D_MODEL
     import torch
     print("Loading SAM 3D Objects model...")
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    SAM3D_MODEL = Sam3dObjects.from_pretrained(checkpoint_dir, device=device)
+    print("✓ SAM 3D Objects model loaded")
+    return SAM3D_MODEL
+@spaces.GPU(duration=60)
+def segment_object(image: np.ndarray, evt: gr.SelectData) -> np.ndarray:
+    """
+    Segment object at clicked point using SAM2.
+    Args:
+        image: Input RGB image
+        evt: Click event with coordinates
+    Returns:
+        Image with mask overlay
+    """
+    if image is None:
+        return None
+    try:
+        predictor = load_sam_model()
+        # Get click coordinates
+        point = np.array([[evt.index[0], evt.index[1]]])
+        label = np.array([1])  # 1 = foreground
+        # Set image
+        predictor.set_image(image)
+        # Predict mask
+        masks, scores, _ = predictor.predict(
+            point_coords=point,
+            point_labels=label,
+            multimask_output=True
+        )
+        # Use best mask
+        best_mask = masks[np.argmax(scores)]
+        # Create overlay
+        overlay = image.copy()
+        overlay[best_mask] = overlay[best_mask] * 0.5 + np.array([0, 255, 0]) * 0.5
+        return overlay, best_mask.astype(np.uint8) * 255
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return image, None
 @spaces.GPU(duration=120)
         mask: Binary mask indicating object region
     Returns:
+        tuple: (glb_path, status)
     """
     if image is None:
         return None, "❌ No image provided"
     if mask is None:
+        return None, "❌ No mask provided - click on object first"
     try:
         import torch
         import trimesh
+        model = load_sam3d_model()
         # Process image
         if isinstance(image, Image.Image):
         if outputs is None:
             return None, "⚠️ Reconstruction failed"
+        # Export as GLB via trimesh
         output_dir = tempfile.mkdtemp()
+        glb_path = f"{output_dir}/object_{uuid.uuid4().hex[:8]}.glb"
+        # Get point positions (XYZ) from the gaussian splat
+        # (no mesh faces are built; the points are exported as-is below)
+        vertices = outputs.get_xyz().cpu().numpy()
+        # Create point cloud mesh (gaussian splats don't have faces directly)
+        # We'll export as a point cloud GLB
+        cloud = trimesh.PointCloud(vertices)
+        cloud.export(glb_path, file_type='glb')
+        return glb_path, f"✓ Object reconstructed ({len(vertices)} points)"
     except Exception as e:
         import traceback
 # Gradio Interface
 with gr.Blocks(title="SAM 3D Objects MCP") as demo:
+    gr.Markdown("# 📦 SAM 3D Objects MCP Server\n**Click on object → 3D Reconstruction (GLB)**")
+    # State for mask
+    mask_state = gr.State(None)
     with gr.Row():
         with gr.Column():
+            input_image = gr.Image(label="Input Image (click on object)", type="numpy")
+            gr.Markdown("*Click on the object you want to reconstruct*")
         with gr.Column():
+            preview_image = gr.Image(label="Segmentation Preview", type="numpy", interactive=False)
+    with gr.Row():
+        btn = gr.Button("🎯 Reconstruct 3D", variant="primary", size="lg")
+    with gr.Row():
+        with gr.Column():
+            output_model = gr.Model3D(label="3D Object")
+            output_file = gr.File(label="Download GLB")
+        with gr.Column():
             status = gr.Textbox(label="Status")
+    # Click to segment
+    input_image.select(
+        segment_object,
+        inputs=[input_image],
+        outputs=[preview_image, mask_state]
+    )
+    # Reconstruct
+    btn.click(
+        reconstruct_object,
+        inputs=[input_image, mask_state],
+        outputs=[output_file, status]
+    )
     gr.Markdown("""
     ---

requirements.txt CHANGED Viewed

@@ -20,3 +20,4 @@ jaxtyping
 rich
 kaolin==0.17.0
 gsplat

 rich
 kaolin==0.17.0
 gsplat
+sam3 @ git+https://github.com/facebookresearch/sam3.git