Spaces:

mmrech
/

NeuroSAM3

Running on Zero

mmrech committed on Dec 8, 2025

Commit

3233b61

1 Parent(s): adc6eda

Fix SAM 3 implementation to match official akhaliq/sam3

- Update imports: Use Sam3Processor and Sam3Model (not AutoImageProcessor/AutoModel)
- Update model loading: Use facebook/sam3 with proper torch_dtype (float16 for GPU)
- Create run_sam3_inference() helper matching official implementation
- Update all inference calls to use processor.post_process_instance_segmentation()
- Fix mask handling to work with SAM 3 output format (list of masks + scores)

Matches official implementation from: https://huggingface.co/spaces/akhaliq/sam3/blob/main/app.py

Files changed (1) hide show

app.py +207 -370

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ import torch
 import pydicom
 import numpy as np
 from PIL import Image, ImageEnhance, ImageDraw
-from transformers import AutoImageProcessor, AutoModel
 import matplotlib.pyplot as plt
 from matplotlib.patches import Rectangle
 from scipy import ndimage
@@ -46,33 +46,71 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model = None
 processor = None
-# SAM 3 model identifier - using AutoImageProcessor/AutoModel for SAM 3
-SAM_MODEL_ID = "facebook/sam3-hiera-large"
 try:
-    processor = AutoImageProcessor.from_pretrained(SAM_MODEL_ID, token=hf_token)
-    model = AutoModel.from_pretrained(SAM_MODEL_ID, token=hf_token)
-    model = model.to(device)
     model.eval()
     print(f"✅ SAM 3 Model Loaded Successfully! ({SAM_MODEL_ID})")
 except Exception as e:
-    print(f"⚠️ Model Load Warning: {e}")
-    print("Trying alternative SAM 3 model identifier...")
     try:
-        # Fallback: try without hiera suffix
-        SAM_MODEL_ID = "facebook/sam3"
-        processor = AutoImageProcessor.from_pretrained(SAM_MODEL_ID, token=hf_token)
-        model = AutoModel.from_pretrained(SAM_MODEL_ID, token=hf_token)
-        model = model.to(device)
-        model.eval()
-        print(f"✅ SAM 3 Model Loaded Successfully! ({SAM_MODEL_ID})")
-    except Exception as e2:
-        print(f"❌ Failed to load SAM 3 model: {e2}")
-        print("Ensure you have:")
-        print("  1. transformers>=4.45.0 for SAM 3 support")
-        print("  2. Valid Hugging Face token with access to SAM 3")
-        print("  3. Sufficient memory for the model")
-        raise
 # Create Sample DICOM File for Demo
 demo_dicom_path = "demo_brain_mri.dcm"
@@ -304,90 +342,40 @@ def process_medical_image(image_file, prompt_text, modality, window_type, return
             pil_image = Image.fromarray(img_uint8.astype(np.uint8))
-        # Run SAM 3 Inference
-        try:
-            # Prepare inputs
-            inputs = processor(images=pil_image, text=prompt_text, return_tensors="pt")
-            # Move inputs to device
-            inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = model(**inputs)
-            # Extract masks from outputs - handle different output formats
-            masks = None
-            if hasattr(outputs, 'pred_masks'):
-                masks = outputs.pred_masks
-            elif isinstance(outputs, dict):
-                # Try common mask keys
-                masks = outputs.get('pred_masks') or outputs.get('masks') or outputs.get('segmentation_masks')
-                if masks is None and len(outputs) > 0:
-                    # Get first tensor value if no standard key found
-                    first_value = list(outputs.values())[0]
-                    if isinstance(first_value, torch.Tensor) and len(first_value.shape) >= 2:
-                        masks = first_value
-            elif isinstance(outputs, (list, tuple)) and len(outputs) > 0:
-                masks = outputs[0]
-            else:
-                masks = outputs
-            # Convert to numpy and process
-            if masks is not None:
-                if isinstance(masks, torch.Tensor):
-                    masks = masks.cpu().numpy()
-                # Handle batch dimension if present
-                if len(masks.shape) == 4:  # [batch, num_masks, H, W]
-                    masks = masks[0]  # Take first batch
-                elif len(masks.shape) == 3:  # [num_masks, H, W] or [H, W, channels]
-                    if masks.shape[0] < masks.shape[-1]:  # Likely [num_masks, H, W]
-                        masks = masks  # Keep as is
-                    else:  # Likely [H, W, channels]
-                        masks = masks[..., 0] if masks.shape[-1] == 1 else masks
-                # Ensure boolean mask - threshold if needed
-                if masks.dtype != bool:
-                    if len(masks.shape) == 3:  # Multiple masks
-                        masks = masks > 0.5
-                        # Combine all masks into one
-                        masks = np.any(masks, axis=0)
-                    else:  # Single mask
-                        masks = masks > 0.5
-                results = {'masks': masks}
-            else:
-                print("⚠️ Warning: No masks found in model output")
-                results = {'masks': None}
-        except Exception as e:
-            print(f"❌ Error during model inference: {e}")
-            import traceback
-            traceback.print_exc()
             return None
-        # Draw Masks on Image
         plt.figure(figsize=(10, 10))
         plt.imshow(pil_image)
         final_mask = None
         if 'masks' in results and results['masks'] is not None:
-            masks = results['masks']
-            # Handle different mask formats
-            if isinstance(masks, np.ndarray):
-                if len(masks.shape) == 3:  # Multiple masks [num_masks, H, W]
-                    final_mask = np.any(masks, axis=0)
-                elif len(masks.shape) == 2:  # Single mask [H, W]
-                    final_mask = masks
-                else:
-                    print(f"⚠️ Warning: Unexpected mask shape: {masks.shape}")
-                    final_mask = None
-                if final_mask is not None:
                     plt.imshow(final_mask, alpha=0.5, cmap='spring')
                 else:
-                    print("⚠️ Warning: Could not process mask format.")
             else:
-                print(f"⚠️ Warning: Masks is not a numpy array: {type(masks)}")
         else:
             print("⚠️ Warning: No masks in results.")
@@ -549,90 +537,39 @@ def process_medical_image_enhanced(image_file, prompt_text, modality, window_typ
         enhancer = ImageEnhance.Contrast(pil_image)
         pil_image = enhancer.enhance(contrast)
-        # Run SAM 3 Inference
-        try:
-            # Prepare inputs
-            inputs = processor(images=pil_image, text=prompt_text, return_tensors="pt")
-            # Move inputs to device
-            inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = model(**inputs)
-            # Extract masks from outputs - handle different output formats
-            masks = None
-            if hasattr(outputs, 'pred_masks'):
-                masks = outputs.pred_masks
-            elif isinstance(outputs, dict):
-                # Try common mask keys
-                masks = outputs.get('pred_masks') or outputs.get('masks') or outputs.get('segmentation_masks')
-                if masks is None and len(outputs) > 0:
-                    # Get first tensor value if no standard key found
-                    first_value = list(outputs.values())[0]
-                    if isinstance(first_value, torch.Tensor) and len(first_value.shape) >= 2:
-                        masks = first_value
-            elif isinstance(outputs, (list, tuple)) and len(outputs) > 0:
-                masks = outputs[0]
-            else:
-                masks = outputs
-            # Convert to numpy and process
-            if masks is not None:
-                if isinstance(masks, torch.Tensor):
-                    masks = masks.cpu().numpy()
-                # Handle batch dimension if present
-                if len(masks.shape) == 4:  # [batch, num_masks, H, W]
-                    masks = masks[0]  # Take first batch
-                elif len(masks.shape) == 3:  # [num_masks, H, W] or [H, W, channels]
-                    if masks.shape[0] < masks.shape[-1]:  # Likely [num_masks, H, W]
-                        masks = masks  # Keep as is
-                    else:  # Likely [H, W, channels]
-                        masks = masks[..., 0] if masks.shape[-1] == 1 else masks
-                # Ensure boolean mask - threshold if needed
-                if masks.dtype != bool:
-                    if len(masks.shape) == 3:  # Multiple masks
-                        masks = masks > 0.5
-                        # Combine all masks into one
-                        masks = np.any(masks, axis=0)
-                    else:  # Single mask
-                        masks = masks > 0.5
-                results = {'masks': masks}
-            else:
-                print("⚠️ Warning: No masks found in model output")
-                results = {'masks': None}
-        except Exception as e:
-            print(f"❌ Error during model inference: {e}")
-            import traceback
-            traceback.print_exc()
             return None
-        # Draw Masks on Image with enhanced visualization
         plt.figure(figsize=(10, 10))
         plt.imshow(pil_image)
         final_mask = None
         if 'masks' in results and results['masks'] is not None:
-            masks = results['masks']
-            # Handle different mask formats
-            if isinstance(masks, np.ndarray):
-                if len(masks.shape) == 3:  # Multiple masks [num_masks, H, W]
-                    final_mask = np.any(masks, axis=0)
-                elif len(masks.shape) == 2:  # Single mask [H, W]
-                    final_mask = masks
-                else:
-                    print(f"⚠️ Warning: Unexpected mask shape: {masks.shape}")
-                    final_mask = None
-                if final_mask is not None:
                     plt.imshow(final_mask, alpha=transparency, cmap=colormap)
                 else:
-                    print("⚠️ Warning: Could not process mask format.")
             else:
-                print(f"⚠️ Warning: Masks is not a numpy array: {type(masks)}")
         else:
             print("⚠️ Warning: No masks in results.")
@@ -925,51 +862,35 @@ def process_with_point_prompt(image_file, point_x, point_y, modality, window_typ
         point_y = max(0, min(int(point_y), h - 1))
         # Create a prompt based on the point location
-        # Use the point's neighborhood intensity as a hint for segmentation
         prompt_text = f"segment region at point"
-        # Process with SAM
-        inputs = processor(images=pil_image, text=prompt_text, return_tensors="pt")
-        inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Extract masks
-        masks = None
-        if hasattr(outputs, 'pred_masks'):
-            masks = outputs.pred_masks
-        elif isinstance(outputs, dict):
-            masks = outputs.get('pred_masks') or outputs.get('masks')
-        if masks is not None:
-            if isinstance(masks, torch.Tensor):
-                masks = masks.cpu().numpy()
-            if len(masks.shape) == 4:
-                masks = masks[0]
-            if masks.dtype != bool:
-                masks = masks > 0.5
-            if len(masks.shape) == 3:
-                # Select mask containing the point
-                best_mask = None
-                for i in range(masks.shape[0]):
-                    mask_resized = np.array(Image.fromarray(masks[i].astype(np.uint8) * 255).resize((w, h))) > 127
-                    if mask_resized[point_y, point_x]:
-                        best_mask = mask_resized
-                        break
-                if best_mask is None:
-                    best_mask = np.any(masks, axis=0)
-                    best_mask = np.array(Image.fromarray(best_mask.astype(np.uint8) * 255).resize((w, h))) > 127
-                final_mask = best_mask
-            else:
-                final_mask = np.array(Image.fromarray(masks.astype(np.uint8) * 255).resize((w, h))) > 127
-        else:
-            final_mask = None
         # Draw result with point marker
         plt.figure(figsize=(10, 10))
@@ -1039,43 +960,30 @@ def process_with_box_prompt(image_file, x1, y1, x2, y2, modality, window_type, c
         prompt_text = "segment region in bounding box"
-        # Process with SAM
-        inputs = processor(images=pil_image, text=prompt_text, return_tensors="pt")
-        inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Extract and filter masks by box region
-        masks = None
-        if hasattr(outputs, 'pred_masks'):
-            masks = outputs.pred_masks
-        elif isinstance(outputs, dict):
-            masks = outputs.get('pred_masks') or outputs.get('masks')
         final_mask = None
-        if masks is not None:
-            if isinstance(masks, torch.Tensor):
-                masks = masks.cpu().numpy()
-            if len(masks.shape) == 4:
-                masks = masks[0]
-            if masks.dtype != bool:
-                masks = masks > 0.5
-            if len(masks.shape) == 3:
-                combined = np.any(masks, axis=0)
-            else:
-                combined = masks
-            # Resize to image size
-            combined_resized = np.array(Image.fromarray(combined.astype(np.uint8) * 255).resize((w, h))) > 127
-            # Create box mask and intersect
-            box_mask = np.zeros((h, w), dtype=bool)
-            box_mask[y1:y2, x1:x2] = True
-            final_mask = combined_resized & box_mask
         # Draw result with box
         plt.figure(figsize=(10, 10))
@@ -1136,79 +1044,40 @@ def process_multi_mask(image_file, prompt_text, modality, window_type, num_masks
         if not prompt_text or not prompt_text.strip():
             prompt_text = "brain"
-        # Process with SAM
-        inputs = processor(images=pil_image, text=prompt_text, return_tensors="pt")
-        inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Extract masks
-        masks = None
-        scores = None
-        if hasattr(outputs, 'pred_masks'):
-            masks = outputs.pred_masks
-        elif isinstance(outputs, dict):
-            masks = outputs.get('pred_masks') or outputs.get('masks')
-            scores = outputs.get('iou_scores') or outputs.get('scores')
         results = []
         mask_info = []
-        if masks is not None:
-            if isinstance(masks, torch.Tensor):
-                masks = masks.cpu().numpy()
-            if scores is not None and isinstance(scores, torch.Tensor):
-                scores = scores.cpu().numpy().flatten()
-            if len(masks.shape) == 4:
-                masks = masks[0]
-            if len(masks.shape) == 3:
-                num_available = masks.shape[0]
-                num_to_show = min(num_masks, num_available)
-                # Generate confidence scores if not available
-                if scores is None:
-                    scores = [1.0 / (i + 1) for i in range(num_available)]  # Simulated scores
-                colormaps = ['spring', 'cool', 'hot', 'viridis', 'plasma']
-                for i in range(num_to_show):
-                    mask = masks[i]
-                    if mask.dtype != bool:
-                        mask = mask > 0.5
-                    score = scores[i] if i < len(scores) else 0.5
-                    # Create visualization
-                    plt.figure(figsize=(8, 8))
-                    plt.imshow(pil_image)
-                    plt.imshow(mask, alpha=0.5, cmap=colormaps[i % len(colormaps)])
-                    plt.axis('off')
-                    plt.title(f"Mask {i+1} - Confidence: {score:.2%}", fontsize=12)
-                    output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
-                    output_path = output_file.name
-                    output_file.close()
-                    plt.savefig(output_path, bbox_inches='tight', pad_inches=0, dpi=100)
-                    plt.close()
-                    results.append(output_path)
-                    mask_info.append(f"Mask {i+1}: {score:.2%} confidence, {np.sum(mask):,} pixels")
-            else:
-                # Single mask case
-                mask = masks
-                if mask.dtype != bool:
-                    mask = mask > 0.5
                 plt.figure(figsize=(8, 8))
                 plt.imshow(pil_image)
-                plt.imshow(mask, alpha=0.5, cmap='spring')
                 plt.axis('off')
-                plt.title(f"Single Mask Output", fontsize=12)
                 output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                 output_path = output_file.name
@@ -1218,7 +1087,7 @@ def process_multi_mask(image_file, prompt_text, modality, window_type, num_masks
                 plt.close()
                 results.append(output_path)
-                mask_info.append(f"Single mask: {np.sum(mask):,} pixels")
         status = f"✅ Generated {len(results)} mask candidate(s)"
         info = "\n".join(mask_info) if mask_info else "No mask information available"
@@ -1582,48 +1451,28 @@ def automatic_mask_generator(image_file, modality, window_type,
             progress(0.3 + 0.5 * (prompt_idx / len(prompts)), desc=f"Processing prompt: {prompt}...")
             try:
-                inputs = processor(images=pil_image, text=prompt, return_tensors="pt")
-                inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-                with torch.no_grad():
-                    outputs = model(**inputs)
-                masks = None
-                if hasattr(outputs, 'pred_masks'):
-                    masks = outputs.pred_masks
-                elif isinstance(outputs, dict):
-                    masks = outputs.get('pred_masks') or outputs.get('masks')
-                if masks is not None:
-                    if isinstance(masks, torch.Tensor):
-                        masks = masks.cpu().numpy()
-                    if len(masks.shape) == 4:
-                        masks = masks[0]
-                    if len(masks.shape) == 3:
-                        for i in range(masks.shape[0]):
-                            mask = masks[i]
-                            if mask.dtype != bool:
-                                mask = mask > 0.5
-                            # Filter by minimum area
-                            mask_area = np.sum(mask)
-                            if mask_area >= min_mask_area:
-                                # Resize mask to image size
-                                mask_resized = np.array(
-                                    Image.fromarray(mask.astype(np.uint8) * 255).resize((w, h))
-                                ) > 127
-                                all_masks.append(mask_resized)
-                                all_scores.append(mask_area)
-                    elif len(masks.shape) == 2:
-                        mask = masks
-                        if mask.dtype != bool:
-                            mask = mask > 0.5
-                        mask_area = np.sum(mask)
                         if mask_area >= min_mask_area:
                             mask_resized = np.array(
-                                Image.fromarray(mask.astype(np.uint8) * 255).resize((w, h))
                             ) > 127
                             all_masks.append(mask_resized)
                             all_scores.append(mask_area)
@@ -1781,35 +1630,23 @@ def process_with_advanced_transforms(image_file, prompt_text, modality, window_t
         if not prompt_text or not prompt_text.strip():
             prompt_text = "brain"
-        # Process with SAM
-        inputs = processor(images=pil_image, text=prompt_text, return_tensors="pt")
-        inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Extract masks
-        masks = None
-        if hasattr(outputs, 'pred_masks'):
-            masks = outputs.pred_masks
-        elif isinstance(outputs, dict):
-            masks = outputs.get('pred_masks') or outputs.get('masks')
         final_mask = None
-        if masks is not None:
-            if isinstance(masks, torch.Tensor):
-                masks = masks.cpu().numpy()
-            if len(masks.shape) == 4:
-                masks = masks[0]
-            if masks.dtype != bool:
-                masks = masks > 0.5
-            if len(masks.shape) == 3:
-                final_mask = np.any(masks, axis=0)
-            else:
-                final_mask = masks
         # Visualize
         plt.figure(figsize=(12, 6))

 import pydicom
 import numpy as np
 from PIL import Image, ImageEnhance, ImageDraw
+from transformers import Sam3Processor, Sam3Model
 import matplotlib.pyplot as plt
 from matplotlib.patches import Rectangle
 from scipy import ndimage
 model = None
 processor = None
+# SAM 3 model identifier - matching official implementation
+SAM_MODEL_ID = "facebook/sam3"
 try:
+    # Load model with proper dtype (float16 for GPU, float32 for CPU) - matching official implementation
+    model = Sam3Model.from_pretrained(
+        SAM_MODEL_ID,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        token=hf_token
+    ).to(device)
+    processor = Sam3Processor.from_pretrained(SAM_MODEL_ID, token=hf_token)
     model.eval()
     print(f"✅ SAM 3 Model Loaded Successfully! ({SAM_MODEL_ID})")
 except Exception as e:
+    print(f"❌ Failed to load SAM 3 model: {e}")
+    print("Ensure you have:")
+    print("  1. transformers>=4.45.0 for SAM 3 support")
+    print("  2. Valid Hugging Face token with access to SAM 3")
+    print("  3. Sufficient memory for the model")
+    raise
+def run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5):
+    """
+    Run text-prompted SAM 3 inference on one image with the module-level
+    `model` and `processor`, following the official akhaliq/sam3 Space.
+
+    Args:
+        pil_image: PIL Image to segment
+        prompt_text: Text prompt for segmentation; surrounding whitespace is
+            stripped before it is passed to the processor
+        threshold: Detection threshold forwarded to
+            processor.post_process_instance_segmentation
+            (higher = fewer detections)
+        mask_threshold: Mask threshold forwarded to
+            processor.post_process_instance_segmentation
+            (higher = sharper masks)
+    Returns:
+        The first element of the list returned by
+        processor.post_process_instance_segmentation (callers read its
+        'masks' and 'scores' entries), or None when the model/processor is
+        not loaded or when any exception is raised during inference (the
+        error and its traceback are printed rather than propagated)
+    """
+    if model is None or processor is None:
+        print("❌ Model not loaded")
+        return None
     try:
+        # Build the model inputs from the image and the stripped prompt, then move them to `device`
+        inputs = processor(images=pil_image, text=prompt_text.strip(), return_tensors="pt").to(device)
+        # Cast float32 tensors to the model's dtype (float16 when loaded on GPU) so inputs match the weights
+        for key in inputs:
+            if isinstance(inputs[key], torch.Tensor) and inputs[key].dtype == torch.float32:
+                inputs[key] = inputs[key].to(model.dtype)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        # Post-process via the processor; target sizes come from inputs["original_sizes"] when present, otherwise from the PIL (width, height) size reversed
+        results = processor.post_process_instance_segmentation(
+            outputs,
+            threshold=threshold,
+            mask_threshold=mask_threshold,
+            target_sizes=inputs.get("original_sizes").tolist() if "original_sizes" in inputs else [pil_image.size[::-1]]
+        )[0]  # Get first batch result
+        return results
+    except Exception as e:
+        print(f"❌ Error during SAM 3 inference: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
 # Create Sample DICOM File for Demo
 demo_dicom_path = "demo_brain_mri.dcm"
             pil_image = Image.fromarray(img_uint8.astype(np.uint8))
+        # Run SAM 3 Inference - using helper function matching official implementation
+        results = run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5)
+        if results is None:
             return None
+        # Draw Masks on Image - matching official implementation format
         plt.figure(figsize=(10, 10))
         plt.imshow(pil_image)
         final_mask = None
         if 'masks' in results and results['masks'] is not None:
+            masks = results['masks']  # List of mask tensors from post_process_instance_segmentation
+            scores = results.get('scores', [])
+            if len(masks) > 0:
+                # Combine all masks into one (or use first mask)
+                # Convert tensors to numpy and combine
+                mask_arrays = []
+                for mask in masks:
+                    if isinstance(mask, torch.Tensor):
+                        mask_np = mask.cpu().numpy()
+                    else:
+                        mask_np = np.array(mask)
+                    mask_arrays.append(mask_np)
+                # Combine all masks
+                if len(mask_arrays) > 0:
+                    final_mask = np.any(mask_arrays, axis=0)
                     plt.imshow(final_mask, alpha=0.5, cmap='spring')
                 else:
+                    print("⚠️ Warning: No valid masks found.")
             else:
+                print("⚠️ Warning: No masks in results.")
         else:
             print("⚠️ Warning: No masks in results.")
         enhancer = ImageEnhance.Contrast(pil_image)
         pil_image = enhancer.enhance(contrast)
+        # Run SAM 3 Inference - using helper function matching official implementation
+        results = run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5)
+        if results is None:
             return None
+        # Draw Masks on Image with enhanced visualization - matching official implementation format
         plt.figure(figsize=(10, 10))
         plt.imshow(pil_image)
         final_mask = None
         if 'masks' in results and results['masks'] is not None:
+            masks = results['masks']  # List of mask tensors from post_process_instance_segmentation
+            scores = results.get('scores', [])
+            if len(masks) > 0:
+                # Combine all masks into one
+                mask_arrays = []
+                for mask in masks:
+                    if isinstance(mask, torch.Tensor):
+                        mask_np = mask.cpu().numpy()
+                    else:
+                        mask_np = np.array(mask)
+                    mask_arrays.append(mask_np)
+                # Combine all masks
+                if len(mask_arrays) > 0:
+                    final_mask = np.any(mask_arrays, axis=0)
                     plt.imshow(final_mask, alpha=transparency, cmap=colormap)
                 else:
+                    print("⚠️ Warning: No valid masks found.")
             else:
+                print("⚠️ Warning: No masks in results.")
         else:
             print("⚠️ Warning: No masks in results.")
         point_y = max(0, min(int(point_y), h - 1))
         # Create a prompt based on the point location
         prompt_text = f"segment region at point"
+        # Process with SAM 3 - using helper function
+        results = run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5)
+        final_mask = None
+        if results and 'masks' in results and results['masks'] is not None:
+            masks = results['masks']
+            # Select mask containing the point
+            for mask in masks:
+                if isinstance(mask, torch.Tensor):
+                    mask_np = mask.cpu().numpy()
+                else:
+                    mask_np = np.array(mask)
+                # Resize to image size
+                mask_resized = np.array(Image.fromarray((mask_np * 255).astype(np.uint8)).resize((w, h))) > 127
+                if mask_resized[point_y, point_x]:
+                    final_mask = mask_resized
+                    break
+            # If no mask contains the point, use first mask
+            if final_mask is None and len(masks) > 0:
+                mask = masks[0]
+                if isinstance(mask, torch.Tensor):
+                    mask_np = mask.cpu().numpy()
+                else:
+                    mask_np = np.array(mask)
+                final_mask = np.array(Image.fromarray((mask_np * 255).astype(np.uint8)).resize((w, h))) > 127
         # Draw result with point marker
         plt.figure(figsize=(10, 10))
         prompt_text = "segment region in bounding box"
+        # Process with SAM 3 - using helper function
+        results = run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5)
         final_mask = None
+        if results and 'masks' in results and results['masks'] is not None:
+            masks = results['masks']
+            # Combine all masks
+            mask_arrays = []
+            for mask in masks:
+                if isinstance(mask, torch.Tensor):
+                    mask_np = mask.cpu().numpy()
+                else:
+                    mask_np = np.array(mask)
+                # Resize to image size
+                mask_resized = np.array(Image.fromarray((mask_np * 255).astype(np.uint8)).resize((w, h))) > 127
+                mask_arrays.append(mask_resized)
+            if len(mask_arrays) > 0:
+                combined = np.any(mask_arrays, axis=0)
+                # Create box mask and intersect
+                box_mask = np.zeros((h, w), dtype=bool)
+                box_mask[y1:y2, x1:x2] = True
+                final_mask = combined & box_mask
         # Draw result with box
         plt.figure(figsize=(10, 10))
         if not prompt_text or not prompt_text.strip():
             prompt_text = "brain"
+        # Process with SAM 3 - using helper function
+        sam_results = run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5)
         results = []
         mask_info = []
+        if sam_results and 'masks' in sam_results and sam_results['masks'] is not None:
+            masks = sam_results['masks']  # List of mask tensors
+            scores = sam_results.get('scores', [])  # List of scores
+            num_available = len(masks)
+            num_to_show = min(num_masks, num_available)
+            colormaps = ['spring', 'cool', 'hot', 'viridis', 'plasma']
+            for i in range(num_to_show):
+                mask = masks[i]
+                if isinstance(mask, torch.Tensor):
+                    mask_np = mask.cpu().numpy()
+                else:
+                    mask_np = np.array(mask)
+                # Convert to boolean
+                if mask_np.dtype != bool:
+                    mask_np = mask_np > 0.5
+                score = scores[i].item() if i < len(scores) and isinstance(scores[i], torch.Tensor) else (scores[i] if i < len(scores) else 0.5)
+                # Create visualization
                 plt.figure(figsize=(8, 8))
                 plt.imshow(pil_image)
+                plt.imshow(mask_np, alpha=0.5, cmap=colormaps[i % len(colormaps)])
                 plt.axis('off')
+                plt.title(f"Mask {i+1} - Confidence: {score:.2%}", fontsize=12)
                 output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                 output_path = output_file.name
                 plt.close()
                 results.append(output_path)
+                mask_info.append(f"Mask {i+1}: {score:.2%} confidence, {np.sum(mask_np):,} pixels")
         status = f"✅ Generated {len(results)} mask candidate(s)"
         info = "\n".join(mask_info) if mask_info else "No mask information available"
             progress(0.3 + 0.5 * (prompt_idx / len(prompts)), desc=f"Processing prompt: {prompt}...")
             try:
+                # Process with SAM 3 - using helper function
+                sam_results = run_sam3_inference(pil_image, prompt, threshold=0.5, mask_threshold=0.5)
+                if sam_results and 'masks' in sam_results and sam_results['masks'] is not None:
+                    masks = sam_results['masks']  # List of mask tensors
+                    for mask in masks:
+                        if isinstance(mask, torch.Tensor):
+                            mask_np = mask.cpu().numpy()
+                        else:
+                            mask_np = np.array(mask)
+                        # Convert to boolean
+                        if mask_np.dtype != bool:
+                            mask_np = mask_np > 0.5
+                        # Filter by minimum area
+                        mask_area = np.sum(mask_np)
                         if mask_area >= min_mask_area:
+                            # Resize mask to image size
                             mask_resized = np.array(
+                                Image.fromarray((mask_np * 255).astype(np.uint8)).resize((w, h))
                             ) > 127
                             all_masks.append(mask_resized)
                             all_scores.append(mask_area)
         if not prompt_text or not prompt_text.strip():
             prompt_text = "brain"
+        # Process with SAM 3 - using helper function
+        results = run_sam3_inference(pil_image, prompt_text, threshold=0.5, mask_threshold=0.5)
         final_mask = None
+        if results and 'masks' in results and results['masks'] is not None:
+            masks = results['masks']
+            # Combine all masks
+            mask_arrays = []
+            for mask in masks:
+                if isinstance(mask, torch.Tensor):
+                    mask_np = mask.cpu().numpy()
+                else:
+                    mask_np = np.array(mask)
+                mask_arrays.append(mask_np)
+            if len(mask_arrays) > 0:
+                final_mask = np.any(mask_arrays, axis=0)
         # Visualize
         plt.figure(figsize=(12, 6))