Anigor66 committed on
Commit
d61084a
·
1 Parent(s): 57b395c

Use MedSAM for both interactive and automatic mask generation

Browse files
Files changed (1) hide show
  1. app.py +202 -8
app.py CHANGED
@@ -13,13 +13,13 @@ import json
13
  import base64
14
 
15
  # Import MedSAM components
16
- from segment_anything import sam_model_registry, SamPredictor
17
 
18
  # Initialize model
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
  print(f"Using device: {device}")
21
 
22
- # Load your MedSAM model
23
  MODEL_CHECKPOINT = "medsam_vit_b.pth"
24
  MODEL_TYPE = "vit_b"
25
 
@@ -35,11 +35,30 @@ def patched_torch_load(f, *args, **kwargs):
35
  torch.load = patched_torch_load
36
 
37
  try:
38
- sam = sam_model_registry[MODEL_TYPE](checkpoint=MODEL_CHECKPOINT)
39
- sam.to(device=device)
40
- sam.eval()
41
- predictor = SamPredictor(sam)
42
- print("✓ MedSAM model loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  finally:
44
  # Restore original torch.load
45
  torch.load = original_torch_load
@@ -324,6 +343,118 @@ def segment_multiple_boxes(image, request_json):
324
  })
325
 
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  # =============================================================================
328
  # LEGACY API FUNCTIONS (kept for backwards compatibility with test scripts)
329
  # =============================================================================
@@ -670,7 +801,70 @@ with gr.Blocks(title="MedSAM Inference API") as demo:
670
  api_name="segment_with_box" # Keep old API name for compatibility
671
  )
672
 
673
- # Tab 5: Simple UI Interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
674
  with gr.Tab("Simple Interface"):
675
  gr.Markdown("## Click-based Segmentation")
676
  gr.Markdown("Enter X, Y coordinates to segment")
 
13
  import base64
14
 
15
  # Import MedSAM components
16
+ from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator
17
 
18
  # Initialize model
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
  print(f"Using device: {device}")
21
 
22
+ # Model configuration - using MedSAM (vit_b) for both interactive and automatic segmentation
23
  MODEL_CHECKPOINT = "medsam_vit_b.pth"
24
  MODEL_TYPE = "vit_b"
25
 
 
35
  torch.load = patched_torch_load
36
 
37
  try:
38
+ # Load MedSAM model (vit_b) - used for both interactive and automatic segmentation
39
+ print(f"Loading MedSAM model ({MODEL_TYPE})...")
40
+ sam = sam_model_registry[MODEL_TYPE](checkpoint=MODEL_CHECKPOINT)
41
+ sam.to(device=device)
42
+ sam.eval()
43
+
44
+ # SamPredictor for interactive segmentation (point/box prompts)
45
+ predictor = SamPredictor(sam)
46
+ print("✓ SamPredictor initialized for interactive segmentation")
47
+
48
+ # SamAutomaticMaskGenerator for automatic mask generation
49
+ # Uses the same model but with automatic grid-based prompting
50
+ mask_generator = SamAutomaticMaskGenerator(
51
+ model=sam,
52
+ points_per_side=32, # Grid density (32x32 = 1024 points)
53
+ pred_iou_thresh=0.88, # IoU threshold for filtering
54
+ stability_score_thresh=0.95, # Stability threshold
55
+ crop_n_layers=1, # Number of crop layers for multi-scale
56
+ crop_n_points_downscale_factor=2, # Downscale factor for crops
57
+ min_mask_region_area=100 # Minimum mask area in pixels
58
+ )
59
+ print("✓ SamAutomaticMaskGenerator initialized for automatic segmentation")
60
+ print("✓ MedSAM model loaded successfully!")
61
+
62
  finally:
63
  # Restore original torch.load
64
  torch.load = original_torch_load
 
343
  })
344
 
345
 
346
+ # =============================================================================
347
+ # AUTO MASK GENERATION API (replaces local mask_generator.generate())
348
+ # =============================================================================
349
+
350
+ def generate_auto_masks(image, request_json):
351
+ """
352
+ Automatically generate all masks for an image using the MedSAM (ViT-B) model.
353
+
354
+ This is equivalent to `mask_generator.generate(img_np)` in enhanced_preprocessing.py
355
+
356
+ Args:
357
+ image: PIL Image
358
+ request_json: JSON string with optional parameters:
359
+ {
360
+ "points_per_side": 32, # Grid density (default: 32)
361
+ "pred_iou_thresh": 0.88, # IoU threshold (default: 0.88)
362
+ "stability_score_thresh": 0.95, # Stability threshold (default: 0.95)
363
+ "min_mask_region_area": 0 # Minimum mask area (default: 0)
364
+ }
365
+
366
+ Returns:
367
+ JSON string with format matching SamAutomaticMaskGenerator output:
368
+ {
369
+ "success": true,
370
+ "masks": [
371
+ {
372
+ "segmentation": [[...2D boolean array...]],
373
+ "area": 12345,
374
+ "bbox": [x, y, width, height],
375
+ "predicted_iou": 0.95,
376
+ "point_coords": [[x, y]],
377
+ "stability_score": 0.98,
378
+ "crop_box": [x, y, width, height]
379
+ },
380
+ ...
381
+ ],
382
+ "num_masks": 42,
383
+ "image_size": [height, width]
384
+ }
385
+ """
386
+ try:
387
+ if mask_generator is None:
388
+ return json.dumps({
389
+ 'success': False,
390
+ 'error': 'MedSAM model not loaded. Please ensure medsam_vit_b.pth is available.',
391
+ 'available': False
392
+ })
393
+
394
+ # Parse optional parameters
395
+ params = {}
396
+ if request_json:
397
+ try:
398
+ params = json.loads(request_json) if request_json.strip() else {}
399
+ except:
400
+ params = {}
401
+
402
+ # Convert PIL to numpy
403
+ image_array = np.array(image)
404
+ H, W = image_array.shape[:2]
405
+
406
+ print(f"Generating automatic masks for image of size {W}x{H}...")
407
+
408
+ # Generate masks using SAM automatic mask generator
409
+ masks = mask_generator.generate(image_array)
410
+
411
+ print(f"Generated {len(masks)} masks")
412
+
413
+ # Convert masks to JSON-serializable format
414
+ masks_output = []
415
+ for m in masks:
416
+ mask_data = {
417
+ 'segmentation': m['segmentation'].astype(np.uint8).tolist(),
418
+ 'area': int(m['area']),
419
+ 'bbox': [int(x) for x in m['bbox']], # [x, y, width, height]
420
+ 'predicted_iou': float(m['predicted_iou']),
421
+ 'point_coords': [[int(p[0]), int(p[1])] for p in m['point_coords']] if m['point_coords'] is not None else [],
422
+ 'stability_score': float(m['stability_score']),
423
+ 'crop_box': [int(x) for x in m['crop_box']] # [x, y, width, height]
424
+ }
425
+ masks_output.append(mask_data)
426
+
427
+ result = {
428
+ 'success': True,
429
+ 'masks': masks_output,
430
+ 'num_masks': len(masks_output),
431
+ 'image_size': [H, W]
432
+ }
433
+
434
+ print(f"Auto mask generation complete: {len(masks_output)} masks")
435
+ return json.dumps(result)
436
+
437
+ except Exception as e:
438
+ import traceback
439
+ return json.dumps({
440
+ 'success': False,
441
+ 'error': str(e),
442
+ 'traceback': traceback.format_exc()
443
+ })
444
+
445
+
446
+ def check_auto_mask_status():
447
+ """
448
+ Check if automatic mask generation is available
449
+ """
450
+ return json.dumps({
451
+ 'available': mask_generator is not None,
452
+ 'model': 'medsam_vit_b' if mask_generator else None,
453
+ 'model_type': MODEL_TYPE,
454
+ 'device': str(device)
455
+ })
456
+
457
+
458
  # =============================================================================
459
  # LEGACY API FUNCTIONS (kept for backwards compatibility with test scripts)
460
  # =============================================================================
 
801
  api_name="segment_with_box" # Keep old API name for compatibility
802
  )
803
 
804
+ # Tab 5: Auto Mask Generation (for preprocessing)
805
+ with gr.Tab("Auto Mask Generation"):
806
+ gr.Markdown("""
807
+ ## Automatic Mask Generation (MedSAM)
808
+
809
+ **Replaces `mask_generator.generate(img_np)` in preprocessing pipeline**
810
+
811
+ Uses MedSAM (ViT-B) model with `SamAutomaticMaskGenerator` to automatically
812
+ segment all objects in an image. This is used for initial preprocessing
813
+ of scientific/medical images.
814
+
815
+ Uses the same `medsam_vit_b.pth` model as interactive segmentation.
816
+
817
+ **Output Format:**
818
+ ```json
819
+ {
820
+ "success": true,
821
+ "masks": [
822
+ {
823
+ "segmentation": [[...2D array...]],
824
+ "area": 12345,
825
+ "bbox": [x, y, width, height],
826
+ "predicted_iou": 0.95,
827
+ "point_coords": [[x, y]],
828
+ "stability_score": 0.98,
829
+ "crop_box": [x, y, width, height]
830
+ }
831
+ ],
832
+ "num_masks": 42
833
+ }
834
+ ```
835
+ """)
836
+
837
+ with gr.Row():
838
+ with gr.Column():
839
+ auto_image = gr.Image(type="pil", label="Input Image")
840
+ auto_params = gr.Textbox(
841
+ label="Parameters (optional)",
842
+ placeholder='{"points_per_side": 32, "pred_iou_thresh": 0.88}',
843
+ lines=2
844
+ )
845
+ with gr.Row():
846
+ auto_button = gr.Button("Generate All Masks", variant="primary")
847
+ status_button = gr.Button("Check Status", variant="secondary")
848
+
849
+ with gr.Column():
850
+ auto_output = gr.Textbox(label="Result JSON", lines=20)
851
+ status_output = gr.Textbox(label="Status", lines=3)
852
+
853
+ auto_button.click(
854
+ fn=generate_auto_masks,
855
+ inputs=[auto_image, auto_params],
856
+ outputs=auto_output,
857
+ api_name="generate_auto_masks"
858
+ )
859
+
860
+ status_button.click(
861
+ fn=check_auto_mask_status,
862
+ inputs=[],
863
+ outputs=status_output,
864
+ api_name="check_auto_mask_status"
865
+ )
866
+
867
+ # Tab 6: Simple UI Interface
868
  with gr.Tab("Simple Interface"):
869
  gr.Markdown("## Click-based Segmentation")
870
  gr.Markdown("Enter X, Y coordinates to segment")