Anigor66 committed on
Commit
6b32938
·
1 Parent(s): f61a56b

Update API to match backend format - add segment_points, segment_box, segment_multiple_boxes

Browse files
Files changed (1) hide show
  1. app.py +483 -158
app.py CHANGED
@@ -1,5 +1,7 @@
1
  """
2
- HuggingFace Space for MedSAM Inference with Point Prompts
 
 
3
  Deploy this to: https://huggingface.co/spaces/YOUR_USERNAME/medsam-inference
4
  """
5
  import gradio as gr
@@ -33,61 +35,331 @@ def patched_torch_load(f, *args, **kwargs):
33
  torch.load = patched_torch_load
34
 
35
  try:
36
- sam = sam_model_registry[MODEL_TYPE](checkpoint=MODEL_CHECKPOINT)
37
- sam.to(device=device)
38
- sam.eval()
39
- predictor = SamPredictor(sam)
40
- print("✓ MedSAM model loaded successfully!")
41
  finally:
42
  # Restore original torch.load
43
  torch.load = original_torch_load
44
 
45
 
46
- def segment_with_points(image, points_json):
 
 
 
 
47
  """
48
- Segment image with point prompts
 
 
 
49
 
50
  Args:
51
  image: PIL Image
52
- points_json: JSON string with format:
53
  {
54
- "coords": [[x1, y1], [x2, y2], ...],
55
- "labels": [1, 0, ...], # 1=foreground, 0=background
56
- "multimask_output": true/false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  }
58
 
59
  Returns:
60
- JSON string with masks and scores
 
 
 
 
 
 
61
  """
62
  try:
63
  # Parse input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  points_data = json.loads(points_json)
65
  coords = np.array(points_data["coords"])
66
  labels = np.array(points_data["labels"])
67
  multimask_output = points_data.get("multimask_output", True)
68
 
69
- # Convert PIL to numpy
70
  image_array = np.array(image)
71
-
72
- # Set image in predictor
73
  predictor.set_image(image_array)
74
 
75
- # Run prediction
76
  masks, scores, logits = predictor.predict(
77
  point_coords=coords,
78
  point_labels=labels,
79
  multimask_output=multimask_output
80
  )
81
 
82
- # Convert masks to lists (JSON serializable)
83
  masks_list = []
84
  scores_list = []
85
 
86
  for i, (mask, score) in enumerate(zip(masks, scores)):
87
- # Convert boolean mask to uint8
88
  mask_uint8 = (mask * 255).astype(np.uint8)
89
-
90
- # Encode mask as base64 PNG
91
  mask_image = Image.fromarray(mask_uint8)
92
  buffer = io.BytesIO()
93
  mask_image.save(buffer, format='PNG')
@@ -96,55 +368,37 @@ def segment_with_points(image, points_json):
96
  masks_list.append({
97
  'mask_base64': mask_base64,
98
  'mask_shape': mask.shape,
99
- 'mask_data': mask.tolist() # Also include raw data for processing
100
  })
101
  scores_list.append(float(score))
102
 
103
- result = {
104
  'success': True,
105
  'masks': masks_list,
106
  'scores': scores_list,
107
  'num_masks': len(masks_list)
108
- }
109
-
110
- return json.dumps(result)
111
 
112
  except Exception as e:
113
- error_result = {
114
- 'success': False,
115
- 'error': str(e)
116
- }
117
- return json.dumps(error_result)
118
 
119
 
120
- def segment_with_box(image, box_json):
121
  """
122
- Segment image with box prompt
123
 
124
  Args:
125
- image: PIL Image
126
  box_json: JSON string with format:
127
- {
128
- "box": [x1, y1, x2, y2], # Top-left and bottom-right corners
129
- "multimask_output": true/false
130
- }
131
-
132
- Returns:
133
- JSON string with masks and scores
134
  """
135
  try:
136
- # Parse input
137
  box_data = json.loads(box_json)
138
- box = np.array(box_data["box"]) # [x1, y1, x2, y2]
139
- multimask_output = box_data.get("multimask_output", False) # Usually False for box
140
 
141
- # Convert PIL to numpy
142
  image_array = np.array(image)
143
-
144
- # Set image in predictor
145
  predictor.set_image(image_array)
146
 
147
- # Run prediction with box
148
  masks, scores, logits = predictor.predict(
149
  point_coords=None,
150
  point_labels=None,
@@ -152,15 +406,11 @@ def segment_with_box(image, box_json):
152
  multimask_output=multimask_output
153
  )
154
 
155
- # Convert masks to lists (JSON serializable)
156
  masks_list = []
157
  scores_list = []
158
 
159
  for i, (mask, score) in enumerate(zip(masks, scores)):
160
- # Convert boolean mask to uint8
161
  mask_uint8 = (mask * 255).astype(np.uint8)
162
-
163
- # Encode mask as base64 PNG
164
  mask_image = Image.fromarray(mask_uint8)
165
  buffer = io.BytesIO()
166
  mask_image.save(buffer, format='PNG')
@@ -173,40 +423,25 @@ def segment_with_box(image, box_json):
173
  })
174
  scores_list.append(float(score))
175
 
176
- result = {
177
  'success': True,
178
  'masks': masks_list,
179
  'scores': scores_list,
180
  'num_masks': len(masks_list),
181
  'box': box.tolist()
182
- }
183
-
184
- return json.dumps(result)
185
 
186
  except Exception as e:
187
  import traceback
188
- error_result = {
189
  'success': False,
190
  'error': str(e),
191
  'traceback': traceback.format_exc()
192
- }
193
- return json.dumps(error_result)
194
 
195
 
196
  def segment_simple(image, x, y, label=1, multimask=True):
197
- """
198
- Simple single-point segmentation interface for Gradio UI
199
-
200
- Args:
201
- image: PIL Image
202
- x: X coordinate
203
- y: Y coordinate
204
- label: 1 for foreground, 0 for background
205
- multimask: Whether to output multiple masks
206
-
207
- Returns:
208
- Mask image and score
209
- """
210
  try:
211
  points_json = json.dumps({
212
  "coords": [[int(x), int(y)]],
@@ -214,18 +449,16 @@ def segment_simple(image, x, y, label=1, multimask=True):
214
  "multimask_output": multimask
215
  })
216
 
217
- result_json = segment_with_points(image, points_json)
218
  result = json.loads(result_json)
219
 
220
  if not result['success']:
221
  return None, f"Error: {result['error']}"
222
 
223
- # Get best mask (highest score)
224
  best_idx = np.argmax(result['scores'])
225
  best_mask_base64 = result['masks'][best_idx]['mask_base64']
226
  best_score = result['scores'][best_idx]
227
 
228
- # Decode mask
229
  mask_bytes = base64.b64decode(best_mask_base64)
230
  mask_image = Image.open(io.BytesIO(mask_bytes))
231
 
@@ -235,115 +468,209 @@ def segment_simple(image, x, y, label=1, multimask=True):
235
  return None, f"Error: {str(e)}"
236
 
237
 
238
- # Create Gradio interface with two tabs
 
 
 
239
  with gr.Blocks(title="MedSAM Inference API") as demo:
240
  gr.Markdown("# 🏥 MedSAM Inference API")
241
- gr.Markdown("Point-based segmentation using Fine-Tuned MedSAM")
 
242
 
243
  with gr.Tabs():
244
- # Tab 1: API Interface (for programmatic access)
245
- with gr.Tab("API Interface"):
246
  gr.Markdown("""
247
- ## JSON API for Programmatic Access
 
 
 
 
248
 
249
  **Input Format:**
250
  ```json
251
  {
252
- "coords": [[x1, y1], [x2, y2]],
253
- "labels": [1, 0],
254
- "multimask_output": true
255
  }
256
  ```
257
 
258
- **Output Format:**
259
  ```json
260
  {
261
  "success": true,
262
- "masks": [...],
263
- "scores": [0.95, 0.88, 0.76],
264
- "num_masks": 3
265
  }
266
  ```
267
  """)
268
 
269
  with gr.Row():
270
  with gr.Column():
271
- api_image = gr.Image(type="pil", label="Input Image")
272
- api_points = gr.Textbox(
273
- label="Points JSON",
274
- placeholder='{"coords": [[100, 150]], "labels": [1], "multimask_output": true}',
275
  lines=3
276
  )
277
- api_button = gr.Button("Run Segmentation", variant="primary")
278
 
279
  with gr.Column():
280
- api_output = gr.Textbox(label="Result JSON", lines=15)
281
 
282
- api_button.click(
283
- fn=segment_with_points,
284
- inputs=[api_image, api_points],
285
- outputs=api_output
 
286
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- # Example
289
- gr.Examples(
290
- examples=[
291
- [
292
- "example_image.jpg",
293
- '{"coords": [[200, 200]], "labels": [1], "multimask_output": true}'
294
- ]
295
- ],
296
- inputs=[api_image, api_points]
297
  )
298
 
299
- # Tab 2: Box-based Segmentation
300
- with gr.Tab("Box Segmentation"):
301
  gr.Markdown("""
302
- ## Box-based Segmentation
303
 
304
- Segment using a bounding box (rectangle).
305
 
306
  **Input Format:**
307
  ```json
308
  {
309
- "box": [x1, y1, x2, y2],
310
- "multimask_output": false
 
 
 
 
 
 
 
 
 
311
  }
312
  ```
313
- Where (x1, y1) is top-left corner and (x2, y2) is bottom-right corner.
314
  """)
315
 
316
  with gr.Row():
317
  with gr.Column():
318
  box_image = gr.Image(type="pil", label="Input Image")
319
- box_json = gr.Textbox(
320
- label="Box JSON",
321
- placeholder='{"box": [100, 100, 300, 300], "multimask_output": false}',
322
  lines=3
323
  )
324
- box_button = gr.Button("Run Box Segmentation", variant="primary")
325
 
326
  with gr.Column():
327
  box_output = gr.Textbox(label="Result JSON", lines=15)
328
 
329
  box_button.click(
330
- fn=segment_with_box,
331
- inputs=[box_image, box_json],
332
- outputs=box_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  )
334
 
335
- # Example
336
- gr.Examples(
337
- examples=[
338
- [
339
- "example_image.jpg",
340
- '{"box": [150, 150, 350, 350], "multimask_output": false}'
341
- ]
342
- ],
343
- inputs=[box_image, box_json]
 
 
 
 
 
 
 
 
 
 
 
344
  )
345
 
346
- # Tab 3: Simple UI Interface
347
  with gr.Tab("Simple Interface"):
348
  gr.Markdown("## Click-based Segmentation")
349
  gr.Markdown("Enter X, Y coordinates to segment")
@@ -378,41 +705,39 @@ with gr.Blocks(title="MedSAM Inference API") as demo:
378
 
379
  gr.Markdown("""
380
  ---
381
- ### 📡 API Usage from Python
382
 
383
  ```python
384
- import requests
385
  import json
386
- import base64
387
- from PIL import Image
388
 
389
- # Your Space URL
390
- API_URL = "https://YOUR_USERNAME-medsam-inference.hf.space/api/predict"
391
 
392
- # Prepare image
393
- with open("image.jpg", "rb") as f:
394
- img_base64 = base64.b64encode(f.read()).decode()
395
-
396
- # Prepare points
397
- points_json = json.dumps({
398
- "coords": [[150, 200]],
399
- "labels": [1],
400
- "multimask_output": True
401
- })
402
 
403
- # Call API
404
- response = requests.post(
405
- API_URL,
406
- json={
407
- "data": [
408
- f"data:image/jpeg;base64,{img_base64}",
409
- points_json
410
- ]
411
- }
412
  )
413
 
414
- result = response.json()
415
- print(result)
 
 
 
 
416
  ```
417
  """)
418
 
@@ -422,5 +747,5 @@ if __name__ == "__main__":
422
  server_name="0.0.0.0",
423
  server_port=7860,
424
  share=False,
425
- show_error=True # Enable verbose error reporting
426
  )
 
1
  """
2
+ HuggingFace Space for MedSAM Inference
3
+ API-compatible with Dense-Captioning-Toolkit backend
4
+
5
  Deploy this to: https://huggingface.co/spaces/YOUR_USERNAME/medsam-inference
6
  """
7
  import gradio as gr
 
35
  torch.load = patched_torch_load
36
 
37
  try:
38
+ sam = sam_model_registry[MODEL_TYPE](checkpoint=MODEL_CHECKPOINT)
39
+ sam.to(device=device)
40
+ sam.eval()
41
+ predictor = SamPredictor(sam)
42
+ print("✓ MedSAM model loaded successfully!")
43
  finally:
44
  # Restore original torch.load
45
  torch.load = original_torch_load
46
 
47
 
48
+ # =============================================================================
49
+ # API FUNCTIONS - MATCHING BACKEND FORMAT (backend/app.py)
50
+ # =============================================================================
51
+
52
def segment_points(image, request_json):
    """
    Point-prompted segmentation, API-compatible with the backend's
    /api/medsam/segment_points endpoint.

    Every point is expanded into a small bounding box centred on it and
    segmented independently, mirroring the backend behaviour.

    Args:
        image: PIL Image to segment.
        request_json: JSON string of the form
            {"points": [[x1, y1], [x2, y2], ...], "labels": [1, 0, ...]}
            (labels are accepted for API compatibility but not used here).

    Returns:
        JSON string matching the backend response format:
            {"success": true,
             "masks": [{"mask": [[...]], "confidence": 0.95}, ...],
             "confidences": [0.95, ...],
             "method": "medsam_points_individual"}
        or {"success": false, "error": "..."} on failure.
    """
    try:
        request = json.loads(request_json)
        points = request.get("points", [])
        _labels = request.get("labels", [])  # parsed for API parity; not used by this path

        if not points:
            return json.dumps({'success': False, 'error': 'At least one point is required'})

        frame = np.array(image)
        height, width = frame.shape[:2]
        predictor.set_image(frame)

        box_size = 20  # side length of the per-point pseudo-box (matches backend)
        half = box_size // 2
        collected_masks = []
        collected_confidences = []

        for idx, (x, y) in enumerate(points, start=1):
            # Build a point-centred box, clamped to the image bounds.
            bbox = np.array([
                max(0, x - half),
                max(0, y - half),
                min(width - 1, x + half),
                min(height - 1, y + half),
            ])

            print(f"Processing point {idx}/{len(points)}: ({x}, {y}) -> bbox: {bbox.tolist()}")

            masks, scores, _ = predictor.predict(
                point_coords=None,
                point_labels=None,
                box=bbox,
                multimask_output=False
            )

            if len(masks) == 0:
                print(f"Point {idx} segmentation failed")
                continue

            best = int(np.argmax(scores))
            confidence = float(scores[best])
            collected_masks.append({
                'mask': masks[best].astype(np.uint8).tolist(),
                'confidence': confidence
            })
            collected_confidences.append(confidence)
            print(f"Point {idx} segmentation successful, confidence: {confidence:.4f}")

        if collected_masks:
            payload = {
                'success': True,
                'masks': collected_masks,
                'confidences': collected_confidences,
                'method': 'medsam_points_individual'
            }
        else:
            payload = {'success': False, 'error': 'All point segmentations failed'}

        return json.dumps(payload)

    except Exception as e:
        import traceback
        return json.dumps({
            'success': False,
            'error': str(e),
            'traceback': traceback.format_exc()
        })
151
+
152
+
153
def segment_box(image, request_json):
    """
    Single-box segmentation, API-compatible with the backend's
    /api/medsam/segment_box endpoint.

    Args:
        image: PIL Image to segment.
        request_json: JSON string of the form
            {"bbox": [x1, y1, x2, y2]}
            or the object form {"bbox": {"x1": ..., "y1": ..., "x2": ..., "y2": ...}}.

    Returns:
        JSON string matching the backend response format:
            {"success": true,
             "mask": [[...]],
             "confidence": 0.95,
             "method": "medsam_box"}
        or {"success": false, "error": "..."} on failure.
    """
    try:
        data = json.loads(request_json)
        bbox = data.get("bbox", [])

        # Handle both array format [x1, y1, x2, y2] and object format {x1, y1, x2, y2}
        if isinstance(bbox, dict):
            bbox = [bbox.get('x1', 0), bbox.get('y1', 0), bbox.get('x2', 0), bbox.get('y2', 0)]

        if not bbox or len(bbox) != 4:
            return json.dumps({'success': False, 'error': 'Valid bounding box required [x1, y1, x2, y2]'})

        # Fix: an object-format bbox with missing keys used to fall through as
        # [0, 0, 0, 0] and get sent to the model. Reject degenerate / inverted
        # boxes up front with a clear error instead.
        x1, y1, x2, y2 = bbox
        if x2 <= x1 or y2 <= y1:
            return json.dumps({
                'success': False,
                'error': f'Degenerate bounding box {bbox}: require x2 > x1 and y2 > y1'
            })

        box = np.array(bbox)

        # Convert PIL to numpy and embed the image once.
        image_array = np.array(image)
        predictor.set_image(image_array)

        # Box prompts conventionally use a single mask output.
        masks, scores, _ = predictor.predict(
            point_coords=None,
            point_labels=None,
            box=box,
            multimask_output=False
        )

        if len(masks) > 0:
            best_idx = int(np.argmax(scores))
            mask = masks[best_idx]
            score = float(scores[best_idx])

            result = {
                'success': True,
                'mask': mask.astype(np.uint8).tolist(),
                'confidence': score,
                'method': 'medsam_box'
            }
        else:
            result = {'success': False, 'error': 'Segmentation failed'}

        return json.dumps(result)

    except Exception as e:
        import traceback
        return json.dumps({
            'success': False,
            'error': str(e),
            'traceback': traceback.format_exc()
        })
224
+
225
+
226
def segment_multiple_boxes(image, request_json):
    """
    Multi-box segmentation, API-compatible with the backend's
    /api/medsam/segment_multiple_boxes endpoint (the main frontend API).

    Each bounding box is segmented independently against the same embedded
    image.

    Args:
        image: PIL Image to segment.
        request_json: JSON string of the form
            {"bboxes": [[x1, y1, x2, y2],
                        {"x1": 10, "y1": 20, "x2": 100, "y2": 200}]}
            — array and object box formats may be mixed.

    Returns:
        JSON string matching the backend response format:
            {"success": true,
             "masks": [{"mask": [[...]], "confidence": 0.95}, ...],
             "confidences": [0.95, ...],
             "method": "medsam_multiple_boxes"}
        or {"success": false, "error": "..."} on failure.
    """
    try:
        payload = json.loads(request_json)
        bboxes = payload.get("bboxes", [])

        if not bboxes:
            return json.dumps({'success': False, 'error': 'At least one bounding box is required'})

        frame = np.array(image)
        predictor.set_image(frame)

        print(f"Processing {len(bboxes)} boxes for segmentation")

        segment_masks = []
        segment_confidences = []

        for idx, raw_box in enumerate(bboxes, start=1):
            # Accept both [x1, y1, x2, y2] arrays and {x1, y1, x2, y2} objects.
            if isinstance(raw_box, dict):
                box = np.array([raw_box.get(key, 0) for key in ('x1', 'y1', 'x2', 'y2')])
            else:
                box = np.array(raw_box)

            print(f"Processing box {idx}/{len(bboxes)}: {box.tolist()}")

            masks, scores, _ = predictor.predict(
                point_coords=None,
                point_labels=None,
                box=box,
                multimask_output=False
            )

            if len(masks) == 0:
                print(f"Box {idx} segmentation failed")
                continue

            best = int(np.argmax(scores))
            confidence = float(scores[best])
            segment_masks.append({
                'mask': masks[best].astype(np.uint8).tolist(),
                'confidence': confidence
            })
            segment_confidences.append(confidence)
            print(f"Box {idx} segmentation successful, confidence: {confidence:.4f}")

        if segment_masks:
            result = {
                'success': True,
                'masks': segment_masks,
                'confidences': segment_confidences,
                'method': 'medsam_multiple_boxes'
            }
        else:
            result = {'success': False, 'error': 'All segmentations failed'}

        return json.dumps(result)

    except Exception as e:
        import traceback
        return json.dumps({
            'success': False,
            'error': str(e),
            'traceback': traceback.format_exc()
        })
325
+
326
+
327
+ # =============================================================================
328
+ # LEGACY API FUNCTIONS (kept for backwards compatibility with test scripts)
329
+ # =============================================================================
330
+
331
+ def segment_with_points_legacy(image, points_json):
332
+ """
333
+ Legacy API - Segment with point prompts using true point-based segmentation
334
+
335
+ Args:
336
+ points_json: JSON string with format:
337
+ {
338
+ "coords": [[x1, y1], [x2, y2], ...],
339
+ "labels": [1, 0, ...],
340
+ "multimask_output": true/false
341
+ }
342
+ """
343
+ try:
344
  points_data = json.loads(points_json)
345
  coords = np.array(points_data["coords"])
346
  labels = np.array(points_data["labels"])
347
  multimask_output = points_data.get("multimask_output", True)
348
 
 
349
  image_array = np.array(image)
 
 
350
  predictor.set_image(image_array)
351
 
 
352
  masks, scores, logits = predictor.predict(
353
  point_coords=coords,
354
  point_labels=labels,
355
  multimask_output=multimask_output
356
  )
357
 
 
358
  masks_list = []
359
  scores_list = []
360
 
361
  for i, (mask, score) in enumerate(zip(masks, scores)):
 
362
  mask_uint8 = (mask * 255).astype(np.uint8)
 
 
363
  mask_image = Image.fromarray(mask_uint8)
364
  buffer = io.BytesIO()
365
  mask_image.save(buffer, format='PNG')
 
368
  masks_list.append({
369
  'mask_base64': mask_base64,
370
  'mask_shape': mask.shape,
371
+ 'mask_data': mask.tolist()
372
  })
373
  scores_list.append(float(score))
374
 
375
+ return json.dumps({
376
  'success': True,
377
  'masks': masks_list,
378
  'scores': scores_list,
379
  'num_masks': len(masks_list)
380
+ })
 
 
381
 
382
  except Exception as e:
383
+ return json.dumps({'success': False, 'error': str(e)})
 
 
 
 
384
 
385
 
386
+ def segment_with_box_legacy(image, box_json):
387
  """
388
+ Legacy API - Segment with box prompt
389
 
390
  Args:
 
391
  box_json: JSON string with format:
392
+ {"box": [x1, y1, x2, y2], "multimask_output": false}
 
 
 
 
 
 
393
  """
394
  try:
 
395
  box_data = json.loads(box_json)
396
+ box = np.array(box_data["box"])
397
+ multimask_output = box_data.get("multimask_output", False)
398
 
 
399
  image_array = np.array(image)
 
 
400
  predictor.set_image(image_array)
401
 
 
402
  masks, scores, logits = predictor.predict(
403
  point_coords=None,
404
  point_labels=None,
 
406
  multimask_output=multimask_output
407
  )
408
 
 
409
  masks_list = []
410
  scores_list = []
411
 
412
  for i, (mask, score) in enumerate(zip(masks, scores)):
 
413
  mask_uint8 = (mask * 255).astype(np.uint8)
 
 
414
  mask_image = Image.fromarray(mask_uint8)
415
  buffer = io.BytesIO()
416
  mask_image.save(buffer, format='PNG')
 
423
  })
424
  scores_list.append(float(score))
425
 
426
+ return json.dumps({
427
  'success': True,
428
  'masks': masks_list,
429
  'scores': scores_list,
430
  'num_masks': len(masks_list),
431
  'box': box.tolist()
432
+ })
 
 
433
 
434
  except Exception as e:
435
  import traceback
436
+ return json.dumps({
437
  'success': False,
438
  'error': str(e),
439
  'traceback': traceback.format_exc()
440
+ })
 
441
 
442
 
443
  def segment_simple(image, x, y, label=1, multimask=True):
444
+ """Simple single-point segmentation for Gradio UI"""
 
 
 
 
 
 
 
 
 
 
 
 
445
  try:
446
  points_json = json.dumps({
447
  "coords": [[int(x), int(y)]],
 
449
  "multimask_output": multimask
450
  })
451
 
452
+ result_json = segment_with_points_legacy(image, points_json)
453
  result = json.loads(result_json)
454
 
455
  if not result['success']:
456
  return None, f"Error: {result['error']}"
457
 
 
458
  best_idx = np.argmax(result['scores'])
459
  best_mask_base64 = result['masks'][best_idx]['mask_base64']
460
  best_score = result['scores'][best_idx]
461
 
 
462
  mask_bytes = base64.b64decode(best_mask_base64)
463
  mask_image = Image.open(io.BytesIO(mask_bytes))
464
 
 
468
  return None, f"Error: {str(e)}"
469
 
470
 
471
+ # =============================================================================
472
+ # GRADIO INTERFACE
473
+ # =============================================================================
474
+
475
  with gr.Blocks(title="MedSAM Inference API") as demo:
476
  gr.Markdown("# 🏥 MedSAM Inference API")
477
+ gr.Markdown("Point and box-based segmentation using Fine-Tuned MedSAM")
478
+ gr.Markdown("**API-compatible with Dense-Captioning-Toolkit backend**")
479
 
480
  with gr.Tabs():
481
+ # Tab 1: Backend-Compatible API (Points)
482
+ with gr.Tab("Segment Points (Backend API)"):
483
  gr.Markdown("""
484
+ ## Point-based Segmentation - Backend Compatible
485
+
486
+ **Matches `/api/medsam/segment_points`**
487
+
488
+ Each point is converted to a small bounding box for segmentation.
489
 
490
  **Input Format:**
491
  ```json
492
  {
493
+ "points": [[x1, y1], [x2, y2], ...],
494
+ "labels": [1, 0, ...]
 
495
  }
496
  ```
497
 
498
+ **Output Format (matches backend):**
499
  ```json
500
  {
501
  "success": true,
502
+ "masks": [{"mask": [[...]], "confidence": 0.95}, ...],
503
+ "confidences": [0.95, ...],
504
+ "method": "medsam_points_individual"
505
  }
506
  ```
507
  """)
508
 
509
  with gr.Row():
510
  with gr.Column():
511
+ points_image = gr.Image(type="pil", label="Input Image")
512
+ points_json_input = gr.Textbox(
513
+ label="Request JSON",
514
+ placeholder='{"points": [[100, 150], [200, 200]], "labels": [1, 1]}',
515
  lines=3
516
  )
517
+ points_button = gr.Button("Segment Points", variant="primary")
518
 
519
  with gr.Column():
520
+ points_output = gr.Textbox(label="Result JSON", lines=15)
521
 
522
+ points_button.click(
523
+ fn=segment_points,
524
+ inputs=[points_image, points_json_input],
525
+ outputs=points_output,
526
+ api_name="segment_points"
527
  )
528
+
529
+ # Tab 2: Backend-Compatible API (Multiple Boxes)
530
+ with gr.Tab("Segment Multiple Boxes (Backend API)"):
531
+ gr.Markdown("""
532
+ ## Multiple Box Segmentation - Backend Compatible
533
+
534
+ **Matches `/api/medsam/segment_multiple_boxes`** (main frontend API)
535
+
536
+ **Input Format:**
537
+ ```json
538
+ {
539
+ "bboxes": [
540
+ [x1, y1, x2, y2],
541
+ {"x1": 10, "y1": 20, "x2": 100, "y2": 200}
542
+ ]
543
+ }
544
+ ```
545
+
546
+ **Output Format (matches backend):**
547
+ ```json
548
+ {
549
+ "success": true,
550
+ "masks": [{"mask": [[...]], "confidence": 0.95}, ...],
551
+ "confidences": [0.95, ...],
552
+ "method": "medsam_multiple_boxes"
553
+ }
554
+ ```
555
+ """)
556
+
557
+ with gr.Row():
558
+ with gr.Column():
559
+ multi_box_image = gr.Image(type="pil", label="Input Image")
560
+ multi_box_json = gr.Textbox(
561
+ label="Request JSON",
562
+ placeholder='{"bboxes": [[100, 100, 300, 300], [400, 400, 600, 600]]}',
563
+ lines=3
564
+ )
565
+ multi_box_button = gr.Button("Segment Multiple Boxes", variant="primary")
566
+
567
+ with gr.Column():
568
+ multi_box_output = gr.Textbox(label="Result JSON", lines=15)
569
 
570
+ multi_box_button.click(
571
+ fn=segment_multiple_boxes,
572
+ inputs=[multi_box_image, multi_box_json],
573
+ outputs=multi_box_output,
574
+ api_name="segment_multiple_boxes"
 
 
 
 
575
  )
576
 
577
+ # Tab 3: Backend-Compatible API (Single Box)
578
+ with gr.Tab("Segment Box (Backend API)"):
579
  gr.Markdown("""
580
+ ## Single Box Segmentation - Backend Compatible
581
 
582
+ **Matches `/api/medsam/segment_box`**
583
 
584
  **Input Format:**
585
  ```json
586
  {
587
+ "bbox": [x1, y1, x2, y2]
588
+ }
589
+ ```
590
+
591
+ **Output Format (matches backend):**
592
+ ```json
593
+ {
594
+ "success": true,
595
+ "mask": [[...]],
596
+ "confidence": 0.95,
597
+ "method": "medsam_box"
598
  }
599
  ```
 
600
  """)
601
 
602
  with gr.Row():
603
  with gr.Column():
604
  box_image = gr.Image(type="pil", label="Input Image")
605
+ box_json_input = gr.Textbox(
606
+ label="Request JSON",
607
+ placeholder='{"bbox": [100, 100, 300, 300]}',
608
  lines=3
609
  )
610
+ box_button = gr.Button("Segment Box", variant="primary")
611
 
612
  with gr.Column():
613
  box_output = gr.Textbox(label="Result JSON", lines=15)
614
 
615
  box_button.click(
616
+ fn=segment_box,
617
+ inputs=[box_image, box_json_input],
618
+ outputs=box_output,
619
+ api_name="segment_box"
620
+ )
621
+
622
+ # Tab 4: Legacy API (for test scripts)
623
+ with gr.Tab("Legacy API"):
624
+ gr.Markdown("""
625
+ ## Legacy API (for backwards compatibility)
626
+
627
+ Original API format with `coords`, `mask_data`, `scores`, etc.
628
+ Use if you have existing scripts using the old format.
629
+ """)
630
+
631
+ with gr.Row():
632
+ with gr.Column():
633
+ legacy_image = gr.Image(type="pil", label="Input Image")
634
+ legacy_points = gr.Textbox(
635
+ label="Points JSON (Legacy Format)",
636
+ placeholder='{"coords": [[100, 150]], "labels": [1], "multimask_output": true}',
637
+ lines=3
638
+ )
639
+ legacy_button = gr.Button("Run Segmentation (Legacy)", variant="secondary")
640
+
641
+ with gr.Column():
642
+ legacy_output = gr.Textbox(label="Result JSON", lines=15)
643
+
644
+ legacy_button.click(
645
+ fn=segment_with_points_legacy,
646
+ inputs=[legacy_image, legacy_points],
647
+ outputs=legacy_output,
648
+ api_name="segment_with_points" # Keep old API name for compatibility
649
  )
650
 
651
+ gr.Markdown("---")
652
+
653
+ with gr.Row():
654
+ with gr.Column():
655
+ legacy_box_image = gr.Image(type="pil", label="Input Image")
656
+ legacy_box_json = gr.Textbox(
657
+ label="Box JSON (Legacy Format)",
658
+ placeholder='{"box": [100, 100, 300, 300], "multimask_output": false}',
659
+ lines=3
660
+ )
661
+ legacy_box_button = gr.Button("Run Box Segmentation (Legacy)", variant="secondary")
662
+
663
+ with gr.Column():
664
+ legacy_box_output = gr.Textbox(label="Result JSON", lines=15)
665
+
666
+ legacy_box_button.click(
667
+ fn=segment_with_box_legacy,
668
+ inputs=[legacy_box_image, legacy_box_json],
669
+ outputs=legacy_box_output,
670
+ api_name="segment_with_box" # Keep old API name for compatibility
671
  )
672
 
673
+ # Tab 5: Simple UI Interface
674
  with gr.Tab("Simple Interface"):
675
  gr.Markdown("## Click-based Segmentation")
676
  gr.Markdown("Enter X, Y coordinates to segment")
 
705
 
706
  gr.Markdown("""
707
  ---
708
+ ### 📡 API Usage from Python (Backend-Compatible)
709
 
710
  ```python
711
+ from gradio_client import Client, handle_file
712
  import json
 
 
713
 
714
+ client = Client("Aniketg6/medsam-inference")
 
715
 
716
+ # Point-based segmentation (matches backend format)
717
+ result = client.predict(
718
+ image=handle_file("image.jpg"),
719
+ request_json=json.dumps({
720
+ "points": [[150, 200], [300, 400]],
721
+ "labels": [1, 1]
722
+ }),
723
+ api_name="/segment_points"
724
+ )
 
725
 
726
+ # Multiple box segmentation (main frontend API)
727
+ result = client.predict(
728
+ image=handle_file("image.jpg"),
729
+ request_json=json.dumps({
730
+ "bboxes": [[100, 100, 300, 300], [400, 400, 600, 600]]
731
+ }),
732
+ api_name="/segment_multiple_boxes"
 
 
733
  )
734
 
735
+ # Parse response
736
+ data = json.loads(result)
737
+ print(f"Success: {data['success']}")
738
+ print(f"Masks: {len(data['masks'])}")
739
+ print(f"Confidences: {data['confidences']}")
740
+ print(f"Method: {data['method']}")
741
  ```
742
  """)
743
 
 
747
  server_name="0.0.0.0",
748
  server_port=7860,
749
  share=False,
750
+ show_error=True
751
  )