Spaces:

EnginDev
/

Grounded_sam_boostly

Runtime error

App Files Files Community

EnginDev committed on Oct 14, 2025

Commit

7a701fd

verified ·

1 Parent(s): 4d6a7f4

Create app.py

Browse files

Files changed (1) hide show

app.py +270 -0

app.py ADDED Viewed

	@@ -0,0 +1,270 @@

+import gradio as gr
+import torch
+import numpy as np
+from PIL import Image
+import cv2
+from groundingdino.util.inference import Model as GroundingDINOModel
+from segment_anything import sam_model_registry, SamPredictor
+import supervision as sv
+# Startup banner, printed once when the module is imported.
+print("🚀 Starting Grounded SAM FishBoost Edition v5.0...")
+# Use the GPU when PyTorch reports CUDA is available; otherwise fall back to CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"📱 Using device: {device}")
+# Module-level model singletons. They start as None and are populated on
+# first use by load_models(), so importing this module loads no weights.
+grounding_dino_model = None
+sam_predictor = None
+def load_models():
+    """Load Grounding DINO + SAM models"""
+    global grounding_dino_model, sam_predictor
+    if grounding_dino_model is None:
+        print("📦 Loading Grounding DINO model...")
+        grounding_dino_model = GroundingDINOModel(
+            model_config_path="GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
+            model_checkpoint_path="weights/groundingdino_swint_ogc.pth",
+            device=device
+        )
+        print("✅ Grounding DINO loaded!")
+    if sam_predictor is None:
+        print("📦 Loading SAM model...")
+        sam_checkpoint = "weights/sam_vit_h_4b8939.pth"
+        model_type = "vit_h"
+        sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
+        sam.to(device=device)
+        sam_predictor = SamPredictor(sam)
+        print("✅ SAM loaded!")
+def detect_fish_with_grounded_sam(image_pil, text_prompt="fish", box_threshold=0.25, text_threshold=0.25):
+    """
+    Detect and segment fish using Grounding DINO + SAM
+    Args:
+        image_pil: PIL Image
+        text_prompt: Text prompt for detection (default: "fish")
+        box_threshold: Confidence threshold for boxes
+        text_threshold: Confidence threshold for text matching
+    Returns:
+        mask: Binary mask of detected fish
+        metadata: Detection metadata
+    """
+    load_models()
+    # Convert PIL to numpy
+    image_np = np.array(image_pil)
+    # 1. Grounding DINO: Detect fish boxes
+    print(f"🔍 Detecting '{text_prompt}' with Grounding DINO...")
+    detections = grounding_dino_model.predict_with_classes(
+        image=image_np,
+        classes=[text_prompt],
+        box_threshold=box_threshold,
+        text_threshold=text_threshold
+    )
+    print(f"📦 Found {len(detections.xyxy)} boxes")
+    if len(detections.xyxy) == 0:
+        print("❌ No fish detected!")
+        return None, {
+            "success": False,
+            "mode": "grounded_sam",
+            "detection_method": "grounding_dino",
+            "fish_detected": False,
+            "reason": "No fish found in image"
+        }
+    # Select best detection (highest confidence)
+    best_idx = np.argmax(detections.confidence)
+    best_box = detections.xyxy[best_idx]
+    best_conf = float(detections.confidence[best_idx])
+    print(f"🎯 Best detection: Confidence={best_conf:.2f}, Box={best_box}")
+    # 2. SAM: Segment the detected fish
+    print("✂️ Segmenting with SAM...")
+    sam_predictor.set_image(image_np)
+    # Convert box to SAM format
+    box_np = best_box.reshape(1, 4)
+    masks, scores, _ = sam_predictor.predict(
+        box=box_np,
+        multimask_output=False
+    )
+    mask = masks[0]  # Get best mask
+    # Calculate statistics
+    mask_area = int(np.sum(mask))
+    total_pixels = mask.shape[0] * mask.shape[1]
+    mask_percentage = (mask_area / total_pixels) * 100
+    # Get contours
+    contours, _ = cv2.findContours(
+        mask.astype(np.uint8),
+        cv2.RETR_EXTERNAL,
+        cv2.CHAIN_APPROX_SIMPLE
+    )
+    # Get fish center
+    if len(contours) > 0:
+        largest_contour = max(contours, key=cv2.contourArea)
+        M = cv2.moments(largest_contour)
+        if M["m00"] != 0:
+            cx = int(M["m10"] / M["m00"])
+            cy = int(M["m01"] / M["m00"])
+        else:
+            cx, cy = int(best_box[0] + best_box[2]) // 2, int(best_box[1] + best_box[3]) // 2
+    else:
+        cx, cy = int(best_box[0] + best_box[2]) // 2, int(best_box[1] + best_box[3]) // 2
+    # Convert contours to list format
+    contour_points = []
+    if len(contours) > 0:
+        for point in contours[0][:100]:  # Limit to 100 points
+            contour_points.append({
+                "x": int(point[0][0]),
+                "y": int(point[0][1])
+            })
+    metadata = {
+        "success": True,
+        "mode": "grounded_sam",
+        "detection_method": "grounding_dino_sam",
+        "fish_detected": True,
+        "grounding_dino": {
+            "confidence": best_conf,
+            "bounding_box": [int(x) for x in best_box],
+            "text_prompt": text_prompt,
+            "total_detections": len(detections.xyxy)
+        },
+        "mask_area": mask_area,
+        "mask_percentage": mask_percentage,
+        "num_contours": len(contours),
+        "fish_center": [cx, cy],
+        "image_size": list(mask.shape),
+        "device": device,
+        "contours": contour_points
+    }
+    print(f"✅ Segmentation complete! Mask: {mask_percentage:.2f}%")
+    return mask, metadata
+def process_image(image, quality="high"):
+    """Main processing function for Gradio interface"""
+    if image is None:
+        return None, "❌ No image provided"
+    try:
+        # Convert to PIL if needed
+        if isinstance(image, np.ndarray):
+            image_pil = Image.fromarray(image)
+        else:
+            image_pil = image
+        # Resize for faster processing on CPU
+        max_size = 1024 if quality == "high" else 768
+        image_pil.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
+        # Detect and segment fish
+        mask, metadata = detect_fish_with_grounded_sam(image_pil, text_prompt="fish")
+        if mask is None:
+            return None, f"❌ No fish detected!\n\n{metadata}"
+        # Create visualization
+        image_np = np.array(image_pil)
+        # Apply green overlay on fish
+        overlay = image_np.copy()
+        overlay[mask] = [0, 255, 0]  # Green
+        result = cv2.addWeighted(image_np, 0.7, overlay, 0.3, 0)
+        # Draw bounding box
+        box = metadata["grounding_dino"]["bounding_box"]
+        cv2.rectangle(result, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
+        # Add confidence text
+        conf_text = f"Fish: {metadata['grounding_dino']['confidence']:.2f}"
+        cv2.putText(result, conf_text, (box[0], box[1] - 10),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
+        # Format metadata for display
+        meta_str = f"""✅ Fish detected successfully!
+🎯 Grounding DINO
+   Confidence: {metadata['grounding_dino']['confidence']:.2%}
+   Bounding Box: {metadata['grounding_dino']['bounding_box']}
+   Detections: {metadata['grounding_dino']['total_detections']}
+✂️ SAM Segmentation
+   Mask Area: {metadata['mask_percentage']:.2f}%
+   Fish Center: {metadata['fish_center']}
+   Contours: {metadata['num_contours']}
+⚙️ System
+   Device: {metadata['device']}
+   Image Size: {metadata['image_size']}
+"""
+        return result, meta_str
+    except Exception as e:
+        print(f"❌ Error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None, f"❌ Error: {str(e)}"
+# Gradio Interface
+with gr.Blocks(title="🎣 FishBoost - Grounded SAM Edition") as demo:
+    gr.Markdown("""
+    # 🎣 FishBoost - Grounded SAM Fish Detector
+    ### Powered by Grounding DINO + SAM
+    Upload an image with a fish and watch the AI detect and segment it!
+    ⚠️ **CPU Mode**: First run downloads ~680MB models (2-3 min). Processing: ~30-60 sec per image.
+    """)
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil", label="📤 Upload Fish Image")
+            quality = gr.Radio(
+                choices=["high", "medium"],
+                value="high",
+                label="🎨 Quality",
+                info="High = 1024px, Medium = 768px (faster)"
+            )
+            process_btn = gr.Button("🚀 Detect Fish", variant="primary")
+        with gr.Column():
+            output_image = gr.Image(label="🎯 Detected Fish (Green = Mask, Blue = Box)")
+            output_meta = gr.Textbox(label="📊 Detection Metadata", lines=15)
+    process_btn.click(
+        fn=process_image,
+        inputs=[input_image, quality],
+        outputs=[output_image, output_meta]
+    )
+    gr.Markdown("""
+    ---
+    ### 🔧 How it works
+    1. **Grounding DINO** finds fish bounding boxes using text prompt "fish"
+    2. **SAM** segments the exact fish shape within the box
+    3. **Result**: Precise fish mask ignoring angler/background
+    ### 📝 Model Info
+    - Grounding DINO: Text-prompted object detection
+    - SAM (ViT-H): High-quality segmentation
+    - Total Model Size: ~680MB
+    """)
+# Launch the Gradio server only when this file is run as a script.
+# server_name="0.0.0.0" listens on all network interfaces; the port is fixed at 7860.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)