import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image

# Load trained YOLO11 model
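# NOTE: "best.pt" is assumed to be the fine-tuned checkpoint saved alongside
# this script; adjust model_path if your weights live elsewhere.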
model_path = "best.pt"
model = YOLO(model_path)

# Class names
CLASS_NAMES = [
    "card_title", "card_art", "card_type", 
    "card_set_symbol", "card_mana_cost", 
    "card_oracle_text", "card_power_toughness"
]
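
# NOTE: the order above must match the class indices used at training time
# (the names list in the dataset's data.yaml); a mismatch would silently
# mislabel the boxes drawn below.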

# Define inference function
def segment_card(image):
    image = np.array(image)  # Convert PIL image to NumPy array
    results = model(image)   # Run YOLO inference
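    # Ultralytics applies its default confidence threshold (0.25) here;
    # pass conf=... to model() to tighten or loosen the filtering.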

    # Draw annotations on a copy so the original image stays untouched
    annotated_image = image.copy()

    # Keep only the highest-confidence detection per class; a card has at
    # most one of each element
    best_detections = {}

    # Extract bounding boxes and labels
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
            class_id = int(box.cls[0])  # Class index
            confidence = box.conf[0].item()  # Confidence score
            
            # Check if this is the highest confidence detection for the class
            if class_id not in best_detections or confidence > best_detections[class_id]["confidence"]:
                best_detections[class_id] = {
                    "bbox": (x1, y1, x2, y2),
                    "confidence": confidence
                }

    # Draw the highest confidence detections
    for class_id, detection in best_detections.items():
        x1, y1, x2, y2 = detection["bbox"]
        label = CLASS_NAMES[class_id]
        confidence = detection["confidence"]

        # Draw the bounding box first so the label renders on top of it
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Set text properties
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8  # Large enough to stay readable on full-size scans
        font_thickness = 2
        label_text = f"{label} ({confidence:.2f})"

        # Get text size for proper background padding
        text_size = cv2.getTextSize(label_text, font, font_scale, font_thickness)[0]
        text_x, text_y = x1, y1 - 10

        # Keep the label from running off the top edge of the image
        text_y = max(text_y, text_size[1] + 10)

        # Draw a filled background rectangle for the label, just above the box
        cv2.rectangle(
            annotated_image, 
            (text_x, text_y - text_size[1] - 5), 
            (text_x + text_size[0] + 5, text_y + 5), 
            (0, 255, 0),  # Background color (Green)
            -1
        )

        # Draw the label text over the filled background
        cv2.putText(
            annotated_image, 
            label_text, 
            (text_x, text_y), 
            font, 
            font_scale, 
            (0, 0, 0),  # Text color (Black for contrast)
            font_thickness
        )

    return Image.fromarray(annotated_image)  # Convert back to PIL Image
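
# Quick local sanity check (the filename "sample_card.jpg" is illustrative):
#   from PIL import Image
#   segment_card(Image.open("sample_card.jpg")).save("annotated_card.jpg")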

# Create Gradio UI
iface = gr.Interface(
    fn=segment_card,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="MTG Card Segmentation with YOLO11",
    description="Upload a Magic: The Gathering card image and the model will locate and label its key visual elements. (Works best with card scans.)"
)

# Launch the app
iface.launch()
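
# When running locally, a temporary public URL can be requested instead:
#   iface.launch(share=True)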