import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image

# Load trained YOLO11 model
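# NOTE: "best.pt" is assumed to be the fine-tuned checkpoint saved alongside
# this script; adjust model_path if your weights live elsewhere.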
model_path = "best.pt"
model = YOLO(model_path)

# Class names
CLASS_NAMES = [
    "card_title", "card_art", "card_type", 
    "card_set_symbol", "card_mana_cost", 
    "card_oracle_text", "card_power_toughness"
]
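
# NOTE: the order above must match the class indices used at training time
# (the names list in the dataset's data.yaml); a mismatch would silently
# mislabel the boxes drawn below.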

# Define inference function
def segment_card(image):
    image = np.array(image)  # Convert PIL image to NumPy array
    results = model(image)   # Run YOLO inference
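    # Ultralytics applies its default confidence threshold (0.25) here;
    # pass conf=... to model() to tighten or loosen the filtering.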

    # Draw annotations on a copy so the original image stays untouched
    annotated_image = image.copy()

    # Keep only the highest-confidence detection per class; a card has at
    # most one of each element
    best_detections = {}

    # Extract bounding boxes and labels
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
            class_id = int(box.cls[0])  # Class index
            confidence = box.conf[0].item()  # Confidence score
            
            # Check if this is the highest confidence detection for the class
            if class_id not in best_detections or confidence > best_detections[class_id]["confidence"]:
                best_detections[class_id] = {
                    "bbox": (x1, y1, x2, y2),
                    "confidence": confidence
                }

    # Draw the highest confidence detections
    for class_id, detection in best_detections.items():
        x1, y1, x2, y2 = detection["bbox"]
        label = CLASS_NAMES[class_id]
        confidence = detection["confidence"]

        # Draw the bounding box first so the label renders on top of it
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Set text properties
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8  # Large enough to stay readable on full-size scans
        font_thickness = 2
        label_text = f"{label} ({confidence:.2f})"

        # Get text size for proper background padding
        text_size = cv2.getTextSize(label_text, font, font_scale, font_thickness)[0]
        text_x, text_y = x1, y1 - 10

        # Keep the label from running off the top edge of the image
        text_y = max(text_y, text_size[1] + 10)

        # Draw a filled background rectangle for the label, just above the box
        cv2.rectangle(
            annotated_image, 
            (text_x, text_y - text_size[1] - 5), 
            (text_x + text_size[0] + 5, text_y + 5), 
            (0, 255, 0),  # Background color (Green)
            -1
        )

        # Draw the label text over the filled background
        cv2.putText(
            annotated_image, 
            label_text, 
            (text_x, text_y), 
            font, 
            font_scale, 
            (0, 0, 0),  # Text color (Black for contrast)
            font_thickness
        )

    return Image.fromarray(annotated_image)  # Convert back to PIL Image
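
# Quick local sanity check (the filename "sample_card.jpg" is illustrative):
#   from PIL import Image
#   segment_card(Image.open("sample_card.jpg")).save("annotated_card.jpg")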

# Create Gradio UI
iface = gr.Interface(
    fn=segment_card,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="MTG Card Segmentation with YOLO11",
    description="Upload a Magic: The Gathering card image and the model will locate and label its key visual elements. (Works best with card scans.)"
)

# Launch the app
iface.launch()
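
# When running locally, a temporary public URL can be requested instead:
#   iface.launch(share=True)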