Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from ultralytics import YOLO | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
# Path to the fine-tuned YOLO11 weights file.
model_path = "best.pt"
# Instantiate the detector once at startup so every request reuses it.
model = YOLO(model_path)
# Ordered class labels matching the model's training class indices.
CLASS_NAMES = [
    "card_title",
    "card_art",
    "card_type",
    "card_set_symbol",
    "card_mana_cost",
    "card_oracle_text",
    "card_power_toughness",
]
def segment_card(image):
    """Detect card regions with YOLO and draw one labeled box per class.

    Args:
        image: PIL.Image of a Magic: The Gathering card (RGB expected by
            the PIL -> NumPy conversion below).

    Returns:
        PIL.Image: copy of the input annotated with the single
        highest-confidence bounding box for each detected class.
    """
    frame = np.array(image)   # Convert PIL image to NumPy array
    results = model(frame)    # Run YOLO inference

    annotated_image = frame.copy()

    # Keep only the highest-confidence detection per class id.
    best_detections = {}
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
            class_id = int(box.cls[0])              # Class index
            confidence = box.conf[0].item()         # Confidence score
            if class_id not in best_detections or confidence > best_detections[class_id]["confidence"]:
                best_detections[class_id] = {
                    "bbox": (x1, y1, x2, y2),
                    "confidence": confidence,
                }

    # Text style is loop-invariant — hoisted out of the drawing loop.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8      # Increased font size for better readability
    font_thickness = 2

    # Draw the highest-confidence detections.
    for class_id, detection in best_detections.items():
        x1, y1, x2, y2 = detection["bbox"]
        # Guard: the loaded weights may know more classes than CLASS_NAMES lists;
        # fall back to a numeric label instead of raising IndexError.
        if class_id < len(CLASS_NAMES):
            label = CLASS_NAMES[class_id]
        else:
            label = f"class_{class_id}"
        confidence = detection["confidence"]

        # Draw the bounding box first, so the label background sits on top of it.
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        label_text = f"{label} ({confidence:.2f})"
        # Measure the text so the background rectangle fits with padding.
        text_size = cv2.getTextSize(label_text, font, font_scale, font_thickness)[0]
        text_x, text_y = x1, y1 - 10
        # Keep the label inside the top edge of the image.
        text_y = max(text_y, text_size[1] + 10)

        # Filled green background behind the label for contrast.
        cv2.rectangle(
            annotated_image,
            (text_x, text_y - text_size[1] - 5),
            (text_x + text_size[0] + 5, text_y + 5),
            (0, 255, 0),  # Background color (Green)
            -1,
        )
        # Black label text over the green background.
        cv2.putText(
            annotated_image,
            label_text,
            (text_x, text_y),
            font,
            font_scale,
            (0, 0, 0),  # Text color (Black for contrast)
            font_thickness,
        )

    return Image.fromarray(annotated_image)  # Convert back to PIL Image
# Assemble the Gradio demo: a PIL image in, an annotated PIL image out.
iface = gr.Interface(
    fn=segment_card,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="MTG Card Segmentation with YOLO11",
    # Implicit string concatenation keeps the long description readable;
    # the runtime value is identical to the single-line original.
    description=(
        "Upload a Magic: The Gathering card image, and the model will "
        "segment key visual elements with labels. (Works best with card scans)"
    ),
)

# Start the web app.
iface.launch()