# app.py — Hugging Face Space by JakeTurner616 (commit 5120fbc, verified)
import gradio as gr
import torch
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
# Load trained YOLO11 model
# Weights file shipped alongside this Space; loaded once at startup so
# every request reuses the same model instance.
model_path = "best.pt"
model = YOLO(model_path)
# Class names
# Index order must match the class ids this checkpoint was trained with;
# detections are mapped to these labels inside segment_card().
CLASS_NAMES = [
"card_title", "card_art", "card_type",
"card_set_symbol", "card_mana_cost",
"card_oracle_text", "card_power_toughness"
]
# Define inference function
def segment_card(image):
    """Annotate an MTG card image with the model's best detection per class.

    Runs YOLO inference, keeps only the highest-confidence bounding box for
    each class, and draws each box with a filled label background.

    Args:
        image: PIL.Image uploaded through the Gradio UI.

    Returns:
        PIL.Image: copy of the input with boxes and labels drawn on it.
    """
    # Normalize to 3-channel RGB so drawing works even for RGBA / grayscale
    # uploads (cv2 drawing assumes a 3-channel array here).
    image = np.array(image.convert("RGB"))
    results = model(image)  # Run YOLO inference

    annotated_image = image.copy()

    # Track the highest-confidence detection per class id.
    best_detections = {}
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
            class_id = int(box.cls[0])              # Class index
            confidence = box.conf[0].item()         # Confidence score
            # Keep only the best detection seen so far for this class.
            if class_id not in best_detections or confidence > best_detections[class_id]["confidence"]:
                best_detections[class_id] = {
                    "bbox": (x1, y1, x2, y2),
                    "confidence": confidence,
                }

    # Text style is identical for every label — set it once, outside the loop.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8       # Larger font for better readability
    font_thickness = 2

    # Draw the winning detection for each class.
    for class_id, detection in best_detections.items():
        x1, y1, x2, y2 = detection["bbox"]
        # Guard against class ids outside our known label list (e.g. a
        # checkpoint trained with more classes than CLASS_NAMES covers).
        if 0 <= class_id < len(CLASS_NAMES):
            label = CLASS_NAMES[class_id]
        else:
            label = f"class_{class_id}"
        confidence = detection["confidence"]

        # Bounding box first, so the label background sits on top of it.
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        label_text = f"{label} ({confidence:.2f})"
        # Measure the text so the background rectangle pads it correctly.
        text_size = cv2.getTextSize(label_text, font, font_scale, font_thickness)[0]
        text_x, text_y = x1, y1 - 10
        # Clamp so the label never goes above the top edge of the image.
        text_y = max(text_y, text_size[1] + 10)

        # Filled green background behind the label for contrast.
        cv2.rectangle(
            annotated_image,
            (text_x, text_y - text_size[1] - 5),
            (text_x + text_size[0] + 5, text_y + 5),
            (0, 255, 0),  # Background color (Green)
            -1,
        )
        # Black label text over the green background.
        cv2.putText(
            annotated_image,
            label_text,
            (text_x, text_y),
            font,
            font_scale,
            (0, 0, 0),  # Text color (Black for contrast)
            font_thickness,
        )

    return Image.fromarray(annotated_image)  # Convert back to PIL Image
# Wire the inference function into a simple image-in / image-out Gradio UI.
iface = gr.Interface(
    fn=segment_card,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="MTG Card Segmentation with YOLO11",
    description=(
        "Upload a Magic: The Gathering card image, and the model will segment "
        "key visual elements with labels. (Works best with card scans)"
    ),
)

# Start the web server for this Space.
iface.launch()