"""Gradio demo comparing the top detections of two nano YOLO models.

An uploaded image is run through a YOLOv8n and a YOLO12n checkpoint. The UI
shows the three most confident detections of each model side by side, plus
each model's annotated output image.
"""

import gradio as gr
from PIL import Image
from ultralytics import YOLO
import pandas as pd
import numpy as np

# COCO class names, indexed by the class id the detectors emit.
COCO_CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Number of detections reported per model (rows of the results table).
_TOP_K = 3

# Load both detectors once at import time so every request reuses them.
yolo_fast = YOLO("yolov8n.pt")  # YOLOv8 nano
yolo_acc = YOLO("yolo12n.pt")   # YOLO12 nano


def _detect_top_labels(model, image_rgb, top_k=_TOP_K):
    """Run one detector and summarise its most confident detections.

    Args:
        model: A loaded ``YOLO`` model.
        image_rgb: The input image as an RGB ``PIL.Image``.
        top_k: Maximum number of detections to report.

    Returns:
        A ``(labels, annotated)`` tuple. ``labels`` holds at most ``top_k``
        strings of the form ``"<class name> (<confidence>%)"``, ordered by
        descending confidence. ``annotated`` is a ``PIL.Image`` of the
        model's plotted detections.
    """
    result = model(image_rgb)[0]
    boxes = result.boxes

    # Pair every class id with its confidence and keep the best `top_k`.
    # With no detections the zip is empty and `labels` ends up empty too.
    ranked = sorted(
        zip(boxes.cls.cpu().numpy(), boxes.conf.cpu().numpy()),
        key=lambda pair: pair[1],
        reverse=True,
    )[:top_k]
    labels = [
        f"{COCO_CLASSES[int(cls)]} ({conf*100:.1f}%)" for cls, conf in ranked
    ]

    # `plot()` yields a BGR array; reverse the channel axis to get RGB.
    annotated = Image.fromarray(np.array(result.plot())[:, :, ::-1])
    return labels, annotated


def detect_top3_with_image(image):
    """Compare the top-3 detections of both YOLO models on one image.

    Args:
        image: The uploaded picture as a ``PIL.Image``. ``None`` (nothing
            uploaded) is rejected with a user-facing error.

    Returns:
        A ``(table, fast_img, acc_img)`` tuple: a ``pandas.DataFrame`` with
        columns ``Rank``, ``YOLOv8n`` and ``YOLOv12n`` (always three rows,
        padded with empty strings when a model finds fewer objects), and the
        annotated output image of each model.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard `image.convert` fails with an opaque
        # AttributeError when the form is submitted empty.
        raise gr.Error("Please upload an image before submitting.")

    image_rgb = image.convert("RGB")

    fast_results, fast_img = _detect_top_labels(yolo_fast, image_rgb)
    acc_results, acc_img = _detect_top_labels(yolo_acc, image_rgb)

    # Pad both columns to exactly _TOP_K rows so the DataFrame is rectangular.
    df = pd.DataFrame({
        "Rank": list(range(1, _TOP_K + 1)),
        "YOLOv8n": fast_results + [""] * (_TOP_K - len(fast_results)),
        "YOLOv12n": acc_results + [""] * (_TOP_K - len(acc_results)),
    })

    return df, fast_img, acc_img


iface = gr.Interface(
    fn=detect_top3_with_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Dataframe(
            headers=["Rank", "YOLOv8n", "YOLOv12n"],
            type="pandas",
            label="Top-3 Detections",
        ),
        gr.Image(label="YOLOv8n Detection Output"),
        gr.Image(label="YOLOv12n Detection Output"),
    ],
    title="Image Object Detection Validator",
    description=(
        "Upload an AI-generated image to see the top-3 detected objects and "
        "the visual detection output for nano version of both YOLOv8 and "
        "YOLOv12 models."
    ),
)

if __name__ == "__main__":
    iface.launch()