# -*- coding: utf-8 -*-
"""Judol Gradio YOLO11.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1oiuTAi-cys1ydtUhSDJSRdeA02mAmZQH
"""

# Notebook shell magics are a syntax error in a plain .py file, so they are
# kept here as comments. Run them in a notebook cell or a terminal first.
# !pip install ultralytics
# !pip install gradio

import cv2
from ultralytics import YOLO
import gradio as gr
import imageio
# NOTE(review): `drive` is imported but never used in this file. The output
# path in process_video() presumably relies on Drive being mounted at
# /content/drive by some other cell -- confirm.
from google.colab import drive

model = YOLO('https://huggingface.co/JrEasy/Judol-Detection-YOLO11/resolve/main/best.pt')

# Detections whose confidence is below this value are ignored.
confidence_threshold = 0.6

# Frame rate used when the input container does not report a usable one.
_FALLBACK_FPS = 30.0

# Class ID -> display name.
class_names = {
    0: "BK8",
    1: "Gate of Olympus",
    2: "Princess",
    3: "Starlight Princess",
    4: "Zeus",
}

# Class ID -> box/label colour. Tuples are in OpenCV's BGR channel order
# because they are drawn onto BGR frames.
class_colors = {
    0: (0, 255, 0),     # Green for BK8
    1: (255, 0, 0),     # Blue for Gate of Olympus
    2: (0, 0, 255),     # Red for Princess
    3: (255, 255, 0),   # Cyan for Starlight Princess
    4: (255, 0, 255),   # Magenta for Zeus
}


def format_time_ranges(timestamps, classes):
    """Summarise per-detection timestamps as per-class time ranges.

    Args:
        timestamps: Detection times in seconds, one entry per detection.
        classes: Class IDs, parallel to ``timestamps``.

    Returns:
        A string such as ``"Zeus = 0-3, 10-12, BK8 = 5-5"``, or ``""`` when
        there are no detections. Within a class, consecutive timestamps that
        are at most one second apart are merged into one ``start-end`` range;
        both ends are truncated to whole seconds.
    """
    if not timestamps:
        return ""

    # Group timestamps by class name, keeping first-seen class order.
    class_timestamps = {}
    for timestamp, class_id in zip(timestamps, classes):
        class_name = class_names.get(class_id, 'Unknown')
        class_timestamps.setdefault(class_name, []).append(timestamp)

    formatted_ranges = []
    for class_name, class_times in class_timestamps.items():
        ordered = sorted(class_times)
        ranges = []
        start = previous = ordered[0]
        for current in ordered[1:]:
            # A gap of more than one second closes the current range.
            if current - previous > 1:
                ranges.append(f"{int(start)}-{int(previous)}")
                start = current
            previous = current
        ranges.append(f"{int(start)}-{int(previous)}")
        formatted_ranges.append(f"{class_name} = {', '.join(ranges)}")

    return ", ".join(formatted_ranges)


def _annotate_frame(frame):
    """Detect objects in a BGR frame and draw them onto it in place.

    The model is fed a 3-channel grayscale copy of the frame; boxes and
    labels are drawn on the original colour ``frame``.

    Args:
        frame: BGR image array; modified in place.

    Returns:
        List of class IDs, one per detection that met ``confidence_threshold``.
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    input_frame = cv2.merge([gray_frame, gray_frame, gray_frame])
    results = model.predict(input_frame)

    detected_classes = []
    for result in results:
        for box in result.boxes:
            if box.conf[0] >= confidence_threshold:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_id = int(box.cls[0])
                class_name = class_names.get(class_id, f"Class {class_id}")
                color = class_colors.get(class_id, (0, 255, 0))
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                text = f'{class_name}, Conf: {box.conf[0]:.2f}'
                # Put the label above the box unless that would leave the image.
                text_position = (x1, y1 - 10 if y1 > 20 else y1 + 20)
                cv2.putText(frame, text, text_position,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                detected_classes.append(class_id)
    return detected_classes


def process_video(input_video):
    """Annotate every frame of a video and write the result to disk.

    Args:
        input_video: Path of the video file to process.

    Returns:
        ``(output_video_path, formatted_time_ranges)`` on success, where the
        second item is the string built by ``format_time_ranges``. Returns
        ``(None, "")`` when the input cannot be opened.
    """
    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        cap.release()
        print("Error: Could not open input video.")
        # The second value feeds a Textbox, so it must be a string.
        return None, ""

    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # Some containers report 0 (or NaN); avoid dividing by it below.
        fps = _FALLBACK_FPS

    output_video_path = "/content/drive/MyDrive/Computer Vision YOLO-Judol Detection/processed_video.mp4"

    frame_count = 0
    timestamps = []
    classes_detected = []
    writer = None
    try:
        writer = imageio.get_writer(output_video_path, fps=fps, codec="libx264")
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            timestamp = frame_count / fps
            frame_count += 1

            for class_id in _annotate_frame(frame):
                timestamps.append(timestamp)
                classes_detected.append(class_id)

            # imageio expects RGB; OpenCV frames are BGR.
            writer.append_data(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release both handles even if inference or encoding raised.
        cap.release()
        if writer is not None:
            writer.close()

    formatted_time_ranges = format_time_ranges(timestamps, classes_detected)
    print(f"Processed video saved at: {output_video_path}")
    return output_video_path, formatted_time_ranges


def process_image(input_image):
    """Annotate a single RGB image and count detections per class.

    Args:
        input_image: RGB image array, or ``None`` when no image is provided.

    Returns:
        ``(output_image, formatted_log)``: the annotated RGB image and a
        string of ``"Class = Count"`` pairs joined by ``", "``. Returns
        ``(None, "")`` when ``input_image`` is ``None``.
    """
    if input_image is None:
        return None, ""

    # Convert image from RGB to BGR for OpenCV processing
    bgr_frame = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    classes_detected = _annotate_frame(bgr_frame)

    # Count occurrences of each class detected
    class_count = {}
    for class_id in classes_detected:
        class_count[class_id] = class_count.get(class_id, 0) + 1

    # Format the detections as 'Class = Count' pairs, in class-ID order
    formatted_log = ", ".join(
        f"{class_names.get(class_id, f'Class {class_id}')} = {count}"
        for class_id, count in sorted(class_count.items())
    )

    # Convert the output frame back to RGB
    output_image = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    return output_image, formatted_log


def _on_video_change(video_path):
    """Process the uploaded video, or clear both outputs when it is removed."""
    if not video_path:
        return None, ""
    return process_video(video_path)


with gr.Blocks() as app:
    gr.Markdown("## Judol Detection using YOLOv11")

    with gr.Tab("Video Detection"):
        with gr.Row():
            input_video = gr.Video(label="Upload a video")
            output_video = gr.Video(label="Processed Video")
            detections_log = gr.Textbox(label="Detections Log", lines=10)

        input_video.change(
            fn=_on_video_change,
            inputs=input_video,
            outputs=[output_video, detections_log],
        )

    with gr.Tab("Image Detection"):
        with gr.Row():
            input_image = gr.Image(label="Upload an image")
            output_image = gr.Image(label="Processed Image")
            image_detections_log = gr.Textbox(label="Detections Log", lines=10)

        input_image.change(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, image_detections_log],
        )

app.launch()