"""Judol Gradio YOLO11.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1oiuTAi-cys1ydtUhSDJSRdeA02mAmZQH
"""

!pip install ultralytics
!pip install gradio

import cv2
import imageio
import gradio as gr
from ultralytics import YOLO
from google.colab import drive

# Mount Google Drive so process_video() can write its output there.
drive.mount('/content/drive')

# Load the fine-tuned YOLO11 weights from the Hugging Face Hub.
model = YOLO('https://huggingface.co/JrEasy/Judol-Detection-YOLO11/resolve/main/best.pt')
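
# Optional sanity check (illustrative only; the image path is hypothetical):
# sample = cv2.imread("/content/sample.jpg")
# if sample is not None:
#     print(model.predict(sample)[0].boxes)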

# Only keep detections at or above this confidence score.
confidence_threshold = 0.6

# Class id -> display label.
class_names = {
    0: "BK8",
    1: "Gate of Olympus",
    2: "Princess",
    3: "Starlight Princess",
    4: "Zeus",
}

# Per-class annotation colours (BGR, as expected by OpenCV drawing calls).
class_colors = {
    0: (0, 255, 0),
    1: (255, 0, 0),
    2: (0, 0, 255),
    3: (255, 255, 0),
    4: (255, 0, 255),
}

def format_time_ranges(timestamps, classes):
    """Group detection timestamps per class into contiguous second ranges."""
    if not timestamps:
        return ""

    # Collect the timestamps (in seconds) for each detected class.
    class_timestamps = {}
    for timestamp, class_id in zip(timestamps, classes):
        class_name = class_names.get(class_id, 'Unknown')
        if class_name not in class_timestamps:
            class_timestamps[class_name] = []
        class_timestamps[class_name].append(timestamp)

    formatted_ranges = []
    for class_name, class_times in class_timestamps.items():
        class_times = sorted(class_times)
        ranges = []
        start = class_times[0]
        # Merge timestamps that are at most one second apart into a single range.
        for i in range(1, len(class_times)):
            if class_times[i] - class_times[i - 1] <= 1:
                continue
            ranges.append(f"{int(start)}-{int(class_times[i - 1])}")
            start = class_times[i]
        ranges.append(f"{int(start)}-{int(class_times[-1])}")

        formatted_ranges.append(f"{class_name} = {', '.join(ranges)}")

    return ", ".join(formatted_ranges)

def process_video(input_video):
    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        print("Error: Could not open input video.")
        return None, ""

    fps = cap.get(cv2.CAP_PROP_FPS)
    output_video_path = "/content/drive/MyDrive/Computer Vision YOLO-Judol Detection/processed_video.mp4"
    writer = imageio.get_writer(output_video_path, fps=fps, codec="libx264")

    frame_count = 0
    timestamps = []
    classes_detected = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        timestamp = frame_count / fps
        frame_count += 1

        # Convert the frame to grayscale and stack it back to three channels
        # before inference.
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        input_frame = cv2.merge([gray_frame, gray_frame, gray_frame])

        results = model.predict(input_frame)

        for result in results:
            for box in result.boxes:
                if box.conf[0] >= confidence_threshold:
                    # Draw the box and label on the original (colour) frame.
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    class_id = int(box.cls[0])
                    class_name = class_names.get(class_id, f"Class {class_id}")
                    color = class_colors.get(class_id, (0, 255, 0))
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    text = f'{class_name}, Conf: {box.conf[0]:.2f}'
                    text_position = (x1, y1 - 10 if y1 > 20 else y1 + 20)
                    cv2.putText(frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    timestamps.append(timestamp)
                    classes_detected.append(class_id)

        # imageio expects RGB frames, while OpenCV works in BGR.
        writer.append_data(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    cap.release()
    writer.close()

    formatted_time_ranges = format_time_ranges(timestamps, classes_detected)

    print(f"Processed video saved at: {output_video_path}")
    return output_video_path, formatted_time_ranges
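
# Illustrative usage (the path below is hypothetical):
# processed_path, time_ranges = process_video("/content/sample_video.mp4")
# print(time_ranges)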

def process_image(input_image):
    # Gradio supplies images as RGB arrays; convert to BGR for OpenCV drawing.
    bgr_frame = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # Convert to grayscale and stack back to three channels before inference,
    # matching the video pipeline above.
    gray_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2GRAY)
    input_frame = cv2.merge([gray_frame, gray_frame, gray_frame])

    results = model.predict(input_frame)

    detections_log = []
    classes_detected = []

    for result in results:
        for box in result.boxes:
            if box.conf[0] >= confidence_threshold:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_id = int(box.cls[0])
                class_name = class_names.get(class_id, f"Class {class_id}")
                color = class_colors.get(class_id, (0, 255, 0))

                cv2.rectangle(bgr_frame, (x1, y1), (x2, y2), color, 2)
                text = f'{class_name}, Conf: {box.conf[0]:.2f}'
                text_position = (x1, y1 - 10 if y1 > 20 else y1 + 20)
                cv2.putText(bgr_frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                detections_log.append({
                    "class": class_name,
                    "confidence": box.conf[0]
                })
                classes_detected.append(class_id)

    # Count detections per class and format them for the textbox output.
    class_count = {class_names.get(cls, f"Class {cls}"): classes_detected.count(cls) for cls in set(classes_detected)}
    formatted_log = ", ".join([f"{class_name} = {count}" for class_name, count in class_count.items()])

    # Convert back to RGB for display in Gradio.
    output_image = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    return output_image, formatted_log
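
# Illustrative usage (the path below is hypothetical):
# rgb_image = imageio.imread("/content/sample_image.jpg")
# annotated_image, counts_log = process_image(rgb_image)
# print(counts_log)  # e.g. "Zeus = 2, BK8 = 1"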

with gr.Blocks() as app:
    gr.Markdown("## Judol Detection using YOLO11")

    with gr.Tab("Video Detection"):
        with gr.Row():
            input_video = gr.Video(label="Upload a video")
            output_video = gr.Video(label="Processed Video")
        detections_log = gr.Textbox(label="Detections Log", lines=10)

        # Run detection whenever a video is uploaded; clear both outputs when
        # the upload is removed.
        input_video.change(
            fn=lambda input_video: process_video(input_video) if input_video else (None, ""),
            inputs=input_video,
            outputs=[output_video, detections_log],
        )

    with gr.Tab("Image Detection"):
        with gr.Row():
            input_image = gr.Image(label="Upload an image")
            output_image = gr.Image(label="Processed Image")
        image_detections_log = gr.Textbox(label="Detections Log", lines=10)

        # Run detection whenever an image is uploaded; clear both outputs when
        # the upload is removed.
        input_image.change(
            fn=lambda input_image: process_image(input_image) if input_image is not None else (None, ""),
            inputs=input_image,
            outputs=[output_image, image_detections_log],
        )

app.launch()
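
# If the inline interface does not render in Colab, app.launch(share=True)
# serves the app through a temporary public Gradio link instead.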