"""Person / face-mask detection demo.

Runs YOLOv8 inference on an uploaded image or video and serves the
annotated result through a Gradio UI.
"""

import os
import tempfile

import cv2
import torch  # noqa: F401 -- backend for ultralytics; kept from original file
import gradio as gr
import numpy as np  # noqa: F401 -- Gradio image components exchange numpy arrays
import ffmpeg  # noqa: F401 -- kept from original file; currently unused
from ultralytics import YOLO

# Load the pretrained YOLOv8-nano model once at startup.
# NOTE(review): the stock COCO checkpoint only predicts classes like
# "person" -- the "mask"/"no-mask" labels below require a custom-trained
# checkpoint. Confirm which weights are actually deployed.
model = YOLO("yolov8n.pt")

# Detection classes we draw boxes for.
MASK_LABELS = ["mask", "no-mask", "person"]


def _draw_detections(frame_bgr):
    """Run YOLOv8 on a BGR frame and draw labelled boxes in place.

    Args:
        frame_bgr: H x W x 3 BGR uint8 array (mutated in place).
    """
    for result in model(frame_bgr):
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])            # confidence score
            label = model.names[int(box.cls[0])]  # class name for this box
            if label in MASK_LABELS:
                # Green for "mask", red for everything else.
                color = (0, 255, 0) if label == "mask" else (0, 0, 255)
                cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame_bgr, f"{label} {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def detect_mask_in_image(image):
    """Annotate a single RGB image with YOLOv8 detections.

    Args:
        image: H x W x 3 RGB uint8 array from Gradio, or None when the
            user clicked the button without uploading anything.

    Returns:
        Annotated RGB array, or None when no image was supplied.
    """
    if image is None:  # guard: button pressed with no upload
        return None

    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # OpenCV draws in BGR
    _draw_detections(image_bgr)
    # Convert back to RGB for display in Gradio.
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)


def detect_mask_in_video(video_file):
    """Annotate every frame of an uploaded video with YOLOv8 detections.

    Args:
        video_file: depending on the Gradio version, either a filesystem
            path (str), a tempfile wrapper exposing ``.name``, or a
            readable file object. All three shapes are handled.

    Returns:
        Path to the annotated .mp4 file, or None when no video was supplied.
    """
    if video_file is None:  # guard: button pressed with no upload
        return None

    # Normalise the Gradio payload to a local file path.
    cleanup_input = False
    if isinstance(video_file, str):
        input_path = video_file
    elif hasattr(video_file, "name") and os.path.exists(video_file.name):
        input_path = video_file.name
    else:
        # Last resort: a bare file object -- copy it to a temp file.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        tmp.write(video_file.read())
        tmp.close()
        input_path = tmp.name
        cleanup_input = True

    cap = cv2.VideoCapture(input_path)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some containers report 0 fps, which would produce a broken writer.
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_output.close()
    out = cv2.VideoWriter(temp_output.name,
                          cv2.VideoWriter_fourcc(*"mp4v"),  # mp4 codec
                          fps, (frame_width, frame_height))
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # end of stream
            _draw_detections(frame)
            out.write(frame)  # save processed frame
    finally:
        # Always release handles, even if inference raises mid-video.
        cap.release()
        out.release()
        if cleanup_input:
            os.remove(input_path)  # don't leak our temp copy of the upload

    return temp_output.name  # path of processed video


def clear():
    """Reset all four UI components wired to the Clear button.

    Must return one value per output component (input image, processed
    image, input video, processed video) -- the original returned only
    two, which Gradio rejects at click time.
    """
    return None, None, None, None


# ----- Gradio UI -----
with gr.Blocks() as demo:
    gr.Markdown("Person Detection System")
    gr.Markdown("Upload an **image or video** to detect individuals.")

    with gr.Row():
        input_image = gr.Image(type="numpy", label="Upload Image")
        image_output = gr.Image(type="numpy", label="Processed Image")

    with gr.Row():
        input_video = gr.File(label="Upload Video")
        output_video = gr.Video(label="Processed Video")

    with gr.Row():
        process_image_button = gr.Button("Detect Person in Image",
                                         elem_id="process_image_button")
        process_video_button = gr.Button("Detect Person in Video",
                                         elem_id="process_video_button")
        clear_button = gr.Button("Clear", elem_id="clear_button")

    # Link buttons to functions.
    process_image_button.click(fn=detect_mask_in_image,
                               inputs=input_image,
                               outputs=[image_output])
    process_video_button.click(fn=detect_mask_in_video,
                               inputs=input_video,
                               outputs=[output_video])
    clear_button.click(fn=clear,
                       inputs=[],
                       outputs=[input_image, image_output,
                                input_video, output_video])

# Launch Gradio interface (e.g. for Hugging Face Spaces).
demo.launch()