"""Person / face-mask detection demo.

Runs YOLOv8 inference on an uploaded image or video and serves the
annotated result through a Gradio UI.
"""

import os
import tempfile

import cv2
import torch  # noqa: F401 -- backend for ultralytics; kept from original file
import gradio as gr
import numpy as np  # noqa: F401 -- Gradio image components exchange numpy arrays
import ffmpeg  # noqa: F401 -- kept from original file; currently unused
from ultralytics import YOLO

# Load the pretrained YOLOv8-nano model once at startup.
# NOTE(review): the stock COCO checkpoint only predicts classes like
# "person" -- the "mask"/"no-mask" labels below require a custom-trained
# checkpoint. Confirm which weights are actually deployed.
model = YOLO("yolov8n.pt")

# Detection classes we draw boxes for.
MASK_LABELS = ["mask", "no-mask", "person"]


def _draw_detections(frame_bgr):
    """Run YOLOv8 on a BGR frame and draw labelled boxes in place.

    Args:
        frame_bgr: H x W x 3 BGR uint8 array (mutated in place).
    """
    for result in model(frame_bgr):
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])            # confidence score
            label = model.names[int(box.cls[0])]  # class name for this box
            if label in MASK_LABELS:
                # Green for "mask", red for everything else.
                color = (0, 255, 0) if label == "mask" else (0, 0, 255)
                cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame_bgr, f"{label} {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def detect_mask_in_image(image):
    """Annotate a single RGB image with YOLOv8 detections.

    Args:
        image: H x W x 3 RGB uint8 array from Gradio, or None when the
            user clicked the button without uploading anything.

    Returns:
        Annotated RGB array, or None when no image was supplied.
    """
    if image is None:  # guard: button pressed with no upload
        return None

    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # OpenCV draws in BGR
    _draw_detections(image_bgr)
    # Convert back to RGB for display in Gradio.
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)


def detect_mask_in_video(video_file):
    """Annotate every frame of an uploaded video with YOLOv8 detections.

    Args:
        video_file: depending on the Gradio version, either a filesystem
            path (str), a tempfile wrapper exposing ``.name``, or a
            readable file object. All three shapes are handled.

    Returns:
        Path to the annotated .mp4 file, or None when no video was supplied.
    """
    if video_file is None:  # guard: button pressed with no upload
        return None

    # Normalise the Gradio payload to a local file path.
    cleanup_input = False
    if isinstance(video_file, str):
        input_path = video_file
    elif hasattr(video_file, "name") and os.path.exists(video_file.name):
        input_path = video_file.name
    else:
        # Last resort: a bare file object -- copy it to a temp file.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        tmp.write(video_file.read())
        tmp.close()
        input_path = tmp.name
        cleanup_input = True

    cap = cv2.VideoCapture(input_path)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some containers report 0 fps, which would produce a broken writer.
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_output.close()
    out = cv2.VideoWriter(temp_output.name,
                          cv2.VideoWriter_fourcc(*"mp4v"),  # mp4 codec
                          fps, (frame_width, frame_height))
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # end of stream
            _draw_detections(frame)
            out.write(frame)  # save processed frame
    finally:
        # Always release handles, even if inference raises mid-video.
        cap.release()
        out.release()
        if cleanup_input:
            os.remove(input_path)  # don't leak our temp copy of the upload

    return temp_output.name  # path of processed video


def clear():
    """Reset all four UI components wired to the Clear button.

    Must return one value per output component (input image, processed
    image, input video, processed video) -- the original returned only
    two, which Gradio rejects at click time.
    """
    return None, None, None, None


# ----- Gradio UI -----
with gr.Blocks() as demo:
    gr.Markdown("Person Detection System")
    gr.Markdown("Upload an **image or video** to detect individuals.")

    with gr.Row():
        input_image = gr.Image(type="numpy", label="Upload Image")
        image_output = gr.Image(type="numpy", label="Processed Image")

    with gr.Row():
        input_video = gr.File(label="Upload Video")
        output_video = gr.Video(label="Processed Video")

    with gr.Row():
        process_image_button = gr.Button("Detect Person in Image",
                                         elem_id="process_image_button")
        process_video_button = gr.Button("Detect Person in Video",
                                         elem_id="process_video_button")
        clear_button = gr.Button("Clear", elem_id="clear_button")

    # Link buttons to functions.
    process_image_button.click(fn=detect_mask_in_image,
                               inputs=input_image,
                               outputs=[image_output])
    process_video_button.click(fn=detect_mask_in_video,
                               inputs=input_video,
                               outputs=[output_video])
    clear_button.click(fn=clear,
                       inputs=[],
                       outputs=[input_image, image_output,
                                input_video, output_video])

# Launch Gradio interface (e.g. for Hugging Face Spaces).
demo.launch()