import importlib
import os
import subprocess
import sys

# Make sure `ultralytics` is importable. Installation is attempted only when
# the import fails, so a normal start does not shell out to pip. The install
# runs through the current interpreter (`sys.executable -m pip`) with an
# argument list, so the package lands in the environment that is actually
# running this script and no shell string is involved.
try:
    from ultralytics import YOLO
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "ultralytics"])
    # The failed import above may have cached the "not found" result.
    importlib.invalidate_caches()
    from ultralytics import YOLO

import cv2
import gradio as gr
import numpy as np
from ultralytics import YOLO

# Class ID that YOLO assigns to "person"; detections with this ID are ignored.
HUMAN_CLASS_ID = 0

# Pre-trained YOLOv8 nano model, loaded once when the module is imported.
model = YOLO("yolov8n.pt")

# Object classes known to the loaded YOLOv8 model.
CLASS_NAMES = model.names

def vid_inf(vid_path, contour_thresh):
    """Stream annotated preview frames for a video, then the annotated file path.

    Every frame is passed through the YOLO model and a MOG2 background
    subtractor. A moving region (a foreground contour whose area exceeds
    ``contour_thresh``) is outlined only when its bounding rectangle lies
    strictly inside a detected box whose class is not ``HUMAN_CLASS_ID`` and
    whose confidence is above 0.5. All frames, annotated or not, are written
    to ``output_recorded.mp4``.

    Args:
        vid_path: Path of the input video file.
        contour_thresh: Minimum contour area for a moving region to be
            considered. Accepts a number or a numeric string.

    Yields:
        ``(rgb_frame, None)`` for every 12th processed frame, followed by a
        final ``(None, "output_recorded.mp4")`` once the whole video has been
        processed. Nothing is yielded when the video cannot be opened.
    """
    output_video = "output_recorded.mp4"

    if not vid_path:
        print("Error opening video file")
        return

    cap = cv2.VideoCapture(vid_path)
    # Check the capture before creating the writer so the failure path does
    # not leave an empty output file or an unreleased handle behind.
    if not cap.isOpened():
        cap.release()
        print("Error opening video file")
        return

    # Coerce once: the comparison against cv2.contourArea() needs a number.
    min_contour_area = float(contour_thresh)

    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep fractional frame rates (e.g. 29.97) so the output duration
        # matches the input. Some containers report 0 or NaN; fall back to a
        # nominal rate in that case so the writer still gets a valid value.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video, fourcc, fps, (frame_width, frame_height))

        backSub = cv2.createBackgroundSubtractorMOG2(history=200, varThreshold=25, detectShadows=True)
        # The structuring element never changes, so build it once.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # YOLOv8 object detection: collect confident, non-human boxes.
            detected_boxes = []
            for result in model(frame):
                for box in result.boxes:
                    class_id = int(box.cls[0].item())
                    conf = box.conf[0].item()
                    if class_id != HUMAN_CLASS_ID and conf > 0.5:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])
                        detected_boxes.append((x1, y1, x2, y2))

            # Motion mask. The subtractor is updated on every frame. The 200
            # cutoff keeps only strong foreground; with detectShadows=True
            # MOG2 labels shadows with a lower grey value (127 by default
            # according to the OpenCV docs), so they are dropped here.
            fg_mask = backSub.apply(frame)
            _, mask_thresh = cv2.threshold(fg_mask, 200, 255, cv2.THRESH_BINARY)
            # Morphological opening removes small speckles from the mask.
            mask_opened = cv2.morphologyEx(mask_thresh, cv2.MORPH_OPEN, kernel)

            contours, _ = cv2.findContours(mask_opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            large_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_contour_area]

            frame_out = frame.copy()

            # Outline each moving region once if it sits strictly inside any
            # detected non-human box.
            for cnt in large_contours:
                x, y, w, h = cv2.boundingRect(cnt)
                inside_detection = any(
                    x > x1 and y > y1 and (x + w) < x2 and (y + h) < y2
                    for (x1, y1, x2, y2) in detected_boxes
                )
                if inside_detection:
                    cv2.rectangle(frame_out, (x, y), (x + w, y + h), (0, 0, 200), 3)

            # The writer receives the BGR frame as-is.
            out.write(frame_out)

            # Preview every 12th frame; convert to RGB only when it is shown.
            if count % 12 == 0:
                yield cv2.cvtColor(frame_out, cv2.COLOR_BGR2RGB), None
            count += 1
    finally:
        # Runs on normal completion, on errors, and when the consumer closes
        # the generator early, so the handles are always released.
        cap.release()
        if out is not None:
            out.release()

    yield None, output_video

# Gradio interface: one video and one slider in, a live preview image and the
# final annotated video out.
input_video = gr.Video(label="Input Video")
contour_thresh = gr.Slider(0, 10000, value=4, label="Contour Threshold", info="Adjust the threshold based on package size.")
output_frames = gr.Image(label="Output Frames")
output_video_file = gr.Video(label="Output Video")

app = gr.Interface(
    fn=vid_inf,
    inputs=[input_video, contour_thresh],
    outputs=[output_frames, output_video_file],
    title="Package Tracking using YOLOv8 & Motion Detection",
    description="A smart video analysis tool that uses YOLOv8 to track packages while ignoring human movement.",
    allow_flagging="never",
    # Each example row is [video path, contour threshold]. The threshold feeds
    # a numeric slider, so it is given as a number rather than a string.
    examples=[["./sample/car.mp4", 1000], ["./sample/motion_test.mp4", 5000], ["./sample/home.mp4", 4500]],
    cache_examples=False,
)
# queue() is enabled because vid_inf is a generator that streams its outputs.
app.queue().launch()