"""Gradio app that highlights moving, non-human objects in a video.

Each frame is run through a pre-trained YOLOv8 nano model and an OpenCV MOG2
background subtractor. A moving region is outlined only when it lies strictly
inside the bounding box of a detected object whose class is not "person".
"""

import importlib
import os  # not used below; retained in case other code relies on it
import subprocess
import sys

# Resolve ultralytics BEFORE the other third-party imports: the original script
# installed it first, and that install may be what provides OpenCV as well.
try:
    from ultralytics import YOLO
except ImportError:
    # Install only when missing, and into the interpreter that is actually
    # running this script (a bare "pip" on PATH may belong to another Python).
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "ultralytics"]
    )
    # The failed import above may have cached directory listings.
    importlib.invalidate_caches()
    from ultralytics import YOLO

import cv2
import gradio as gr
import numpy as np

# Load the YOLOv8 model
model = YOLO("yolov8n.pt")  # Using pre-trained YOLOv8 nano model

# Object classes in YOLOv8
CLASS_NAMES = model.names
HUMAN_CLASS_ID = 0  # Class ID for "person" in YOLO

_MIN_CONFIDENCE = 0.5  # detections at or below this confidence are ignored
_PREVIEW_EVERY_N_FRAMES = 12  # one preview frame is streamed per this many
_FALLBACK_FPS = 30.0  # used when the container reports no usable frame rate
_OUTPUT_VIDEO_PATH = "output_recorded.mp4"


def _detect_non_human_boxes(frame):
    """Return (x1, y1, x2, y2) boxes of confident, non-person detections."""
    boxes = []
    for result in model(frame):
        for box in result.boxes:
            class_id = int(box.cls[0].item())
            conf = box.conf[0].item()
            # Ignore humans, keep every other confidently detected object.
            if class_id != HUMAN_CLASS_ID and conf > _MIN_CONFIDENCE:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                boxes.append((x1, y1, x2, y2))
    return boxes


def _moving_rects(frame, back_sub, kernel, min_area):
    """Return (x, y, w, h) rects of foreground contours larger than min_area."""
    fg_mask = back_sub.apply(frame)
    # Keep only near-white mask pixels (values above 200).
    _, mask_thresh = cv2.threshold(fg_mask, 200, 255, cv2.THRESH_BINARY)
    # Morphological opening removes small speckles from the mask.
    mask_opened = cv2.morphologyEx(mask_thresh, cv2.MORPH_OPEN, kernel)
    contours, _ = cv2.findContours(
        mask_opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    return [
        cv2.boundingRect(cnt)
        for cnt in contours
        if cv2.contourArea(cnt) > min_area
    ]


def _is_strictly_inside(rect, box):
    """Return True if rect (x, y, w, h) lies strictly inside box (x1, y1, x2, y2)."""
    x, y, w, h = rect
    x1, y1, x2, y2 = box
    return x > x1 and y > y1 and (x + w) < x2 and (y + h) < y2


def vid_inf(vid_path, contour_thresh):
    """Annotate a video and stream progress to the Gradio outputs.

    Args:
        vid_path: Path of the input video file.
        contour_thresh: Minimum contour area for a moving region to be
            considered. Coerced with ``float()``, so a numeric string works.

    Yields:
        ``(rgb_frame, None)`` for every 12th processed frame while the video
        is being read, then a final ``(None, output_path)`` once the annotated
        video has been written. If the input cannot be opened, an error is
        printed and nothing is yielded.
    """
    cap = cv2.VideoCapture(vid_path)
    # Check before creating the writer so a bad input leaves nothing open.
    if not cap.isOpened():
        cap.release()
        print("Error opening video file")
        return

    min_contour_area = float(contour_thresh)
    out = None
    try:
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        # Keep the fractional rate (e.g. 29.97) and never hand the writer a
        # zero/invalid rate; `not fps > 0` also catches NaN.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = _FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(_OUTPUT_VIDEO_PATH, fourcc, fps, frame_size)

        back_sub = cv2.createBackgroundSubtractorMOG2(
            history=200, varThreshold=25, detectShadows=True
        )
        # Loop-invariant, so built once instead of per frame.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            detected_boxes = _detect_non_human_boxes(frame)
            frame_out = frame.copy()

            # Draw bounding boxes only on non-human moving objects.
            for rect in _moving_rects(
                frame, back_sub, kernel, min_contour_area
            ):
                if any(
                    _is_strictly_inside(rect, box) for box in detected_boxes
                ):
                    x, y, w, h = rect
                    cv2.rectangle(
                        frame_out, (x, y), (x + w, y + h), (0, 0, 200), 3
                    )

            out.write(frame_out)

            if not count % _PREVIEW_EVERY_N_FRAMES:
                # Convert only the frames that are actually streamed.
                yield cv2.cvtColor(frame_out, cv2.COLOR_BGR2RGB), None
            count += 1
    finally:
        # Runs on normal completion, on errors, and when the generator is
        # closed early, so the capture and writer are always released.
        # No cv2.destroyAllWindows(): no window is ever opened here.
        cap.release()
        if out is not None:
            out.release()

    yield None, _OUTPUT_VIDEO_PATH


# Gradio interface
input_video = gr.Video(label="Input Video")
contour_thresh = gr.Slider(
    0,
    10000,
    value=4,
    label="Contour Threshold",
    info="Adjust the threshold based on package size.",
)
output_frames = gr.Image(label="Output Frames")
output_video_file = gr.Video(label="Output Video")

app = gr.Interface(
    fn=vid_inf,
    inputs=[input_video, contour_thresh],
    outputs=[output_frames, output_video_file],
    title="Package Tracking using YOLOv8 & Motion Detection",
    description="A smart video analysis tool that uses YOLOv8 to track packages while ignoring human movement.",
    allow_flagging="never",
    # Slider examples are numbers, not strings, so they compare against areas.
    examples=[
        ["./sample/car.mp4", 1000],
        ["./sample/motion_test.mp4", 5000],
        ["./sample/home.mp4", 4500],
    ],
    cache_examples=False,
)

app.queue().launch()