Spaces:

JingsAPI
/

243263STest

Sleeping

File size: 4,024 Bytes

import gradio as gr
from ultralytics import YOLO
from PIL import Image
import numpy as np
import cv2
import tempfile
import os

# Load the YOLO weights once at import time; the single `model` instance is
# shared by both the image and the video handler below. "best.pt" is a
# relative path, so it is resolved against the process working directory.
model = YOLO("best.pt")

def detect_image(image):
    """Detect objects in one image and return the annotated picture plus a summary.

    Args:
        image: Image array as delivered by the Gradio image input
            (``type="numpy"``, i.e. RGB channel order), or ``None`` when the
            user pressed the button without uploading anything.

    Returns:
        A ``(annotated, text)`` tuple. ``annotated`` is a PIL image with the
        detections drawn on it (``None`` when no image was supplied) and
        ``text`` holds one ``"<class>: <confidence>"`` line per detection, or
        a placeholder message when nothing was found.
    """
    # Mirror detect_video: bail out early instead of handing None to the model.
    if image is None:
        return None, "No image uploaded"

    # Gradio supplies RGB, while Ultralytics interprets raw numpy input as BGR
    # (OpenCV convention). Without this swap the model sees red/blue exchanged
    # and the final BGR->RGB flip below would display swapped colours.
    # Only plain 3-channel images are converted; other layouts pass through
    # unchanged, as before.
    if image.ndim == 3 and image.shape[-1] == 3:
        source = np.ascontiguousarray(image[..., ::-1])
    else:
        source = image

    # Run inference on image
    results = model(source, conf=0.5, iou=0.6)
    result = results[0]

    # plot() draws on the (BGR) source image; flip back to RGB for display.
    annotated_rgb = np.ascontiguousarray(result.plot()[..., ::-1])

    # One "<class>: <confidence>" line per detected box.
    detections = [
        f"{model.names[int(box.cls)]}: {float(box.conf):.2f}"
        for box in result.boxes
    ]
    detection_text = "\n".join(detections) if detections else "No objects detected"

    return Image.fromarray(annotated_rgb), detection_text


def detect_video(video_path):
    """Run detection on a downscaled, frame-subsampled copy of a video.

    Every third frame is resized to 640 pixels wide, passed through the model
    and written, with annotations, to a temporary ``.mp4`` file whose frame
    rate is the source rate divided by three.

    Args:
        video_path: Path of the uploaded video, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(output_path, status)`` tuple. ``output_path`` is the annotated
        video file, or ``None`` when there was no input, the video could not
        be read, or the output could not be created; ``status`` is a
        human-readable message describing the outcome.
    """
    if video_path is None:
        return None, "No video uploaded"

    # Open video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None, "Could not open video"

    out = None
    output_path = None
    succeeded = False
    try:
        # Get video properties
        original_fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # A zero width would otherwise raise ZeroDivisionError just below.
        if width <= 0 or height <= 0:
            return None, "Could not read video dimensions"

        # Some containers report no frame rate (0 or NaN); fall back to 30 so
        # the writer is not created with an unusable rate.
        if not original_fps > 0:
            original_fps = 30.0

        # Reduce resolution for CPU (resize to 640 width)
        new_width = 640
        scale = new_width / width
        new_height = max(1, int(height * scale))

        # Process every 3rd frame only (reduces from 30fps to 10fps)
        # This makes it much faster on CPU
        frame_skip = 3
        output_fps = original_fps / frame_skip

        # Create temp output file; delete=False keeps it on disk after the
        # handle is closed so the writer (and later the caller) can use it.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_output:
            output_path = temp_output.name

        # Video writer
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, output_fps, (new_width, new_height))
        if not out.isOpened():
            return None, "Could not create output video"

        frame_count = 0
        processed_count = 0

        print(f"Processing video: {total_frames} total frames, skipping every {frame_skip} frames...")

        # grab() advances to the next frame without decoding it; retrieve()
        # decodes only the frames that are actually processed.
        while cap.grab():
            # Only process every nth frame
            if frame_count % frame_skip == 0:
                ret, frame = cap.retrieve()
                if not ret:
                    break

                # Resize frame to reduce resolution
                frame_resized = cv2.resize(frame, (new_width, new_height))

                # Run inference
                results = model(frame_resized, conf=0.5, iou=0.6, verbose=False)

                # Write the annotated frame to the output
                out.write(results[0].plot())
                processed_count += 1

            frame_count += 1

        if processed_count == 0:
            return None, "No frames could be read from the video"

        succeeded = True
    finally:
        # Always release native handles, even if inference raised.
        cap.release()
        if out is not None:
            out.release()
        # Do not leave a partial/empty temp file behind on failure.
        if not succeeded and output_path is not None and os.path.exists(output_path):
            os.remove(output_path)

    return output_path, f"Done! Processed {processed_count} frames from {total_frames} total frames"


# Gradio UI: two tabs (image / video), each wiring an upload widget and a
# "Detect!" button to the matching handler defined above.
with gr.Blocks(title="243263S - Traffic Cone & Cardboard Box Detector") as demo:
    gr.Markdown("# 243263S - Traffic Cone & Cardboard Box Detector")
    gr.Markdown("Upload an image or video to detect **traffic cones** and **cardboard boxes**!")

    with gr.Tab("Image Detection"):
        with gr.Row():
            # type="numpy" hands the handler an array rather than a file path.
            image_input = gr.Image(type="numpy", label="Upload Image")
            image_output = gr.Image(label="Detection Result")
        detection_text = gr.Textbox(label="Detections")
        image_btn = gr.Button("Detect!", variant="primary")
        image_btn.click(
            fn=detect_image,
            inputs=image_input,
            outputs=[image_output, detection_text]
        )

    with gr.Tab("Video Detection"):
        gr.Markdown("⚠️ Video processing may take a few minutes on CPU. Please be patient!")
        with gr.Row():
            video_input = gr.Video(label="Upload Video")
            video_output = gr.Video(label="Detection Result")
        video_status = gr.Textbox(label="Status")
        video_btn = gr.Button("Detect!", variant="primary")
        video_btn.click(
            fn=detect_video,
            inputs=video_input,
            outputs=[video_output, video_status]
        )

# Start the server only when this file is executed as a script, so that
# importing the module (e.g. to reuse the handlers or `demo`) has no side effect.
if __name__ == "__main__":
    demo.launch()