File size: 2,934 Bytes
7cb4634
 
 
 
 
 
0d0f9f8
bd5f7f6
 
 
7cb4634
 
9c83678
 
 
 
7cb4634
 
0d0f9f8
 
9c83678
 
d3ebe24
0d0f9f8
 
7cb4634
d3ebe24
4a046b6
9c83678
 
62f21e5
 
 
9c83678
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0f9f8
 
7cb4634
6fe3254
9c83678
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cb4634
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr
import cv2
import numpy as np
import tempfile
from ultralytics import YOLO

# --- Load YOLO Models ---
# One pretrained weights file per task, all living under models/.
helmet_model, fire_model, seg_model = (
    YOLO("models/helmet_detection.pt"),
    YOLO("models/fire_detection.pt"),
    YOLO("models/yolo11x-seg.pt"),
)

# UI dropdown label -> model instance. The MediaPipe entry maps to None
# because the workout-pose workflow never goes through YOLO.
model_map = {
    "Helmet Detection": helmet_model,
    "Fire Detection": fire_model,
    "Instance Segmentation": seg_model,
    "Workout Pose (MediaPipe)": None  # MediaPipe handled separately
}

# --- Placeholder MediaPipe workout processing ---
def process_workout(reference_video_path, user_video_path):
    """Placeholder for the MediaPipe pose-comparison pipeline.

    The real implementation would analyze *user_video_path* against
    *reference_video_path*; for this demo the user's video path is
    simply echoed back as the "processed" result.
    """
    return user_video_path

# --- Video processing function ---
def _resolve_path(file_obj):
    """Return a filesystem path from a Gradio file input.

    Depending on the Gradio version, ``gr.File`` hands the callback either a
    plain filepath string or a tempfile wrapper exposing ``.name``; the
    original code assumed ``.name`` and crashed on the string form.
    """
    return getattr(file_obj, "name", file_obj)


def process_video(model_name, video_file, reference_file=None):
    """Run the selected model over an uploaded video and return an output path.

    Parameters
    ----------
    model_name : str
        Dropdown label; a key of ``model_map`` (the MediaPipe option is
        routed to ``process_workout`` instead).
    video_file : str or file-like
        Uploaded video (Gradio path string or file object).
    reference_file : str or file-like, optional
        Reference video, required only for the workout-pose workflow.

    Returns
    -------
    str or None
        Path to the annotated .mp4, or None when input is missing, the
        video cannot be opened, or it contains no readable frames.
    """
    if not video_file:
        return None

    # MediaPipe workflow bypasses YOLO entirely.
    if model_name == "Workout Pose (MediaPipe)":
        if not reference_file:
            return None
        return process_workout(_resolve_path(reference_file),
                               _resolve_path(video_file))

    video_path = _resolve_path(video_file)
    model = model_map[model_name]

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None

    # Preserve the source frame rate. The original hard-coded 20 fps, which
    # made output play at the wrong speed for most videos; fall back to 20
    # only when the container reports no rate (cap.get returns 0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 20.0

    # Close the temp-file handle immediately; only the unique path is needed.
    # (.name on an unclosed NamedTemporaryFile leaked the descriptor.)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
        output_file = tmp.name
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = None

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Results.plot() renders boxes, class labels AND segmentation
            # masks, so the seg model's output is no longer silently dropped
            # (the original drew only raw xyxy rectangles).
            results = model.predict(frame, verbose=False)
            annotated = results[0].plot() if results else frame

            if out is None:
                # Lazily open the writer at the first frame's true size.
                h, w = annotated.shape[:2]
                out = cv2.VideoWriter(output_file, fourcc, fps, (w, h))
            out.write(annotated)
    finally:
        # Release resources even if prediction raises mid-stream.
        cap.release()
        if out is not None:
            out.release()

    # out is still None when no frame was ever decoded -> nothing to show.
    return output_file if out is not None else None

# --- Gradio Interface ---
# Components are rendered in declaration order inside the Blocks context;
# event wiring (.change/.click) can happen afterwards without affecting layout.
with gr.Blocks() as demo:
    gr.Markdown("# Multi-Model AI Video Analyzer")

    with gr.Row():
        model_choice = gr.Dropdown(list(model_map.keys()), label="Select Model")
        video_input = gr.File(label="Upload Video")
        reference_input = gr.File(label="Reference Video (for workout)", visible=False)

    output_video = gr.Video(label="Processed Video")
    submit_btn = gr.Button("Process Video")

    def _sync_reference_visibility(selected):
        """Show the reference upload only for the workout-pose workflow."""
        show = selected == "Workout Pose (MediaPipe)"
        return gr.update(visible=show)

    model_choice.change(
        _sync_reference_visibility,
        inputs=model_choice,
        outputs=reference_input,
    )
    submit_btn.click(
        process_video,
        inputs=[model_choice, video_input, reference_input],
        outputs=output_video,
    )

demo.launch()