Spaces:

Ultralytics
/

YOLOv8

Running

File size: 8,746 Bytes

a4afcba
e12c915
a4afcba
 
2f5236c
a4afcba
2f5236c
a365b8a
a4afcba
2f5236c
a4afcba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f5236c
0948942
a4afcba
2f5236c
 
 
 
 
0948942
a4afcba
2f5236c
 
 
a4afcba
2f5236c
 
 
 
 
a4afcba

# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import tempfile
import cv2
import gradio as gr
import numpy as np
import PIL.Image as Image
from ultralytics import YOLO
from pathlib import Path


# Selectable checkpoints: every task variant (detect, segment, pose, oriented
# boxes, classify) in the nano, small and medium sizes. Grouped by task so the
# radio buttons list n/s/m of one task before moving on to the next.
MODEL_CHOICES = [
    f"yolov8{size}{task_suffix}"
    for task_suffix in ("", "-seg", "-pose", "-obb", "-cls")
    for size in ("n", "s", "m")
]

# Inference resolutions offered in the UI (passed to the model as `imgsz`).
IMAGE_SIZE_CHOICES = [320, 640, 1024]
# Stylesheet shipped next to this script; decoded explicitly as UTF-8 so the
# result does not depend on the host's default locale encoding.
CUSTOM_CSS = (Path(__file__).parent / "ultralytics.css").read_text(encoding="utf-8")

def predict_image(img, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Run a Ultralytics YOLO model on one image and return the annotated picture.

    Args:
        img: Image to analyse (the UI supplies a PIL image), or None when nothing was uploaded.
        conf_threshold: Minimum confidence, forwarded to the model as `conf`.
        iou_threshold: IoU threshold, forwarded to the model as `iou`.
        model_name: Name of the model to load, e.g. "yolov8n".
        show_labels: Whether class labels are drawn on the result.
        show_conf: Whether confidence scores are drawn on the result.
        imgsz: Inference image size, forwarded to the model as `imgsz`.

    Returns:
        A PIL image with the predictions drawn on it, or None when there is no
        input image or the model produced no result.
    """
    # Nothing uploaded: return early, as the video and webcam handlers do,
    # instead of handing `source=None` to the model.
    if img is None:
        return None

    model = YOLO(model_name)
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )
    if not results:
        return None

    # A single input image is summarised by the first result.
    annotated_bgr = results[0].plot(labels=show_labels, conf=show_conf)
    # plot() yields a BGR array; reverse the channel axis to get RGB for PIL.
    return Image.fromarray(annotated_bgr[..., ::-1])


def predict_video(video_path, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate every frame of a video with a Ultralytics YOLO model.

    Args:
        video_path: Path of the uploaded video, or None/empty when nothing was uploaded.
        conf_threshold: Minimum confidence, forwarded to the model as `conf`.
        iou_threshold: IoU threshold, forwarded to the model as `iou`.
        model_name: Name of the model to load, e.g. "yolov8n".
        show_labels: Whether class labels are drawn on each frame.
        show_conf: Whether confidence scores are drawn on each frame.
        imgsz: Inference image size, forwarded to the model as `imgsz`.

    Returns:
        Path of a temporary ".mp4" file holding the annotated video, or None
        when there is no input, the input cannot be opened, or the output
        writer cannot be created.
    """
    if not video_path:
        return None

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None

    writer = None
    output_path = None
    finished = False
    try:
        model = YOLO(model_name)

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the playback speed. Fall back to a sane default when the container
        # reports no usable value (0, negative or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not 0 < fps < float("inf"):
            fps = 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Reserve a file name that outlives this function; the handle is closed
        # right away so the video writer can open the path itself.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_output:
            output_path = temp_output.name

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            return None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model.predict(
                source=frame,
                conf=conf_threshold,
                iou=iou_threshold,
                imgsz=imgsz,
                verbose=False,
            )
            writer.write(results[0].plot(labels=show_labels, conf=show_conf))

        finished = True
        return output_path
    finally:
        # Always free the OpenCV handles, even when inference raises.
        cap.release()
        if writer is not None:
            writer.release()
        # Do not leave a partial or empty temp file behind on failure.
        if not finished and output_path is not None:
            Path(output_path).unlink(missing_ok=True)

# Loaded models keyed by name, so streaming callbacks do not reload weights per frame
_model_cache = {}

def get_model(model_name):
    """Return the YOLO instance for `model_name`, loading and caching it on first use."""
    model = _model_cache.get(model_name)
    if model is None:
        # First request for this name: build the model once and remember it.
        model = _model_cache[model_name] = YOLO(model_name)
    return model


def predict_webcam(frame, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate one streamed webcam frame using a cached Ultralytics YOLO model.

    Args:
        frame: RGB frame from the Gradio webcam component (numpy array), or None.
        conf_threshold: Minimum confidence, forwarded to the model as `conf`.
        iou_threshold: IoU threshold, forwarded to the model as `iou`.
        model_name: Name of the model to fetch from the cache.
        show_labels: Whether class labels are drawn on the frame.
        show_conf: Whether confidence scores are drawn on the frame.
        imgsz: Inference image size, forwarded to the model as `imgsz`.

    Returns:
        The annotated frame as an RGB numpy array, or None when no frame was
        received or the frame is not a numpy array.
    """
    if frame is None:
        return None

    # Cached lookup keeps per-frame latency low while streaming.
    detector = get_model(model_name)

    if not isinstance(frame, np.ndarray):
        return None

    # Gradio delivers RGB; the model is fed BGR, matching OpenCV conventions.
    bgr_input = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    predict_options = {
        "conf": conf_threshold,
        "iou": iou_threshold,
        "imgsz": imgsz,
        "verbose": False,
    }
    detections = detector.predict(source=bgr_input, **predict_options)

    # plot() draws onto a BGR image, so flip back to RGB before handing it to Gradio.
    annotated_bgr = detections[0].plot(labels=show_labels, conf=show_conf)
    return cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)


def _detection_controls():
    """Create the six inference-option widgets that every tab shares.

    Must be called inside an active Gradio layout context; the widgets are
    created top to bottom in the order they are returned, which is also the
    order the predict_* handlers take them after the media input:
    confidence, IoU, model name, show labels, show confidence, image size.
    """
    return (
        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold"),
        gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolov8n"),
        gr.Checkbox(value=True, label="Show Labels"),
        gr.Checkbox(value=True, label="Show Confidence"),
        gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640),
    )


# Assemble the tabbed Gradio interface: image, video and live webcam
with gr.Blocks(title="Ultralytics YOLOv8 Inference 🚀") as demo:
    gr.Markdown("# Ultralytics YOLOv8 Inference 🚀")
    gr.Markdown("Upload images, videos, or use your webcam for real-time object detection.")

    with gr.Tabs():
        # --- Image tab: one-shot inference triggered by a button ---
        with gr.TabItem("📷 Image"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(type="pil", label="Upload Image")
                    img_conf, img_iou, img_model, img_labels, img_conf_show, img_size = _detection_controls()
                    img_btn = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    img_output = gr.Image(type="pil", label="Result")

            img_inputs = [img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size]
            img_btn.click(predict_image, inputs=img_inputs, outputs=img_output)

            # Each example only varies the picture, the model and the image size.
            gr.Examples(
                examples=[
                    [url, 0.25, 0.7, model_name, True, True, size]
                    for url, model_name, size in (
                        ("https://ultralytics.com/images/bus.jpg", "yolov8n", 640),
                        ("https://ultralytics.com/images/zidane.jpg", "yolov8n-seg", 640),
                        ("https://ultralytics.com/images/boats.jpg", "yolov8n-obb", 1024),
                    )
                ],
                inputs=img_inputs,
            )

        # --- Video tab: whole-file processing triggered by a button ---
        with gr.TabItem("🎬 Video"):
            with gr.Row():
                with gr.Column():
                    vid_input = gr.Video(label="Upload Video")
                    vid_conf, vid_iou, vid_model, vid_labels, vid_conf_show, vid_size = _detection_controls()
                    vid_btn = gr.Button("Process Video", variant="primary")
                with gr.Column():
                    vid_output = gr.Video(label="Result")

            vid_btn.click(
                predict_video,
                inputs=[vid_input, vid_conf, vid_iou, vid_model, vid_labels, vid_conf_show, vid_size],
                outputs=vid_output,
            )

        # --- Webcam tab: frames are streamed and annotated continuously ---
        with gr.TabItem("📹 Webcam"):
            gr.Markdown("### Real-time Webcam Detection")
            gr.Markdown("Enable streaming for live detection as you move!")
            with gr.Row():
                with gr.Column():
                    (
                        webcam_conf,
                        webcam_iou,
                        webcam_model,
                        webcam_labels,
                        webcam_conf_show,
                        webcam_size,
                    ) = _detection_controls()
                with gr.Column():
                    # Live source on top, annotated result underneath
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Webcam (streaming)",
                        streaming=True,
                    )
                    webcam_output = gr.Image(type="numpy", label="Detection Result")

            # Every streamed frame goes through predict_webcam with the current settings
            webcam_input.stream(
                predict_webcam,
                inputs=[
                    webcam_input,
                    webcam_conf,
                    webcam_iou,
                    webcam_model,
                    webcam_labels,
                    webcam_conf_show,
                    webcam_size,
                ],
                outputs=webcam_output,
            )

demo.launch(share=True, css=CUSTOM_CSS)