YOLOv8 / app.py
onuralpszr's picture
refactor: ♻️ update gradio add video and webcam and update styling of gradio (#1)
a4afcba verified
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import tempfile
import cv2
import gradio as gr
import numpy as np
import PIL.Image as Image
from ultralytics import YOLO
from pathlib import Path
# Checkpoint names offered in the UI: for each task suffix (plain, -seg, -pose,
# -obb, -cls) the n, s and m scales are listed, in that order.
MODEL_CHOICES = [
    f"yolov8{scale}{task_suffix}"
    for task_suffix in ("", "-seg", "-pose", "-obb", "-cls")
    for scale in ("n", "s", "m")
]

# Inference image sizes selectable in the UI.
IMAGE_SIZE_CHOICES = [320, 640, 1024]
# Stylesheet stored next to this file. It is decoded explicitly as UTF-8 so the
# result does not depend on the locale encoding of the host running the app.
CUSTOM_CSS = (Path(__file__).parent / "ultralytics.css").read_text(encoding="utf-8")
def predict_image(img, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Run a YOLO model on one image and return the annotated picture.

    Args:
        img: Image to analyse, or None when nothing has been provided.
        conf_threshold: Value forwarded to ``predict`` as ``conf``.
        iou_threshold: Value forwarded to ``predict`` as ``iou``.
        model_name: Name passed to ``YOLO`` to select the model.
        show_labels: Forwarded to ``plot`` as ``labels``.
        show_conf: Forwarded to ``plot`` as ``conf``.
        imgsz: Value forwarded to ``predict`` as ``imgsz``.

    Returns:
        A PIL image with the predictions drawn on it, or None when there is no
        input image or the model returned no results.
    """
    if img is None:
        # Nothing to annotate. Passing None on to predict() is also undesirable:
        # Ultralytics is documented to substitute its own sample images when no
        # source is given, which would show an unrelated picture to the user.
        return None

    model = YOLO(model_name)
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )
    if not results:
        return None

    # A single source gives a single result; taking the last entry mirrors the
    # earlier loop, which kept only the final plotted image.
    annotated = results[-1].plot(labels=show_labels, conf=show_conf)
    # Reverse the channel axis (plot() output is BGR) before handing it to PIL.
    return Image.fromarray(annotated[..., ::-1])
def predict_video(video_path, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate every frame of a video with YOLO predictions.

    Args:
        video_path: Path of the video to process, or None when nothing was given.
        conf_threshold: Value forwarded to ``predict`` as ``conf``.
        iou_threshold: Value forwarded to ``predict`` as ``iou``.
        model_name: Name passed to ``YOLO`` to select the model.
        show_labels: Forwarded to ``plot`` as ``labels``.
        show_conf: Forwarded to ``plot`` as ``conf``.
        imgsz: Value forwarded to ``predict`` as ``imgsz``.

    Returns:
        Path of a temporary ``.mp4`` file holding the annotated video, or None
        when no path was given or the video could not be opened. The file is
        created with ``delete=False``, so it is not removed by this function.
    """
    if video_path is None:
        return None

    fallback_fps = 30.0
    capture = cv2.VideoCapture(video_path)
    writer = None
    try:
        if not capture.isOpened():
            return None

        # Load the model only once the input is known to be readable.
        model = YOLO(model_name)

        # Keep the frame rate as a float: truncating to int turns 29.97 into 29
        # and changes the playback speed. "not fps > 0" also catches NaN, which
        # together with 0 is what a backend may report for an unknown rate.
        fps = capture.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = fallback_fps
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Reserve a file name; the handle is closed so the writer can open it.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_output:
            output_path = temp_output.name

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while True:
            ok, frame = capture.read()
            if not ok:
                break
            results = model.predict(
                source=frame,
                conf=conf_threshold,
                iou=iou_threshold,
                imgsz=imgsz,
                verbose=False,
            )
            writer.write(results[0].plot(labels=show_labels, conf=show_conf))
    finally:
        # Release the native handles even if inference raises part-way through.
        capture.release()
        if writer is not None:
            writer.release()

    return output_path
# Loaded models keyed by name, so streaming calls do not reload weights per frame.
_model_cache = {}


def get_model(model_name):
    """Return the cached model for ``model_name``, loading it on first use."""
    cached = _model_cache.get(model_name)
    if cached is None:
        cached = YOLO(model_name)
        _model_cache[model_name] = cached
    return cached
def predict_webcam(frame, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate a single streamed webcam frame using a cached YOLO model.

    Returns the annotated frame as an RGB array, or None when ``frame`` is
    None or is not a NumPy array.
    """
    if frame is None:
        return None

    # The cached instance avoids reloading the model for every streamed frame.
    detector = get_model(model_name)

    if not isinstance(frame, np.ndarray):
        return None

    # Incoming frames are RGB; inference here is run on a BGR copy.
    bgr_input = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    predictions = detector.predict(
        source=bgr_input,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )

    # plot() hands back BGR, so convert once more to RGB for display.
    annotated_bgr = predictions[0].plot(labels=show_labels, conf=show_conf)
    return cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
def _detection_controls():
    """Create the six inference controls shared by every tab, in display order.

    Must be called inside the layout context where the controls should appear.
    Returns (confidence, iou, model, show_labels, show_conf, image_size).
    """
    return (
        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold"),
        gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolov8n"),
        gr.Checkbox(value=True, label="Show Labels"),
        gr.Checkbox(value=True, label="Show Confidence"),
        gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640),
    )


# Create the Gradio app with one tab per input kind: image, video and webcam.
with gr.Blocks(title="Ultralytics YOLOv8 Inference 🚀") as demo:
    gr.Markdown("# Ultralytics YOLOv8 Inference 🚀")
    gr.Markdown("Upload images, videos, or use your webcam for real-time object detection.")

    with gr.Tabs():
        # Image tab: upload a picture, press the button, see the annotated result.
        with gr.TabItem("📷 Image"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(type="pil", label="Upload Image")
                    (
                        img_conf,
                        img_iou,
                        img_model,
                        img_labels,
                        img_conf_show,
                        img_size,
                    ) = _detection_controls()
                    img_btn = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    img_output = gr.Image(type="pil", label="Result")

            img_btn.click(
                predict_image,
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
                outputs=img_output,
            )

            # Each example row fills the image input and all six controls.
            gr.Examples(
                examples=[
                    ["https://ultralytics.com/images/bus.jpg", 0.25, 0.7, "yolov8n", True, True, 640],
                    ["https://ultralytics.com/images/zidane.jpg", 0.25, 0.7, "yolov8n-seg", True, True, 640],
                    ["https://ultralytics.com/images/boats.jpg", 0.25, 0.7, "yolov8n-obb", True, True, 1024],
                ],
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
            )

        # Video tab: upload a clip and receive the annotated clip back.
        with gr.TabItem("🎬 Video"):
            with gr.Row():
                with gr.Column():
                    vid_input = gr.Video(label="Upload Video")
                    (
                        vid_conf,
                        vid_iou,
                        vid_model,
                        vid_labels,
                        vid_conf_show,
                        vid_size,
                    ) = _detection_controls()
                    vid_btn = gr.Button("Process Video", variant="primary")
                with gr.Column():
                    vid_output = gr.Video(label="Result")

            vid_btn.click(
                predict_video,
                inputs=[vid_input, vid_conf, vid_iou, vid_model, vid_labels, vid_conf_show, vid_size],
                outputs=vid_output,
            )

        # Webcam tab: frames are streamed to predict_webcam as they arrive.
        with gr.TabItem("📹 Webcam"):
            gr.Markdown("### Real-time Webcam Detection")
            gr.Markdown("Enable streaming for live detection as you move!")
            with gr.Row():
                with gr.Column():
                    (
                        webcam_conf,
                        webcam_iou,
                        webcam_model,
                        webcam_labels,
                        webcam_conf_show,
                        webcam_size,
                    ) = _detection_controls()
                with gr.Column():
                    # Streaming webcam input with the annotated frame shown below it.
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Webcam (streaming)",
                        streaming=True,
                    )
                    webcam_output = gr.Image(type="numpy", label="Detection Result")

            # The stream event fires per frame, giving near real-time detection.
            webcam_input.stream(
                predict_webcam,
                inputs=[
                    webcam_input,
                    webcam_conf,
                    webcam_iou,
                    webcam_model,
                    webcam_labels,
                    webcam_conf_show,
                    webcam_size,
                ],
                outputs=webcam_output,
            )

demo.launch(share=True, css=CUSTOM_CSS)