Spaces:
Running
Running
File size: 8,746 Bytes
a4afcba e12c915 a4afcba 2f5236c a4afcba 2f5236c a365b8a a4afcba 2f5236c a4afcba 2f5236c 0948942 a4afcba 2f5236c 0948942 a4afcba 2f5236c a4afcba 2f5236c a4afcba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 |
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import tempfile
import cv2
import gradio as gr
import numpy as np
import PIL.Image as Image
from ultralytics import YOLO
from pathlib import Path
# Model variants offered in the UI: every task head (detect, segment, pose, OBB, classify)
# in nano / small / medium size, grouped by task.
_TASK_SUFFIXES = ("", "-seg", "-pose", "-obb", "-cls")
_MODEL_SIZES = ("n", "s", "m")
MODEL_CHOICES = [f"yolov8{size}{suffix}" for suffix in _TASK_SUFFIXES for size in _MODEL_SIZES]

# Inference resolutions selectable in the UI.
IMAGE_SIZE_CHOICES = [320, 640, 1024]
# Stylesheet shipped next to this script; read once at import time and handed to Gradio at launch.
# The encoding is pinned so the result does not depend on the host's locale default.
CUSTOM_CSS = (Path(__file__).parent / "ultralytics.css").read_text(encoding="utf-8")
def predict_image(img, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Predicts objects in an image using a Ultralytics YOLO model with adjustable confidence and IOU thresholds.

    Args:
        img: Image supplied by the Gradio image input, or None when nothing was uploaded.
        conf_threshold: Minimum confidence for a detection to be kept.
        iou_threshold: IoU threshold forwarded to the model's `predict` call.
        model_name: Name of the YOLO model to load (one of MODEL_CHOICES).
        show_labels: Whether class labels are drawn on the result.
        show_conf: Whether confidence scores are drawn on the result.
        imgsz: Inference image size.

    Returns:
        The annotated image as a PIL image, or None when no image was provided or the
        model returned no results.
    """
    # Mirror the video/webcam handlers: without an input there is nothing to predict on.
    if img is None:
        return None

    model = YOLO(model_name)
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )
    if not results:
        return None

    # The last result is the one that was returned before; plot() yields BGR, so the
    # channel axis is reversed to get the RGB order PIL expects.
    im_array = results[-1].plot(labels=show_labels, conf=show_conf)
    return Image.fromarray(im_array[..., ::-1])
def predict_video(video_path, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Predicts objects in a video using a Ultralytics YOLO model and returns the annotated video.

    Args:
        video_path: Path of the uploaded video, or None when nothing was uploaded.
        conf_threshold: Minimum confidence for a detection to be kept.
        iou_threshold: IoU threshold forwarded to the model's `predict` call.
        model_name: Name of the YOLO model to load (one of MODEL_CHOICES).
        show_labels: Whether class labels are drawn on each frame.
        show_conf: Whether confidence scores are drawn on each frame.
        imgsz: Inference image size.

    Returns:
        Path to a temporary ".mp4" file holding the annotated video, or None when no
        video was given, the input could not be opened, or the output writer could not
        be created.
    """
    if video_path is None:
        return None

    # Open the video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None

    out = None
    try:
        model = YOLO(model_name)

        # Get video properties. The frame rate is kept as a float so fractional rates
        # (e.g. 29.97) are not truncated; fall back to 30 when the container reports none.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Create temporary output file; only its name is needed, OpenCV reopens it itself
        temp_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        output_path = temp_output.name
        temp_output.close()

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            # No usable encoder: drop the empty file instead of returning a broken video.
            Path(output_path).unlink(missing_ok=True)
            return None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(
                source=frame,
                conf=conf_threshold,
                iou=iou_threshold,
                imgsz=imgsz,
                verbose=False,
            )

            # Write the annotated frame
            out.write(results[0].plot(labels=show_labels, conf=show_conf))
    finally:
        # Release native handles even when inference or encoding raises.
        cap.release()
        if out is not None:
            out.release()

    return output_path
# Loaded models keyed by model name, so streaming does not rebuild a model per frame
_model_cache = {}


def get_model(model_name):
    """Return the cached model for `model_name`, loading and storing it on first use."""
    cached = _model_cache.get(model_name)
    if cached is None:
        cached = _model_cache[model_name] = YOLO(model_name)
    return cached
def predict_webcam(frame, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate one streamed webcam frame with a cached Ultralytics YOLO model.

    Returns the annotated frame in RGB channel order, or None when there is no frame
    or the frame is not a NumPy array.
    """
    if frame is None:
        return None

    # Reuse the shared model instance rather than loading one for every frame
    detector = get_model(model_name)

    if not isinstance(frame, np.ndarray):
        return None

    # Gradio delivers RGB; the model is fed a BGR copy
    bgr_input = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    predictions = detector.predict(
        source=bgr_input,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )

    # plot() draws in BGR, so flip the channels back to RGB for display in Gradio
    bgr_annotated = predictions[0].plot(labels=show_labels, conf=show_conf)
    return cv2.cvtColor(bgr_annotated, cv2.COLOR_BGR2RGB)
def _build_inference_controls():
    """Create the inference option widgets inside the currently open Gradio layout context.

    Returns:
        Tuple of (confidence slider, IoU slider, model radio, show-labels checkbox,
        show-confidence checkbox, image-size radio), in the argument order the
        predict_* callbacks take after their media input.
    """
    conf = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold")
    iou = gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold")
    model = gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolov8n")
    labels = gr.Checkbox(value=True, label="Show Labels")
    show_conf = gr.Checkbox(value=True, label="Show Confidence")
    size = gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640)
    return conf, iou, model, labels, show_conf, size


# Create the Gradio app with tabs
with gr.Blocks(title="Ultralytics YOLOv8 Inference 🚀") as demo:
    gr.Markdown("# Ultralytics YOLOv8 Inference 🚀")
    gr.Markdown("Upload images, videos, or use your webcam for real-time object detection.")

    with gr.Tabs():
        # Image Tab
        with gr.TabItem("📷 Image"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(type="pil", label="Upload Image")
                    (
                        img_conf,
                        img_iou,
                        img_model,
                        img_labels,
                        img_conf_show,
                        img_size,
                    ) = _build_inference_controls()
                    img_btn = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    img_output = gr.Image(type="pil", label="Result")

            img_btn.click(
                predict_image,
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
                outputs=img_output,
            )

            # Clickable presets that fill in the image-tab inputs
            gr.Examples(
                examples=[
                    ["https://ultralytics.com/images/bus.jpg", 0.25, 0.7, "yolov8n", True, True, 640],
                    ["https://ultralytics.com/images/zidane.jpg", 0.25, 0.7, "yolov8n-seg", True, True, 640],
                    ["https://ultralytics.com/images/boats.jpg", 0.25, 0.7, "yolov8n-obb", True, True, 1024],
                ],
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
            )

        # Video Tab
        with gr.TabItem("🎬 Video"):
            with gr.Row():
                with gr.Column():
                    vid_input = gr.Video(label="Upload Video")
                    (
                        vid_conf,
                        vid_iou,
                        vid_model,
                        vid_labels,
                        vid_conf_show,
                        vid_size,
                    ) = _build_inference_controls()
                    vid_btn = gr.Button("Process Video", variant="primary")
                with gr.Column():
                    vid_output = gr.Video(label="Result")

            vid_btn.click(
                predict_video,
                inputs=[vid_input, vid_conf, vid_iou, vid_model, vid_labels, vid_conf_show, vid_size],
                outputs=vid_output,
            )

        # Webcam Tab - Real-time streaming
        with gr.TabItem("📹 Webcam"):
            gr.Markdown("### Real-time Webcam Detection")
            gr.Markdown("Enable streaming for live detection as you move!")
            with gr.Row():
                with gr.Column():
                    (
                        webcam_conf,
                        webcam_iou,
                        webcam_model,
                        webcam_labels,
                        webcam_conf_show,
                        webcam_size,
                    ) = _build_inference_controls()
                with gr.Column():
                    # Streaming webcam input with real-time output
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Webcam (streaming)",
                        streaming=True,
                    )
                    webcam_output = gr.Image(type="numpy", label="Detection Result")

            # Stream event for real-time detection: every incoming frame is run through predict_webcam
            webcam_input.stream(
                predict_webcam,
                inputs=[
                    webcam_input,
                    webcam_conf,
                    webcam_iou,
                    webcam_model,
                    webcam_labels,
                    webcam_conf_show,
                    webcam_size,
                ],
                outputs=webcam_output,
            )

demo.launch(share=True, css=CUSTOM_CSS)
|