Spaces:
Running
Running
| # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license | |
| import tempfile | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| import PIL.Image as Image | |
| from ultralytics import YOLO | |
| from pathlib import Path | |
# Checkpoint names offered in the UI: one entry per task suffix and model size,
# grouped by task. The empty suffix yields the plain "yolov8n"/"yolov8s"/"yolov8m" names.
MODEL_CHOICES = [
    f"yolov8{size}{task}"
    for task in ("", "-seg", "-pose", "-obb", "-cls")
    for size in ("n", "s", "m")
]

# Image sizes selectable in the UI.
IMAGE_SIZE_CHOICES = [320, 640, 1024]
# Stylesheet text read from "ultralytics.css" next to this script. The encoding is
# given explicitly so decoding does not depend on the platform's default locale.
CUSTOM_CSS = (Path(__file__).parent / "ultralytics.css").read_text(encoding="utf-8")
def predict_image(img, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Run a Ultralytics YOLO model on a single image and return the annotated image.

    Args:
        img: Image to analyse, or None when nothing has been uploaded.
        conf_threshold: Confidence threshold, passed to the model as ``conf``.
        iou_threshold: IoU threshold, passed to the model as ``iou``.
        model_name: Name of the YOLO model to load, e.g. "yolov8n".
        show_labels: Whether labels are drawn on the annotated image.
        show_conf: Whether confidence values are drawn on the annotated image.
        imgsz: Image size, passed to the model as ``imgsz``.

    Returns:
        The annotated image as a PIL image, or None when no image was supplied
        or the model produced no results.
    """
    # Nothing uploaded: return early instead of running inference without an input,
    # matching how predict_video and predict_webcam treat a missing input.
    if img is None:
        return None

    model = YOLO(model_name)
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )
    if not results:
        return None

    # One input image gives one result; index it directly so an empty result list
    # can never leave the return value unbound.
    im_array = results[0].plot(labels=show_labels, conf=show_conf)
    # Reverse the channel axis (BGR -> RGB) before handing the array to PIL.
    return Image.fromarray(im_array[..., ::-1])
def predict_video(video_path, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate every frame of a video with a Ultralytics YOLO model.

    Args:
        video_path: Path of the video to process, or None when nothing was uploaded.
        conf_threshold: Confidence threshold, passed to the model as ``conf``.
        iou_threshold: IoU threshold, passed to the model as ``iou``.
        model_name: Name of the YOLO model to load, e.g. "yolov8n".
        show_labels: Whether labels are drawn on the annotated frames.
        show_conf: Whether confidence values are drawn on the annotated frames.
        imgsz: Image size, passed to the model as ``imgsz``.

    Returns:
        Path of a temporary ``.mp4`` file holding the annotated video, or None when
        there is no input, the input cannot be opened, or the output writer cannot
        be created. The caller owns the returned file.
    """
    if video_path is None:
        return None

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None

    writer = None
    output_path = None
    completed = False
    try:
        model = YOLO(model_name)

        # Keep the frame rate as a float so rates such as 29.97 are not truncated.
        # `not fps > 0` also catches 0, negative and NaN values from backends that
        # cannot report a rate; 30.0 is an arbitrary fallback for those cases.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Reserve a named temporary file; it is closed straight away so that the
        # video writer can open the path itself.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_output:
            output_path = temp_output.name

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            return None

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            results = model.predict(
                source=frame,
                conf=conf_threshold,
                iou=iou_threshold,
                imgsz=imgsz,
                verbose=False,
            )
            writer.write(results[0].plot(labels=show_labels, conf=show_conf))

        completed = True
        return output_path
    finally:
        # Release both handles on every path, including exceptions raised mid-video.
        cap.release()
        if writer is not None:
            writer.release()
        # Do not leave a partial or empty temp file behind when processing failed.
        if not completed and output_path is not None:
            Path(output_path).unlink(missing_ok=True)
# Loaded models keyed by model name, so streaming requests reuse one instance per name.
_model_cache = {}


def get_model(model_name):
    """Return the YOLO model for ``model_name``, loading and caching it on first use."""
    if model_name in _model_cache:
        return _model_cache[model_name]

    instance = YOLO(model_name)
    _model_cache[model_name] = instance
    return instance
def predict_webcam(frame, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Annotate a single webcam frame with a cached Ultralytics YOLO model.

    Returns the annotated frame as an RGB array, or None when ``frame`` is None or
    is not a NumPy array.
    """
    if frame is None:
        return None

    # The cached model is fetched for every non-empty frame, before the type check.
    detector = get_model(model_name)
    if not isinstance(frame, np.ndarray):
        return None

    # Gradio delivers RGB frames; convert to BGR before running inference.
    bgr_input = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    predictions = detector.predict(
        source=bgr_input,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )

    # plot() returns a BGR image; convert it back to RGB for display in Gradio.
    rendered = predictions[0].plot(labels=show_labels, conf=show_conf)
    return cv2.cvtColor(rendered, cv2.COLOR_BGR2RGB)
def _build_inference_controls():
    """Create the six inference option widgets shared by all tabs, in display order.

    Must be called inside an open Gradio layout context so the widgets attach to it.

    Returns:
        Tuple of (confidence slider, IoU slider, model radio, show-labels checkbox,
        show-confidence checkbox, image-size radio).
    """
    return (
        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold"),
        gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolov8n"),
        gr.Checkbox(value=True, label="Show Labels"),
        gr.Checkbox(value=True, label="Show Confidence"),
        gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640),
    )


# Create the Gradio app with tabs
with gr.Blocks(title="Ultralytics YOLOv8 Inference 🚀") as demo:
    gr.Markdown("# Ultralytics YOLOv8 Inference 🚀")
    gr.Markdown("Upload images, videos, or use your webcam for real-time object detection.")

    with gr.Tabs():
        # Image tab: upload an image, pick options, press the button to run predict_image.
        with gr.TabItem("📷 Image"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(type="pil", label="Upload Image")
                    (
                        img_conf,
                        img_iou,
                        img_model,
                        img_labels,
                        img_conf_show,
                        img_size,
                    ) = _build_inference_controls()
                    img_btn = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    img_output = gr.Image(type="pil", label="Result")

            img_btn.click(
                predict_image,
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
                outputs=img_output,
            )

            # Each example row supplies a value for every input, in the same order as `inputs`.
            gr.Examples(
                examples=[
                    ["https://ultralytics.com/images/bus.jpg", 0.25, 0.7, "yolov8n", True, True, 640],
                    ["https://ultralytics.com/images/zidane.jpg", 0.25, 0.7, "yolov8n-seg", True, True, 640],
                    ["https://ultralytics.com/images/boats.jpg", 0.25, 0.7, "yolov8n-obb", True, True, 1024],
                ],
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
            )

        # Video tab: upload a video, pick options, press the button to run predict_video.
        with gr.TabItem("🎬 Video"):
            with gr.Row():
                with gr.Column():
                    vid_input = gr.Video(label="Upload Video")
                    (
                        vid_conf,
                        vid_iou,
                        vid_model,
                        vid_labels,
                        vid_conf_show,
                        vid_size,
                    ) = _build_inference_controls()
                    vid_btn = gr.Button("Process Video", variant="primary")
                with gr.Column():
                    vid_output = gr.Video(label="Result")

            vid_btn.click(
                predict_video,
                inputs=[vid_input, vid_conf, vid_iou, vid_model, vid_labels, vid_conf_show, vid_size],
                outputs=vid_output,
            )

        # Webcam tab: real-time streaming, each streamed frame goes through predict_webcam.
        with gr.TabItem("📹 Webcam"):
            gr.Markdown("### Real-time Webcam Detection")
            gr.Markdown("Enable streaming for live detection as you move!")
            with gr.Row():
                with gr.Column():
                    (
                        webcam_conf,
                        webcam_iou,
                        webcam_model,
                        webcam_labels,
                        webcam_conf_show,
                        webcam_size,
                    ) = _build_inference_controls()
                with gr.Column():
                    # Streaming webcam input with real-time output
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Webcam (streaming)",
                        streaming=True,
                    )
                    webcam_output = gr.Image(type="numpy", label="Detection Result")

            # Stream event for real-time detection
            webcam_input.stream(
                predict_webcam,
                inputs=[
                    webcam_input,
                    webcam_conf,
                    webcam_iou,
                    webcam_model,
                    webcam_labels,
                    webcam_conf_show,
                    webcam_size,
                ],
                outputs=webcam_output,
            )

demo.launch(share=True, css=CUSTOM_CSS)