File size: 12,391 Bytes
0443f48
 
 
3a81d21
 
0443f48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a81d21
0443f48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a81d21
0443f48
 
 
3a81d21
0443f48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a81d21
0443f48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a91da3a
0443f48
 
 
 
 
 
 
 
2d0be29
 
0443f48
 
 
 
 
 
 
2d0be29
 
0443f48
 
 
1673e7a
0443f48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a5e595
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import tempfile
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import PIL.Image as Image
from ultralytics import YOLO

# Pretrained YOLO26 checkpoints selectable in the UI: plain names are detection
# models; -seg, -pose, -obb, -cls suffixes select segmentation, pose estimation,
# oriented bounding box, and classification variants (n/s/m sizes each).
MODEL_CHOICES = [
    "yolo26n",
    "yolo26s",
    "yolo26m",
    "yolo26n-seg",
    "yolo26s-seg",
    "yolo26m-seg",
    "yolo26n-pose",
    "yolo26s-pose",
    "yolo26m-pose",
    "yolo26n-obb",
    "yolo26s-obb",
    "yolo26m-obb",
    "yolo26n-cls",
    "yolo26s-cls",
    "yolo26m-cls",
]

# Inference image sizes offered in the UI radio controls.
IMAGE_SIZE_CHOICES = [320, 640, 1024]
# Custom stylesheet shipped next to this script; read eagerly at import time,
# so a missing ultralytics.css fails fast when the module loads.
CUSTOM_CSS = (Path(__file__).parent / "ultralytics.css").read_text()


def predict_image(img, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Predict objects in an image using an Ultralytics YOLO model with adjustable confidence and IoU thresholds.

    Args:
        img: Input image in any form accepted by YOLO.predict (PIL image, numpy array, path, or URL).
        conf_threshold (float): Minimum detection confidence in [0, 1].
        iou_threshold (float): IoU threshold for non-maximum suppression in [0, 1].
        model_name (str): Name of the YOLO checkpoint to load.
        show_labels (bool): Draw class labels on the annotated image.
        show_conf (bool): Draw confidence scores on the annotated image.
        imgsz (int): Inference image size.

    Returns:
        (PIL.Image.Image | None): Annotated image, or None if inference produced no results.
    """
    model = YOLO(model_name)
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )

    # Guard against an empty result list: the original loop left `im` unbound
    # in that case and raised UnboundLocalError.
    if not results:
        return None

    # A single image yields a single result; plot() renders a BGR array, so
    # reverse the channel axis to get RGB before handing it to PIL.
    annotated = results[0].plot(labels=show_labels, conf=show_conf)
    return Image.fromarray(annotated[..., ::-1])


def predict_video(video_path, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Predict objects in a video using an Ultralytics YOLO model and return the path to the annotated video.

    Args:
        video_path (str | None): Path to the input video, or None (e.g. nothing uploaded).
        conf_threshold (float): Minimum detection confidence in [0, 1].
        iou_threshold (float): IoU threshold for non-maximum suppression in [0, 1].
        model_name (str): Name of the YOLO checkpoint to load.
        show_labels (bool): Draw class labels on each annotated frame.
        show_conf (bool): Draw confidence scores on each annotated frame.
        imgsz (int): Inference image size.

    Returns:
        (str | None): Path to the annotated .mp4 file, or None if the input is missing/unreadable.
    """
    if video_path is None:
        return None

    model = YOLO(model_name)

    # Open the video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None

    # Get video properties. Some containers/streams report 0 FPS, which would
    # produce an unusable VideoWriter — fall back to a sane default.
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Create temporary output file (delete=False so Gradio can serve it after return)
    temp_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    output_path = temp_output.name
    temp_output.close()

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run inference on the frame
            results = model.predict(
                source=frame,
                conf=conf_threshold,
                iou=iou_threshold,
                imgsz=imgsz,
                verbose=False,
            )

            # plot() returns a BGR array, matching what VideoWriter expects
            annotated_frame = results[0].plot(labels=show_labels, conf=show_conf)
            out.write(annotated_frame)
    finally:
        # Always release handles, even if inference raises mid-stream,
        # so the capture device and the partially-written file are closed.
        cap.release()
        out.release()

    return output_path


# Model cache keyed by checkpoint name so streaming frames reuse loaded weights.
_model_cache = {}


def get_model(model_name):
    """Return a cached YOLO model for model_name, loading it on first request."""
    model = _model_cache.get(model_name)
    if model is None:
        model = YOLO(model_name)
        _model_cache[model_name] = model
    return model


def predict_webcam(frame, conf_threshold, iou_threshold, model_name, show_labels, show_conf, imgsz):
    """Predicts objects in a webcam frame using a Ultralytics YOLO model (optimized for streaming)."""
    # Guard clauses: nothing to do without a real numpy frame.
    if frame is None:
        return None
    if not isinstance(frame, np.ndarray):
        return None

    # Reuse a cached model so per-frame latency stays low while streaming.
    model = get_model(model_name)

    # Gradio's webcam streams RGB frames, while YOLO/OpenCV operate in BGR.
    bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    predictions = model.predict(
        source=bgr_frame,
        conf=conf_threshold,
        iou=iou_threshold,
        imgsz=imgsz,
        verbose=False,
    )

    # plot() renders in BGR; convert back to RGB for Gradio display.
    rendered = predictions[0].plot(labels=show_labels, conf=show_conf)
    return cv2.cvtColor(rendered, cv2.COLOR_BGR2RGB)


# Create the Gradio app with tabs. UI construction happens at import time;
# each tab wires its widgets to one of the predict_* functions above.
with gr.Blocks(title="Ultralytics YOLO26 Inference 🚀") as demo:
    # Header: banner image, documentation language links, CI/community badges,
    # and a short YOLO26 description, rendered as raw HTML/Markdown.
    gr.Markdown(
        """
<div align="center">
  <p>
    <a href="https://platform.ultralytics.com/?utm_source=huggingface&utm_medium=referral&utm_campaign=yolo26&utm_content=banner" target="_blank">
      <img width="50%" src="https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png" alt="Ultralytics YOLO banner"></a>
  </p>
  <p style="margin: 3px 0;">
    <a href="https://docs.ultralytics.com/zh/">中文</a> | <a href="https://docs.ultralytics.com/ko/">한국어</a> | <a href="https://docs.ultralytics.com/ja/">日本語</a> | <a href="https://docs.ultralytics.com/ru/">Русский</a> | <a href="https://docs.ultralytics.com/de/">Deutsch</a> | <a href="https://docs.ultralytics.com/fr/">Français</a> | <a href="https://docs.ultralytics.com/es">Español</a> | <a href="https://docs.ultralytics.com/pt/">Português</a> | <a href="https://docs.ultralytics.com/tr/">Türkçe</a> | <a href="https://docs.ultralytics.com/vi/">Tiếng Việt</a> | <a href="https://docs.ultralytics.com/ar/">العربية</a>
  </p>

  <div style="display: flex; flex-wrap: wrap; justify-content: center; align-items: center; gap: 3px; margin-top: 3px;">
    <a href="https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml"><img src="https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml/badge.svg" alt="Ultralytics CI"></a>
    <a href="https://pepy.tech/projects/ultralytics"><img src="https://static.pepy.tech/badge/ultralytics" alt="Ultralytics Downloads"></a>
    <a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="Ultralytics YOLO Citation"></a>
    <a href="https://discord.com/invite/ultralytics"><img alt="Ultralytics Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a>
    <a href="https://community.ultralytics.com/"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a>
    <a href="https://www.reddit.com/r/ultralytics/"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
  </div>
  <div style="display: flex; flex-wrap: wrap; justify-content: center; align-items: center; gap: 3px; margin-top: 3px;">
    <a href="https://console.paperspace.com/github/ultralytics/ultralytics"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run Ultralytics on Gradient"></a>
    <a href="https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open Ultralytics In Colab"></a>
    <a href="https://www.kaggle.com/models/ultralytics/yolo26"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open Ultralytics In Kaggle"></a>
    <a href="https://mybinder.org/v2/gh/ultralytics/ultralytics/HEAD?labpath=examples%2Ftutorial.ipynb"><img src="https://mybinder.org/badge_logo.svg" alt="Open Ultralytics In Binder"></a>
  </div>
</div>

[Ultralytics]( https://www.ultralytics.com/?utm_source=huggingface&utm_medium=referral&utm_campaign=yolo26&utm_content=contextual) [YOLO26](https://platform.ultralytics.com/ultralytics/yolo26?utm_source=huggingface&utm_medium=referral&utm_campaign=yolo26&utm_content=contextual_model_link) is the latest evolution in the YOLO series of real-time object detectors, engineered from the ground up for edge and low-power devices. It introduces a streamlined design that removes unnecessary complexity while integrating targeted innovations to deliver faster, lighter, and more accessible deployment.
"""
    )

    with gr.Tabs():
        # Image Tab: single-image upload -> predict_image on button click.
        with gr.TabItem("📷 Image"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(type="pil", label="Upload Image")
                    img_conf = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold")
                    img_iou = gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold")
                    img_model = gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolo26n")
                    img_labels = gr.Checkbox(value=True, label="Show Labels")
                    img_conf_show = gr.Checkbox(value=True, label="Show Confidence")
                    img_size = gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640)
                    img_btn = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    img_output = gr.Image(type="pil", label="Result")

            # Inputs are passed positionally and must match predict_image's signature order.
            img_btn.click(
                predict_image,
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
                outputs=img_output,
            )

            # Clickable example rows pre-filling the controls above (image URL,
            # conf, iou, model, show_labels, show_conf, imgsz).
            gr.Examples(
                examples=[
                    ["https://ultralytics.com/images/bus.jpg", 0.25, 0.7, "yolo26n", True, True, 640],
                    ["https://ultralytics.com/images/zidane.jpg", 0.25, 0.7, "yolo26n-seg", True, True, 640],
                    ["https://ultralytics.com/images/boats.jpg", 0.25, 0.7, "yolo26n-obb", True, True, 1024],
                ],
                inputs=[img_input, img_conf, img_iou, img_model, img_labels, img_conf_show, img_size],
            )

        # Video Tab: uploaded video -> predict_video, which returns a path to the annotated mp4.
        with gr.TabItem("🎬 Video"):
            with gr.Row():
                with gr.Column():
                    vid_input = gr.Video(label="Upload Video")
                    vid_conf = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold")
                    vid_iou = gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold")
                    vid_model = gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolo26n")
                    vid_labels = gr.Checkbox(value=True, label="Show Labels")
                    vid_conf_show = gr.Checkbox(value=True, label="Show Confidence")
                    vid_size = gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640)
                    vid_btn = gr.Button("Process Video", variant="primary")
                with gr.Column():
                    vid_output = gr.Video(label="Result")

            vid_btn.click(
                predict_video,
                inputs=[vid_input, vid_conf, vid_iou, vid_model, vid_labels, vid_conf_show, vid_size],
                outputs=vid_output,
            )

        # Webcam Tab - Real-time streaming: frames are pushed continuously to
        # predict_webcam via the stream event rather than a button click.
        with gr.TabItem("📹 Webcam"):
            gr.Markdown("### Real-time Webcam Detection")
            gr.Markdown("Enable streaming for live detection as you move!")
            with gr.Row():
                with gr.Column():
                    webcam_conf = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold")
                    webcam_iou = gr.Slider(minimum=0, maximum=1, value=0.7, label="IoU threshold")
                    webcam_model = gr.Radio(choices=MODEL_CHOICES, label="Model Name", value="yolo26n")
                    webcam_labels = gr.Checkbox(value=True, label="Show Labels")
                    webcam_conf_show = gr.Checkbox(value=True, label="Show Confidence")
                    webcam_size = gr.Radio(choices=IMAGE_SIZE_CHOICES, label="Image Size", value=640)
                with gr.Column():
                    # Streaming webcam input with real-time output (numpy frames).
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Webcam (streaming)",
                        streaming=True,
                    )
                    webcam_output = gr.Image(type="numpy", label="Detection Result")

            # Stream event for real-time detection: fires per frame while the webcam is live.
            webcam_input.stream(
                predict_webcam,
                inputs=[
                    webcam_input,
                    webcam_conf,
                    webcam_iou,
                    webcam_model,
                    webcam_labels,
                    webcam_conf_show,
                    webcam_size,
                ],
                outputs=webcam_output,
            )

# Launch with the custom Ultralytics stylesheet; server-side rendering disabled.
demo.launch(css=CUSTOM_CSS, ssr_mode=False)