atalaydenknalbant committed on
Commit
1e73dc3
·
verified ·
1 Parent(s): 631cb4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -143
app.py CHANGED
@@ -10,123 +10,136 @@ import numpy as np
10
  # Inference
11
  # -----------------------------
12
  @spaces.GPU
13
- def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
14
- """
15
- Ultralytics YOLO26 inference for image or video.
16
- Accepts detect/seg/pose/obb/cls checkpoints and renders r.plot().
17
- """
18
  model = YOLO(model_id)
19
  if getattr(model, "task", None) != "classify":
20
  head = model.model.model[-1]
21
  if hasattr(head, "one2one_cv2"):
22
  delattr(head, "one2one_cv2")
23
- if input_type == "Image":
24
- if image is None:
25
- w, h = 640, 480
26
- blank = Image.new("RGB", (w, h), color="white")
27
- draw = ImageDraw.Draw(blank)
28
- msg = "No image provided"
29
- font = ImageFont.load_default(size=40)
30
- bbox = draw.textbbox((0, 0), msg, font=font)
31
- tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
32
- draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
33
- return blank, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  results = model.predict(
36
- source=image,
37
  conf=conf_threshold,
38
  iou=iou_threshold,
39
  imgsz=640,
40
  max_det=max_detection,
41
  show_labels=True,
42
  show_conf=True,
 
43
  )
44
- annotated_image = None
 
45
  for r in results:
46
- img_bgr = r.plot()
47
- annotated_image = Image.fromarray(img_bgr[..., ::-1])
48
- return annotated_image, None
49
-
50
- if input_type == "Video":
51
- if video is None:
52
- w, h = 640, 480
53
- blank = Image.new("RGB", (w, h), color="white")
54
- draw = ImageDraw.Draw(blank)
55
- msg = "No video provided"
56
- font = ImageFont.load_default(size=40)
57
- bbox = draw.textbbox((0, 0), msg, font=font)
58
- tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
59
- draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
60
- tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
61
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
62
- out = cv2.VideoWriter(tmp, fourcc, 1, (w, h))
63
- out.write(cv2.cvtColor(np.array(blank), cv2.COLOR_RGB2BGR))
64
- out.release()
65
- return None, tmp
66
-
67
- cap = cv2.VideoCapture(video)
68
- fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
69
- frames = []
70
- while True:
71
- ret, frame = cap.read()
72
- if not ret:
73
- break
74
- pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
75
- results = model.predict(
76
- source=pil_frame,
77
- conf=conf_threshold,
78
- iou=iou_threshold,
79
- imgsz=640,
80
- max_det=max_detection,
81
- show_labels=True,
82
- show_conf=True,
83
- )
84
- for r in results:
85
- anno_bgr = r.plot()
86
- anno_rgb = cv2.cvtColor(anno_bgr, cv2.COLOR_BGR2RGB)
87
- frames.append(anno_rgb)
88
- cap.release()
89
- if not frames:
90
- return None, None
91
-
92
- h, w, _ = frames[0].shape
93
- tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
94
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
95
- out = cv2.VideoWriter(tmp, fourcc, fps, (w, h))
96
- for f in frames:
97
- out.write(cv2.cvtColor(f, cv2.COLOR_RGB2BGR))
98
- out.release()
99
- return None, tmp
100
 
101
- return None, None
 
102
 
 
 
103
 
104
- def update_visibility(input_type):
105
- if input_type == "Image":
106
- return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
107
- else:
108
- return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
109
 
110
 
111
  def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
112
- annotated_image, _ = yolo_inference(
113
- input_type="Image",
114
- image=image,
115
- video=None,
116
- model_id=model_id,
117
- conf_threshold=conf_threshold,
118
- iou_threshold=iou_threshold,
119
- max_detection=max_detection
120
- )
121
- return annotated_image
122
 
123
 
 
 
 
 
 
 
 
 
124
 
125
  with gr.Blocks() as app:
126
  gr.Markdown("# YOLO26")
127
- gr.Markdown("Image or video inference with detection, segmentation, pose, oriented bounding boxes, and classification using the latest Ultralytics YOLO26 models.")
 
 
 
128
  with gr.Accordion("Reference", open=False):
129
- gr.Markdown("""
 
130
  **BibTeX:**
131
  ```
132
  @software{yolo26_ultralytics,
@@ -139,70 +152,69 @@ with gr.Blocks() as app:
139
  license = {AGPL-3.0}
140
  }
141
  ```
142
- """
143
  )
144
 
145
- with gr.Row():
146
- with gr.Column():
147
- image = gr.Image(type="pil", label="Image", visible=True)
148
- video = gr.Video(label="Video", visible=False)
149
- input_type = gr.Radio(choices=["Image", "Video"], value="Image", label="Input Type")
150
-
151
- model_id = gr.Dropdown(
152
- label="Model",
153
- choices=[
154
- # detect
155
- "yolo26n.pt","yolo26s.pt","yolo26m.pt","yolo26l.pt","yolo26x.pt",
156
- # seg
157
- "yolo26n-seg.pt","yolo26s-seg.pt","yolo26m-seg.pt","yolo26l-seg.pt","yolo26x-seg.pt",
158
- # pose
159
- "yolo26n-pose.pt","yolo26s-pose.pt","yolo26m-pose.pt","yolo26l-pose.pt","yolo26x-pose.pt",
160
- # obb
161
- "yolo26n-obb.pt","yolo26s-obb.pt","yolo26m-obb.pt","yolo26l-obb.pt","yolo26x-obb.pt",
162
- # cls
163
- "yolo26n-cls.pt","yolo26s-cls.pt","yolo26m-cls.pt","yolo26l-cls.pt","yolo26x-cls.pt",
 
 
 
 
 
 
 
164
  ],
165
- value="yolo26n.pt",
 
 
 
166
  )
167
 
168
- conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
169
- iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
170
- max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
 
171
 
172
- infer_button = gr.Button("Detect Objects", variant="primary")
 
 
 
173
 
174
- with gr.Column():
175
- output_image = gr.Image(type="pil", show_label=False, visible=True)
176
- output_video = gr.Video(show_label=False, visible=False)
177
- gr.DeepLinkButton(variant="primary")
178
 
179
- input_type.change(
180
- fn=update_visibility,
181
- inputs=input_type,
182
- outputs=[image, video, output_image, output_video],
183
- )
184
 
185
- infer_button.click(
186
- fn=yolo_inference,
187
- inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
188
- outputs=[output_image, output_video],
189
  )
190
 
191
- gr.Examples(
192
- examples=[
193
- ["zidane.jpg", "yolo26s.pt", 0.25, 0.45, 300],
194
- ["bus.jpg", "yolo26m.pt", 0.25, 0.45, 300],
195
- ["yolo_vision.jpg", "yolo26x.pt", 0.25, 0.45, 300],
196
- ["Tricycle.jpg", "yolo26x-cls.pt", 0.25, 0.45, 300],
197
- ["tcganadolu.jpg", "yolo26m-obb.pt", 0.25, 0.45, 300],
198
- ["San Diego Airport.jpg", "yolo26x-seg.pt", 0.25, 0.45, 300],
199
- ["Theodore_Roosevelt.png", "yolo26l-pose.pt", 0.25, 0.45, 300],
200
- ],
201
- fn=yolo_inference_for_examples,
202
- inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
203
- outputs=[output_image],
204
- label="Examples",
205
  )
206
 
207
  if __name__ == "__main__":
208
- app.launch(mcp_server=True, theme = gr.themes.Ocean(primary_hue="indigo", secondary_hue="blue"))
 
10
  # Inference
11
  # -----------------------------
12
  @spaces.GPU
13
+ def yolo_inference_image(image, model_id, conf_threshold, iou_threshold, max_detection):
 
 
 
 
14
  model = YOLO(model_id)
15
  if getattr(model, "task", None) != "classify":
16
  head = model.model.model[-1]
17
  if hasattr(head, "one2one_cv2"):
18
  delattr(head, "one2one_cv2")
19
+
20
+ if image is None:
21
+ w, h = 640, 480
22
+ blank = Image.new("RGB", (w, h), color="white")
23
+ draw = ImageDraw.Draw(blank)
24
+ msg = "No image provided"
25
+ font = ImageFont.load_default(size=40)
26
+ bbox = draw.textbbox((0, 0), msg, font=font)
27
+ tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
28
+ draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
29
+ return blank
30
+
31
+ results = model.predict(
32
+ source=image,
33
+ conf=conf_threshold,
34
+ iou=iou_threshold,
35
+ imgsz=640,
36
+ max_det=max_detection,
37
+ show_labels=True,
38
+ show_conf=True,
39
+ )
40
+
41
+ annotated_image = None
42
+ for r in results:
43
+ img_bgr = r.plot()
44
+ annotated_image = Image.fromarray(img_bgr[..., ::-1])
45
+ return annotated_image
46
+
47
+
48
+ @spaces.GPU
49
+ def yolo_inference_video(video, model_id, conf_threshold, iou_threshold, max_detection):
50
+ model = YOLO(model_id)
51
+ if getattr(model, "task", None) != "classify":
52
+ head = model.model.model[-1]
53
+ if hasattr(head, "one2one_cv2"):
54
+ delattr(head, "one2one_cv2")
55
+
56
+ if video is None:
57
+ w, h = 640, 480
58
+ blank = Image.new("RGB", (w, h), color="white")
59
+ draw = ImageDraw.Draw(blank)
60
+ msg = "No video provided"
61
+ font = ImageFont.load_default(size=40)
62
+ bbox = draw.textbbox((0, 0), msg, font=font)
63
+ tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
64
+ draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
65
+
66
+ tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
67
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
68
+ out = cv2.VideoWriter(tmp, fourcc, 1, (w, h))
69
+ out.write(cv2.cvtColor(np.array(blank), cv2.COLOR_RGB2BGR))
70
+ out.release()
71
+ return tmp
72
+
73
+ cap = cv2.VideoCapture(video)
74
+ if not cap.isOpened():
75
+ return None
76
+
77
+ fps_val = cap.get(cv2.CAP_PROP_FPS)
78
+ fps = fps_val if fps_val and fps_val > 0 else 25
79
+
80
+ w_val = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
81
+ h_val = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
82
+ w = w_val if w_val and w_val > 0 else 640
83
+ h = h_val if h_val and h_val > 0 else 480
84
+
85
+ tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
86
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
87
+ out = cv2.VideoWriter(tmp, fourcc, fps, (w, h))
88
+
89
+ wrote_any = False
90
+ while True:
91
+ ret, frame = cap.read()
92
+ if not ret:
93
+ break
94
 
95
  results = model.predict(
96
+ source=frame,
97
  conf=conf_threshold,
98
  iou=iou_threshold,
99
  imgsz=640,
100
  max_det=max_detection,
101
  show_labels=True,
102
  show_conf=True,
103
+ verbose=False,
104
  )
105
+
106
+ anno_bgr = frame
107
  for r in results:
108
+ anno_bgr = r.plot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ out.write(anno_bgr)
111
+ wrote_any = True
112
 
113
+ cap.release()
114
+ out.release()
115
 
116
+ if not wrote_any:
117
+ return None
118
+ return tmp
 
 
119
 
120
 
121
  def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
122
+ return yolo_inference_image(image, model_id, conf_threshold, iou_threshold, max_detection)
 
 
 
 
 
 
 
 
 
123
 
124
 
125
+ MODEL_CHOICES = [
126
+ "yolo26n.pt", "yolo26s.pt", "yolo26m.pt", "yolo26l.pt", "yolo26x.pt",
127
+ "yolo26n-seg.pt", "yolo26s-seg.pt", "yolo26m-seg.pt", "yolo26l-seg.pt", "yolo26x-seg.pt",
128
+ "yolo26n-pose.pt", "yolo26s-pose.pt", "yolo26m-pose.pt", "yolo26l-pose.pt", "yolo26x-pose.pt",
129
+ "yolo26n-obb.pt", "yolo26s-obb.pt", "yolo26m-obb.pt", "yolo26l-obb.pt", "yolo26x-obb.pt",
130
+ "yolo26n-cls.pt", "yolo26s-cls.pt", "yolo26m-cls.pt", "yolo26l-cls.pt", "yolo26x-cls.pt",
131
+ ]
132
+
133
 
134
  with gr.Blocks() as app:
135
  gr.Markdown("# YOLO26")
136
+ gr.Markdown(
137
+ "Image or video inference with detection, segmentation, pose, oriented bounding boxes, and classification using the latest Ultralytics YOLO26 models."
138
+ )
139
+
140
  with gr.Accordion("Reference", open=False):
141
+ gr.Markdown(
142
+ """
143
  **BibTeX:**
144
  ```
145
  @software{yolo26_ultralytics,
 
152
  license = {AGPL-3.0}
153
  }
154
  ```
155
+ """
156
  )
157
 
158
+ with gr.Tabs() as media_tabs:
159
+ with gr.Tab("Image") as image_tab:
160
+ with gr.Row():
161
+ with gr.Column():
162
+ image = gr.Image(type="pil", label="Image")
163
+
164
+ model_id_img = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value="yolo26n.pt")
165
+ conf_img = gr.Slider(0, 1, value=0.25, label="Confidence Threshold")
166
+ iou_img = gr.Slider(0, 1, value=0.45, label="IoU Threshold")
167
+ max_det_img = gr.Slider(1, 300, step=1, value=300, label="Max Detection")
168
+
169
+ infer_image_button = gr.Button("Detect Objects", variant="primary")
170
+
171
+ with gr.Column():
172
+ output_image = gr.Image(type="pil", show_label=False)
173
+ gr.DeepLinkButton(variant="primary")
174
+
175
+ gr.Examples(
176
+ examples=[
177
+ ["zidane.jpg", "yolo26s.pt", 0.25, 0.45, 300],
178
+ ["bus.jpg", "yolo26m.pt", 0.25, 0.45, 300],
179
+ ["yolo_vision.jpg", "yolo26x.pt", 0.25, 0.45, 300],
180
+ ["Tricycle.jpg", "yolo26x-cls.pt", 0.25, 0.45, 300],
181
+ ["tcganadolu.jpg", "yolo26m-obb.pt", 0.25, 0.45, 300],
182
+ ["San Diego Airport.jpg", "yolo26x-seg.pt", 0.25, 0.45, 300],
183
+ ["Theodore_Roosevelt.png", "yolo26l-pose.pt", 0.25, 0.45, 300],
184
  ],
185
+ fn=yolo_inference_for_examples,
186
+ inputs=[image, model_id_img, conf_img, iou_img, max_det_img],
187
+ outputs=[output_image],
188
+ label="Examples",
189
  )
190
 
191
+ with gr.Tab("Video") as video_tab:
192
+ with gr.Row():
193
+ with gr.Column():
194
+ video = gr.Video(label="Video")
195
 
196
+ model_id_vid = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value="yolo26n.pt")
197
+ conf_vid = gr.Slider(0, 1, value=0.25, label="Confidence Threshold")
198
+ iou_vid = gr.Slider(0, 1, value=0.45, label="IoU Threshold")
199
+ max_det_vid = gr.Slider(1, 300, step=1, value=300, label="Max Detection")
200
 
201
+ infer_video_button = gr.Button("Detect Objects", variant="primary")
 
 
 
202
 
203
+ with gr.Column():
204
+ output_video = gr.Video(show_label=False)
205
+ gr.DeepLinkButton(variant="primary")
 
 
206
 
207
+ infer_image_button.click(
208
+ fn=yolo_inference_image,
209
+ inputs=[image, model_id_img, conf_img, iou_img, max_det_img],
210
+ outputs=[output_image],
211
  )
212
 
213
+ infer_video_button.click(
214
+ fn=yolo_inference_video,
215
+ inputs=[video, model_id_vid, conf_vid, iou_vid, max_det_vid],
216
+ outputs=[output_video],
 
 
 
 
 
 
 
 
 
 
217
  )
218
 
219
  if __name__ == "__main__":
220
+ app.launch(mcp_server=True, theme=gr.themes.Ocean(primary_hue="indigo", secondary_hue="blue"))