openvision committed on
Commit
ddc05ef
·
verified ·
1 Parent(s): fc9e3e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -60
app.py CHANGED
@@ -10,78 +10,79 @@ OBB_IMAGE = ASSETS.parent / "boats.jpg"
10
  if not OBB_IMAGE.exists():
11
  safe_download("https://ultralytics.com/images/boats.jpg", dir=ASSETS.parent)
12
 
13
- # Model cache
 
 
 
 
 
 
 
 
 
14
  model_cache = {}
15
 
16
- TASK_REPO_SUFFIX = {
17
- "Detection": "",
18
- "Segmentation": "-seg",
19
- "Classification": "-cls",
20
- "Pose": "-pose",
21
- "OBB": "-obb",
22
- }
23
 
24
- def _scale_from_label(label: str) -> str:
25
- # "YOLO26-M" -> "m", "YOLOE-26L" -> "l"
26
- return label.strip()[-1].lower()
 
 
 
 
 
 
27
 
28
  def _get_model(repo_id: str) -> YOLO:
29
- if repo_id not in model_cache:
30
- path = hf_hub_download(repo_id=repo_id, filename="model.pt")
31
- model_cache[repo_id] = YOLO(path)
32
- return model_cache[repo_id]
 
 
 
33
 
34
  def predict_yolo26(image, model_name, task, conf, iou, retina):
35
- scale = _scale_from_label(model_name)
 
 
 
36
 
37
- # openvision/yolo26-n, yolo26-n-seg, yolo26-n-pose, etc.
38
- repo_id = f"openvision/yolo26-{scale}{TASK_REPO_SUFFIX[task]}"
39
  model = _get_model(repo_id)
40
 
41
- results = model.predict(
42
- source=image,
43
- conf=conf,
44
- iou=iou,
45
- imgsz=640,
46
- retina_masks=bool(retina and task == "Segmentation"),
47
- )
48
 
49
  if task == "Classification":
50
  top5 = results[0].probs.top5
51
- return None, {
52
- results[0].names[i]: float(results[0].probs.top5conf[j])
53
- for j, i in enumerate(top5)
54
- }
55
 
56
  return Image.fromarray(results[0].plot()[..., ::-1]), None
57
 
 
58
  def predict_yoloe26(image, model_name, classes_text, conf, retina):
59
- scale = _scale_from_label(model_name)
 
 
60
 
61
- # openvision/yoloe26-n-seg (open-vocab)
62
- repo_id = f"openvision/yoloe26-{scale}-seg"
63
  model = _get_model(repo_id)
64
 
65
- names = [c.strip() for c in classes_text.split(",") if c.strip()]
66
- if not names:
67
- names = ["person", "car", "dog", "cat"]
68
-
69
  model.set_classes(names, model.get_text_pe(names))
70
 
71
- results = model.predict(
72
- source=image,
73
- conf=conf,
74
- imgsz=640,
75
- retina_masks=bool(retina),
76
- )
77
 
78
- return Image.fromarray(results[0].plot()[..., ::-1])
79
 
 
 
 
80
 
 
81
  with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
82
  gr.Markdown(
83
  "# 🚀 Ultralytics YOLO26 & YOLOE26 Demo\n"
84
- "Showcasing YOLO26 tasks and YOLOE26 open-vocabulary detection. "
85
  "[GitHub](https://github.com/ultralytics/ultralytics) | [Docs](https://docs.ultralytics.com/models/yolo26/)"
86
  )
87
 
@@ -92,12 +93,13 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
92
  with gr.Column():
93
  y26_image = gr.Image(type="pil", label="Upload Image")
94
  with gr.Row():
95
- y26_model = gr.Dropdown(["YOLO26-N", "YOLO26-S", "YOLO26-M", "YOLO26-L", "YOLO26-X"], label="Model")
96
- y26_task = gr.Dropdown(list(TASK_SUFFIX.keys()), label="Task")
 
97
  with gr.Accordion("Advanced Settings", open=False):
98
- y26_conf = gr.Slider(0, 1, label="Confidence Threshold")
99
- y26_iou = gr.Slider(0, 1, label="IoU Threshold")
100
- y26_retina = gr.Checkbox(label="Retina Masks", info="Higher quality masks, slower inference")
101
  y26_btn = gr.Button("Run Inference", variant="primary")
102
  with gr.Column():
103
  y26_output = gr.Image(type="pil", label="Result")
@@ -108,19 +110,26 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
108
  y26_task,
109
  [y26_output, y26_label],
110
  )
 
111
  gr.Examples(
112
  examples=[
113
- [str(ASSETS / "bus.jpg"), "YOLO26-M", "Detection", 0.25, 0.45, True],
114
- [str(ASSETS / "bus.jpg"), "YOLO26-M", "Segmentation", 0.25, 0.45, True],
115
- [str(ASSETS / "zidane.jpg"), "YOLO26-M", "Pose", 0.25, 0.45, True],
116
- [str(OBB_IMAGE), "YOLO26-M", "OBB", 0.25, 0.45, True],
 
117
  ],
118
  inputs=[y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina],
119
  outputs=[y26_output, y26_label],
120
  fn=predict_yolo26,
121
  cache_examples=True,
122
  )
123
- y26_btn.click(predict_yolo26, [y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina], [y26_output, y26_label])
 
 
 
 
 
124
 
125
  with gr.Tab("YOLOE26 Open-Vocabulary"):
126
  gr.Markdown("### Ultralytics YOLOE26: Open-Vocabulary Segmentation - Detect any object by text description")
@@ -128,9 +137,7 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
128
  with gr.Column():
129
  ye_image = gr.Image(type="pil", label="Upload Image", value=str(ASSETS / "bus.jpg"))
130
  with gr.Row():
131
- ye_model = gr.Dropdown(
132
- ["YOLOE-26N", "YOLOE-26S", "YOLOE-26M", "YOLOE-26L", "YOLOE-26X"], value="YOLOE-26L", label="Model"
133
- )
134
  ye_classes = gr.Textbox(value="person, bus, car", label="Classes", placeholder="person, dog, cat...")
135
  with gr.Accordion("Advanced Settings", open=False):
136
  ye_conf = gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
@@ -141,15 +148,16 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
141
 
142
  gr.Examples(
143
  examples=[
144
- [str(ASSETS / "bus.jpg"), "YOLOE-26L", "person, bus, car", 0.2, True],
145
- [str(ASSETS / "zidane.jpg"), "YOLOE-26L", "person, football, grass", 0.2, True],
146
  ],
147
  inputs=[ye_image, ye_model, ye_classes, ye_conf, ye_retina],
148
  outputs=ye_output,
149
  fn=predict_yoloe26,
150
  cache_examples=True,
151
  )
 
152
  ye_btn.click(predict_yoloe26, [ye_image, ye_model, ye_classes, ye_conf, ye_retina], ye_output)
153
 
154
  if __name__ == "__main__":
155
- demo.launch(theme=theme, allowed_paths=[str(ASSETS), str(ASSETS.parent)])
 
10
  if not OBB_IMAGE.exists():
11
  safe_download("https://ultralytics.com/images/boats.jpg", dir=ASSETS.parent)
12
 
13
+ TASK_TO_REPO_TEMPLATE = {
14
+ "Detection": "openvision/yolo26-{scale}",
15
+ "Segmentation": "openvision/yolo26-{scale}-seg",
16
+ "Classification": "openvision/yolo26-{scale}-cls",
17
+ "Pose": "openvision/yolo26-{scale}-pose",
18
+ "OBB": "openvision/yolo26-{scale}-obb",
19
+ }
20
+
21
+ YOLOE_REPO_TEMPLATE = "openvision/yoloe26-{scale}-seg"
22
+
23
  model_cache = {}
24
 
 
 
 
 
 
 
 
25
 
26
+ def _scale_from_ui_name(model_name: str) -> str:
27
+ """
28
+ Convert dropdown model string to scale token used in repo names.
29
+ Examples:
30
+ "YOLO26-N" -> "n"
31
+ "YOLOE26-N" -> "n"
32
+ """
33
+ return model_name.split("-")[-1].strip().lower()
34
+
35
 
36
  def _get_model(repo_id: str) -> YOLO:
37
+ """Download (if needed) and cache YOLO model from a repo that contains 'model.pt'."""
38
+ cache_key = f"{repo_id}::model.pt"
39
+ if cache_key not in model_cache:
40
+ weights_path = hf_hub_download(repo_id=repo_id, filename="model.pt")
41
+ model_cache[cache_key] = YOLO(weights_path)
42
+ return model_cache[cache_key]
43
+
44
 
45
  def predict_yolo26(image, model_name, task, conf, iou, retina):
46
+ """Run YOLO26 inference for various tasks."""
47
+ scale = _scale_from_ui_name(model_name)
48
+ repo_tmpl = TASK_TO_REPO_TEMPLATE[task]
49
+ repo_id = repo_tmpl.format(scale=scale)
50
 
 
 
51
  model = _get_model(repo_id)
52
 
53
+ use_retina = bool(retina) and task == "Segmentation"
54
+ results = model.predict(source=image, conf=conf, iou=iou, imgsz=640, retina_masks=use_retina)
 
 
 
 
 
55
 
56
  if task == "Classification":
57
  top5 = results[0].probs.top5
58
+ return None, {results[0].names[i]: float(results[0].probs.top5conf[j]) for j, i in enumerate(top5)}
 
 
 
59
 
60
  return Image.fromarray(results[0].plot()[..., ::-1]), None
61
 
62
+
63
  def predict_yoloe26(image, model_name, classes_text, conf, retina):
64
+ """Run YOLOE26 open-vocabulary inference with text prompts."""
65
+ scale = _scale_from_ui_name(model_name)
66
+ repo_id = YOLOE_REPO_TEMPLATE.format(scale=scale)
67
 
 
 
68
  model = _get_model(repo_id)
69
 
70
+ names = [c.strip() for c in classes_text.split(",") if c.strip()] or ["person", "car", "dog", "cat"]
 
 
 
71
  model.set_classes(names, model.get_text_pe(names))
72
 
73
+ res = model.predict(source=image, conf=conf, imgsz=640, retina_masks=bool(retina))[0]
74
+ return Image.fromarray(res.plot()[..., ::-1])
 
 
 
 
75
 
 
76
 
77
+ theme = gr.themes.Base().set(
78
+ button_primary_background_fill="#111F68", button_primary_background_fill_hover="#042AFF"
79
+ )
80
 
81
+ # Build interface
82
  with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
83
  gr.Markdown(
84
  "# 🚀 Ultralytics YOLO26 & YOLOE26 Demo\n"
85
+ "Showcasing YOLO26 tasks and YOLOE26 open-vocabulary segmentation. "
86
  "[GitHub](https://github.com/ultralytics/ultralytics) | [Docs](https://docs.ultralytics.com/models/yolo26/)"
87
  )
88
 
 
93
  with gr.Column():
94
  y26_image = gr.Image(type="pil", label="Upload Image")
95
  with gr.Row():
96
+ # Repos you provided are only for the N scale, so keep dropdown aligned to that.
97
+ y26_model = gr.Dropdown(["YOLO26-N"], value="YOLO26-N", label="Model")
98
+ y26_task = gr.Dropdown(list(TASK_TO_REPO_TEMPLATE.keys()), value="Detection", label="Task")
99
  with gr.Accordion("Advanced Settings", open=False):
100
+ y26_conf = gr.Slider(0, 1, value=0.25, label="Confidence Threshold")
101
+ y26_iou = gr.Slider(0, 1, value=0.45, label="IoU Threshold")
102
+ y26_retina = gr.Checkbox(value=True, label="Retina Masks", info="Higher quality masks, slower inference")
103
  y26_btn = gr.Button("Run Inference", variant="primary")
104
  with gr.Column():
105
  y26_output = gr.Image(type="pil", label="Result")
 
110
  y26_task,
111
  [y26_output, y26_label],
112
  )
113
+
114
  gr.Examples(
115
  examples=[
116
+ [str(ASSETS / "bus.jpg"), "YOLO26-N", "Detection", 0.25, 0.45, True],
117
+ [str(ASSETS / "bus.jpg"), "YOLO26-N", "Segmentation", 0.25, 0.45, True],
118
+ [str(ASSETS / "zidane.jpg"), "YOLO26-N", "Pose", 0.25, 0.45, True],
119
+ [str(OBB_IMAGE), "YOLO26-N", "OBB", 0.25, 0.45, True],
120
+ [str(ASSETS / "bus.jpg"), "YOLO26-N", "Classification", 0.25, 0.45, True],
121
  ],
122
  inputs=[y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina],
123
  outputs=[y26_output, y26_label],
124
  fn=predict_yolo26,
125
  cache_examples=True,
126
  )
127
+
128
+ y26_btn.click(
129
+ predict_yolo26,
130
+ [y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina],
131
+ [y26_output, y26_label],
132
+ )
133
 
134
  with gr.Tab("YOLOE26 Open-Vocabulary"):
135
  gr.Markdown("### Ultralytics YOLOE26: Open-Vocabulary Segmentation - Detect any object by text description")
 
137
  with gr.Column():
138
  ye_image = gr.Image(type="pil", label="Upload Image", value=str(ASSETS / "bus.jpg"))
139
  with gr.Row():
140
+ ye_model = gr.Dropdown(["YOLOE26-N"], value="YOLOE26-N", label="Model")
 
 
141
  ye_classes = gr.Textbox(value="person, bus, car", label="Classes", placeholder="person, dog, cat...")
142
  with gr.Accordion("Advanced Settings", open=False):
143
  ye_conf = gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
 
148
 
149
  gr.Examples(
150
  examples=[
151
+ [str(ASSETS / "bus.jpg"), "YOLOE26-N", "person, bus, car", 0.2, True],
152
+ [str(ASSETS / "zidane.jpg"), "YOLOE26-N", "person, football, grass", 0.2, True],
153
  ],
154
  inputs=[ye_image, ye_model, ye_classes, ye_conf, ye_retina],
155
  outputs=ye_output,
156
  fn=predict_yoloe26,
157
  cache_examples=True,
158
  )
159
+
160
  ye_btn.click(predict_yoloe26, [ye_image, ye_model, ye_classes, ye_conf, ye_retina], ye_output)
161
 
162
  if __name__ == "__main__":
163
+ demo.launch(theme=theme, allowed_paths=[str(ASSETS), str(ASSETS.parent)])