Spaces:

openvision
/

YOLO26

Running

App Files Community

openvision committed 5 days ago

Commit

ddc05ef

verified ·

1 Parent(s): fc9e3e8

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -60

app.py CHANGED Viewed

@@ -10,78 +10,79 @@ OBB_IMAGE = ASSETS.parent / "boats.jpg"
 if not OBB_IMAGE.exists():
     safe_download("https://ultralytics.com/images/boats.jpg", dir=ASSETS.parent)
-# Model cache
 model_cache = {}
-TASK_REPO_SUFFIX = {
-    "Detection": "",
-    "Segmentation": "-seg",
-    "Classification": "-cls",
-    "Pose": "-pose",
-    "OBB": "-obb",
-}
-def _scale_from_label(label: str) -> str:
-    # "YOLO26-M" -> "m", "YOLOE-26L" -> "l"
-    return label.strip()[-1].lower()
 def _get_model(repo_id: str) -> YOLO:
-    if repo_id not in model_cache:
-        path = hf_hub_download(repo_id=repo_id, filename="model.pt")
-        model_cache[repo_id] = YOLO(path)
-    return model_cache[repo_id]
 def predict_yolo26(image, model_name, task, conf, iou, retina):
-    scale = _scale_from_label(model_name)
-    # openvision/yolo26-n, yolo26-n-seg, yolo26-n-pose, etc.
-    repo_id = f"openvision/yolo26-{scale}{TASK_REPO_SUFFIX[task]}"
     model = _get_model(repo_id)
-    results = model.predict(
-        source=image,
-        conf=conf,
-        iou=iou,
-        imgsz=640,
-        retina_masks=bool(retina and task == "Segmentation"),
-    )
     if task == "Classification":
         top5 = results[0].probs.top5
-        return None, {
-            results[0].names[i]: float(results[0].probs.top5conf[j])
-            for j, i in enumerate(top5)
-        }
     return Image.fromarray(results[0].plot()[..., ::-1]), None
 def predict_yoloe26(image, model_name, classes_text, conf, retina):
-    scale = _scale_from_label(model_name)
-    # openvision/yoloe26-n-seg (open-vocab)
-    repo_id = f"openvision/yoloe26-{scale}-seg"
     model = _get_model(repo_id)
-    names = [c.strip() for c in classes_text.split(",") if c.strip()]
-    if not names:
-        names = ["person", "car", "dog", "cat"]
     model.set_classes(names, model.get_text_pe(names))
-    results = model.predict(
-        source=image,
-        conf=conf,
-        imgsz=640,
-        retina_masks=bool(retina),
-    )
-    return Image.fromarray(results[0].plot()[..., ::-1])
 with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
     gr.Markdown(
         "# 🚀 Ultralytics YOLO26 & YOLOE26 Demo\n"
-        "Showcasing YOLO26 tasks and YOLOE26 open-vocabulary detection. "
         "[GitHub](https://github.com/ultralytics/ultralytics) | [Docs](https://docs.ultralytics.com/models/yolo26/)"
     )
@@ -92,12 +93,13 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
                 with gr.Column():
                     y26_image = gr.Image(type="pil", label="Upload Image")
                     with gr.Row():
-                        y26_model = gr.Dropdown(["YOLO26-N", "YOLO26-S", "YOLO26-M", "YOLO26-L", "YOLO26-X"], label="Model")
-                        y26_task = gr.Dropdown(list(TASK_SUFFIX.keys()), label="Task")
                     with gr.Accordion("Advanced Settings", open=False):
-                        y26_conf = gr.Slider(0, 1, label="Confidence Threshold")
-                        y26_iou = gr.Slider(0, 1, label="IoU Threshold")
-                        y26_retina = gr.Checkbox(label="Retina Masks", info="Higher quality masks, slower inference")
                     y26_btn = gr.Button("Run Inference", variant="primary")
                 with gr.Column():
                     y26_output = gr.Image(type="pil", label="Result")
@@ -108,19 +110,26 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
                 y26_task,
                 [y26_output, y26_label],
             )
             gr.Examples(
                 examples=[
-                    [str(ASSETS / "bus.jpg"), "YOLO26-M", "Detection", 0.25, 0.45, True],
-                    [str(ASSETS / "bus.jpg"), "YOLO26-M", "Segmentation", 0.25, 0.45, True],
-                    [str(ASSETS / "zidane.jpg"), "YOLO26-M", "Pose", 0.25, 0.45, True],
-                    [str(OBB_IMAGE), "YOLO26-M", "OBB", 0.25, 0.45, True],
                 ],
                 inputs=[y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina],
                 outputs=[y26_output, y26_label],
                 fn=predict_yolo26,
                 cache_examples=True,
             )
-            y26_btn.click(predict_yolo26, [y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina], [y26_output, y26_label])
         with gr.Tab("YOLOE26 Open-Vocabulary"):
             gr.Markdown("### Ultralytics YOLOE26: Open-Vocabulary Segmentation - Detect any object by text description")
@@ -128,9 +137,7 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
                 with gr.Column():
                     ye_image = gr.Image(type="pil", label="Upload Image", value=str(ASSETS / "bus.jpg"))
                     with gr.Row():
-                        ye_model = gr.Dropdown(
-                            ["YOLOE-26N", "YOLOE-26S", "YOLOE-26M", "YOLOE-26L", "YOLOE-26X"], value="YOLOE-26L", label="Model"
-                        )
                         ye_classes = gr.Textbox(value="person, bus, car", label="Classes", placeholder="person, dog, cat...")
                     with gr.Accordion("Advanced Settings", open=False):
                         ye_conf = gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
@@ -141,15 +148,16 @@ with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
             gr.Examples(
                 examples=[
-                    [str(ASSETS / "bus.jpg"), "YOLOE-26L", "person, bus, car", 0.2, True],
-                    [str(ASSETS / "zidane.jpg"), "YOLOE-26L", "person, football, grass", 0.2, True],
                 ],
                 inputs=[ye_image, ye_model, ye_classes, ye_conf, ye_retina],
                 outputs=ye_output,
                 fn=predict_yoloe26,
                 cache_examples=True,
             )
             ye_btn.click(predict_yoloe26, [ye_image, ye_model, ye_classes, ye_conf, ye_retina], ye_output)
 if __name__ == "__main__":
-    demo.launch(theme=theme, allowed_paths=[str(ASSETS), str(ASSETS.parent)])

 if not OBB_IMAGE.exists():
     safe_download("https://ultralytics.com/images/boats.jpg", dir=ASSETS.parent)
+TASK_TO_REPO_TEMPLATE = {
+    "Detection": "openvision/yolo26-{scale}",
+    "Segmentation": "openvision/yolo26-{scale}-seg",
+    "Classification": "openvision/yolo26-{scale}-cls",
+    "Pose": "openvision/yolo26-{scale}-pose",
+    "OBB": "openvision/yolo26-{scale}-obb",
+}
+YOLOE_REPO_TEMPLATE = "openvision/yoloe26-{scale}-seg"
 model_cache = {}
+def _scale_from_ui_name(model_name: str) -> str:
+    """
+    Convert dropdown model string to scale token used in repo names.
+    Examples:
+        "YOLO26-N"  -> "n"
+        "YOLOE26-N" -> "n"
+    """
+    return model_name.split("-")[-1].strip().lower()
 def _get_model(repo_id: str) -> YOLO:
+    """Download (if needed) and cache YOLO model from a repo that contains 'model.pt'."""
+    cache_key = f"{repo_id}::model.pt"
+    if cache_key not in model_cache:
+        weights_path = hf_hub_download(repo_id=repo_id, filename="model.pt")
+        model_cache[cache_key] = YOLO(weights_path)
+    return model_cache[cache_key]
 def predict_yolo26(image, model_name, task, conf, iou, retina):
+    """Run YOLO26 inference for various tasks."""
+    scale = _scale_from_ui_name(model_name)
+    repo_tmpl = TASK_TO_REPO_TEMPLATE[task]
+    repo_id = repo_tmpl.format(scale=scale)
     model = _get_model(repo_id)
+    use_retina = bool(retina) and task == "Segmentation"
+    results = model.predict(source=image, conf=conf, iou=iou, imgsz=640, retina_masks=use_retina)
     if task == "Classification":
         top5 = results[0].probs.top5
+        return None, {results[0].names[i]: float(results[0].probs.top5conf[j]) for j, i in enumerate(top5)}
     return Image.fromarray(results[0].plot()[..., ::-1]), None
 def predict_yoloe26(image, model_name, classes_text, conf, retina):
+    """Run YOLOE26 open-vocabulary inference with text prompts."""
+    scale = _scale_from_ui_name(model_name)
+    repo_id = YOLOE_REPO_TEMPLATE.format(scale=scale)
     model = _get_model(repo_id)
+    names = [c.strip() for c in classes_text.split(",") if c.strip()] or ["person", "car", "dog", "cat"]
     model.set_classes(names, model.get_text_pe(names))
+    res = model.predict(source=image, conf=conf, imgsz=640, retina_masks=bool(retina))[0]
+    return Image.fromarray(res.plot()[..., ::-1])
+theme = gr.themes.Base().set(
+    button_primary_background_fill="#111F68", button_primary_background_fill_hover="#042AFF"
+)
+# Build interface
 with gr.Blocks(title="Ultralytics YOLO26 & YOLOE26 Demo") as demo:
     gr.Markdown(
         "# 🚀 Ultralytics YOLO26 & YOLOE26 Demo\n"
+        "Showcasing YOLO26 tasks and YOLOE26 open-vocabulary segmentation. "
         "[GitHub](https://github.com/ultralytics/ultralytics) | [Docs](https://docs.ultralytics.com/models/yolo26/)"
     )
                 with gr.Column():
                     y26_image = gr.Image(type="pil", label="Upload Image")
                     with gr.Row():
+                        # Repos you provided are only for the N scale, so keep dropdown aligned to that.
+                        y26_model = gr.Dropdown(["YOLO26-N"], value="YOLO26-N", label="Model")
+                        y26_task = gr.Dropdown(list(TASK_TO_REPO_TEMPLATE.keys()), value="Detection", label="Task")
                     with gr.Accordion("Advanced Settings", open=False):
+                        y26_conf = gr.Slider(0, 1, value=0.25, label="Confidence Threshold")
+                        y26_iou = gr.Slider(0, 1, value=0.45, label="IoU Threshold")
+                        y26_retina = gr.Checkbox(value=True, label="Retina Masks", info="Higher quality masks, slower inference")
                     y26_btn = gr.Button("Run Inference", variant="primary")
                 with gr.Column():
                     y26_output = gr.Image(type="pil", label="Result")
                 y26_task,
                 [y26_output, y26_label],
             )
             gr.Examples(
                 examples=[
+                    [str(ASSETS / "bus.jpg"), "YOLO26-N", "Detection", 0.25, 0.45, True],
+                    [str(ASSETS / "bus.jpg"), "YOLO26-N", "Segmentation", 0.25, 0.45, True],
+                    [str(ASSETS / "zidane.jpg"), "YOLO26-N", "Pose", 0.25, 0.45, True],
+                    [str(OBB_IMAGE), "YOLO26-N", "OBB", 0.25, 0.45, True],
+                    [str(ASSETS / "bus.jpg"), "YOLO26-N", "Classification", 0.25, 0.45, True],
                 ],
                 inputs=[y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina],
                 outputs=[y26_output, y26_label],
                 fn=predict_yolo26,
                 cache_examples=True,
             )
+            y26_btn.click(
+                predict_yolo26,
+                [y26_image, y26_model, y26_task, y26_conf, y26_iou, y26_retina],
+                [y26_output, y26_label],
+            )
         with gr.Tab("YOLOE26 Open-Vocabulary"):
             gr.Markdown("### Ultralytics YOLOE26: Open-Vocabulary Segmentation - Detect any object by text description")
                 with gr.Column():
                     ye_image = gr.Image(type="pil", label="Upload Image", value=str(ASSETS / "bus.jpg"))
                     with gr.Row():
+                        ye_model = gr.Dropdown(["YOLOE26-N"], value="YOLOE26-N", label="Model")
                         ye_classes = gr.Textbox(value="person, bus, car", label="Classes", placeholder="person, dog, cat...")
                     with gr.Accordion("Advanced Settings", open=False):
                         ye_conf = gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
             gr.Examples(
                 examples=[
+                    [str(ASSETS / "bus.jpg"), "YOLOE26-N", "person, bus, car", 0.2, True],
+                    [str(ASSETS / "zidane.jpg"), "YOLOE26-N", "person, football, grass", 0.2, True],
                 ],
                 inputs=[ye_image, ye_model, ye_classes, ye_conf, ye_retina],
                 outputs=ye_output,
                 fn=predict_yoloe26,
                 cache_examples=True,
             )
             ye_btn.click(predict_yoloe26, [ye_image, ye_model, ye_classes, ye_conf, ye_retina], ye_output)
 if __name__ == "__main__":
+    demo.launch(theme=theme, allowed_paths=[str(ASSETS), str(ASSETS.parent)])