Yolo26

Sleeping

App Files Community

atalaydenknalbant committed on Jan 14

Commit

1e73dc3

verified ·

1 Parent(s): 631cb4f

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -143

app.py CHANGED Viewed

@@ -10,123 +10,136 @@ import numpy as np
 # Inference
 # -----------------------------
 @spaces.GPU
-def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
-    """
-    Ultralytics YOLO26 inference for image or video.
-    Accepts detect/seg/pose/obb/cls checkpoints and renders r.plot().
-    """
     model = YOLO(model_id)
     if getattr(model, "task", None) != "classify":
         head = model.model.model[-1]
         if hasattr(head, "one2one_cv2"):
             delattr(head, "one2one_cv2")
-    if input_type == "Image":
-        if image is None:
-            w, h = 640, 480
-            blank = Image.new("RGB", (w, h), color="white")
-            draw = ImageDraw.Draw(blank)
-            msg = "No image provided"
-            font = ImageFont.load_default(size=40)
-            bbox = draw.textbbox((0, 0), msg, font=font)
-            tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
-            draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
-            return blank, None
         results = model.predict(
-            source=image,
             conf=conf_threshold,
             iou=iou_threshold,
             imgsz=640,
             max_det=max_detection,
             show_labels=True,
             show_conf=True,
         )
-        annotated_image = None
         for r in results:
-            img_bgr = r.plot()
-            annotated_image = Image.fromarray(img_bgr[..., ::-1])
-        return annotated_image, None
-    if input_type == "Video":
-        if video is None:
-            w, h = 640, 480
-            blank = Image.new("RGB", (w, h), color="white")
-            draw = ImageDraw.Draw(blank)
-            msg = "No video provided"
-            font = ImageFont.load_default(size=40)
-            bbox = draw.textbbox((0, 0), msg, font=font)
-            tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
-            draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
-            tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-            out = cv2.VideoWriter(tmp, fourcc, 1, (w, h))
-            out.write(cv2.cvtColor(np.array(blank), cv2.COLOR_RGB2BGR))
-            out.release()
-            return None, tmp
-        cap = cv2.VideoCapture(video)
-        fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
-        frames = []
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            results = model.predict(
-                source=pil_frame,
-                conf=conf_threshold,
-                iou=iou_threshold,
-                imgsz=640,
-                max_det=max_detection,
-                show_labels=True,
-                show_conf=True,
-            )
-            for r in results:
-                anno_bgr = r.plot()
-                anno_rgb = cv2.cvtColor(anno_bgr, cv2.COLOR_BGR2RGB)
-            frames.append(anno_rgb)
-        cap.release()
-        if not frames:
-            return None, None
-        h, w, _ = frames[0].shape
-        tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-        out = cv2.VideoWriter(tmp, fourcc, fps, (w, h))
-        for f in frames:
-            out.write(cv2.cvtColor(f, cv2.COLOR_RGB2BGR))
-        out.release()
-        return None, tmp
-    return None, None
-def update_visibility(input_type):
-    if input_type == "Image":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
-    else:
-        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
 def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
-    annotated_image, _ = yolo_inference(
-        input_type="Image",
-        image=image,
-        video=None,
-        model_id=model_id,
-        conf_threshold=conf_threshold,
-        iou_threshold=iou_threshold,
-        max_detection=max_detection
-    )
-    return annotated_image
 with gr.Blocks() as app:
     gr.Markdown("# YOLO26")
-    gr.Markdown("Image or video inference with detection, segmentation, pose, oriented bounding boxes, and classification using the latest Ultralytics YOLO26 models.")
     with gr.Accordion("Reference", open=False):
-        gr.Markdown("""
         **BibTeX:**
         ```
         @software{yolo26_ultralytics,
@@ -139,70 +152,69 @@ with gr.Blocks() as app:
           license = {AGPL-3.0}
         }
         ```
-        """
         )
-    with gr.Row():
-        with gr.Column():
-            image = gr.Image(type="pil", label="Image", visible=True)
-            video = gr.Video(label="Video", visible=False)
-            input_type = gr.Radio(choices=["Image", "Video"], value="Image", label="Input Type")
-            model_id = gr.Dropdown(
-                label="Model",
-                choices=[
-                    # detect
-                    "yolo26n.pt","yolo26s.pt","yolo26m.pt","yolo26l.pt","yolo26x.pt",
-                    # seg
-                    "yolo26n-seg.pt","yolo26s-seg.pt","yolo26m-seg.pt","yolo26l-seg.pt","yolo26x-seg.pt",
-                    # pose
-                    "yolo26n-pose.pt","yolo26s-pose.pt","yolo26m-pose.pt","yolo26l-pose.pt","yolo26x-pose.pt",
-                    # obb
-                    "yolo26n-obb.pt","yolo26s-obb.pt","yolo26m-obb.pt","yolo26l-obb.pt","yolo26x-obb.pt",
-                    # cls
-                    "yolo26n-cls.pt","yolo26s-cls.pt","yolo26m-cls.pt","yolo26l-cls.pt","yolo26x-cls.pt",
                 ],
-                value="yolo26n.pt",
             )
-            conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
-            iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
-            max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
-            infer_button = gr.Button("Detect Objects", variant="primary")
-        with gr.Column():
-            output_image = gr.Image(type="pil", show_label=False, visible=True)
-            output_video = gr.Video(show_label=False, visible=False)
-            gr.DeepLinkButton(variant="primary")
-    input_type.change(
-        fn=update_visibility,
-        inputs=input_type,
-        outputs=[image, video, output_image, output_video],
-    )
-    infer_button.click(
-        fn=yolo_inference,
-        inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
-        outputs=[output_image, output_video],
     )
-    gr.Examples(
-        examples=[
-            ["zidane.jpg", "yolo26s.pt", 0.25, 0.45, 300],
-            ["bus.jpg", "yolo26m.pt", 0.25, 0.45, 300],
-            ["yolo_vision.jpg", "yolo26x.pt", 0.25, 0.45, 300],
-            ["Tricycle.jpg", "yolo26x-cls.pt", 0.25, 0.45, 300],
-            ["tcganadolu.jpg", "yolo26m-obb.pt", 0.25, 0.45, 300],
-            ["San Diego Airport.jpg", "yolo26x-seg.pt", 0.25, 0.45, 300],
-            ["Theodore_Roosevelt.png", "yolo26l-pose.pt", 0.25, 0.45, 300],
-        ],
-        fn=yolo_inference_for_examples,
-        inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
-        outputs=[output_image],
-        label="Examples",
     )
 if __name__ == "__main__":
-    app.launch(mcp_server=True, theme = gr.themes.Ocean(primary_hue="indigo", secondary_hue="blue"))

 # Inference
 # -----------------------------
 @spaces.GPU
+def yolo_inference_image(image, model_id, conf_threshold, iou_threshold, max_detection):
     model = YOLO(model_id)
     if getattr(model, "task", None) != "classify":
         head = model.model.model[-1]
         if hasattr(head, "one2one_cv2"):
             delattr(head, "one2one_cv2")
+    if image is None:
+        w, h = 640, 480
+        blank = Image.new("RGB", (w, h), color="white")
+        draw = ImageDraw.Draw(blank)
+        msg = "No image provided"
+        font = ImageFont.load_default(size=40)
+        bbox = draw.textbbox((0, 0), msg, font=font)
+        tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
+        draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
+        return blank
+    results = model.predict(
+        source=image,
+        conf=conf_threshold,
+        iou=iou_threshold,
+        imgsz=640,
+        max_det=max_detection,
+        show_labels=True,
+        show_conf=True,
+    )
+    annotated_image = None
+    for r in results:
+        img_bgr = r.plot()
+        annotated_image = Image.fromarray(img_bgr[..., ::-1])
+    return annotated_image
+@spaces.GPU
+def yolo_inference_video(video, model_id, conf_threshold, iou_threshold, max_detection):
+    model = YOLO(model_id)
+    if getattr(model, "task", None) != "classify":
+        head = model.model.model[-1]
+        if hasattr(head, "one2one_cv2"):
+            delattr(head, "one2one_cv2")
+    if video is None:
+        w, h = 640, 480
+        blank = Image.new("RGB", (w, h), color="white")
+        draw = ImageDraw.Draw(blank)
+        msg = "No video provided"
+        font = ImageFont.load_default(size=40)
+        bbox = draw.textbbox((0, 0), msg, font=font)
+        tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
+        draw.text(((w - tw) / 2, (h - th) / 2), msg, fill="black", font=font)
+        tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        out = cv2.VideoWriter(tmp, fourcc, 1, (w, h))
+        out.write(cv2.cvtColor(np.array(blank), cv2.COLOR_RGB2BGR))
+        out.release()
+        return tmp
+    cap = cv2.VideoCapture(video)
+    if not cap.isOpened():
+        return None
+    fps_val = cap.get(cv2.CAP_PROP_FPS)
+    fps = fps_val if fps_val and fps_val > 0 else 25
+    w_val = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h_val = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    w = w_val if w_val and w_val > 0 else 640
+    h = h_val if h_val and h_val > 0 else 480
+    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    out = cv2.VideoWriter(tmp, fourcc, fps, (w, h))
+    wrote_any = False
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
         results = model.predict(
+            source=frame,
             conf=conf_threshold,
             iou=iou_threshold,
             imgsz=640,
             max_det=max_detection,
             show_labels=True,
             show_conf=True,
+            verbose=False,
         )
+        anno_bgr = frame
         for r in results:
+            anno_bgr = r.plot()
+        out.write(anno_bgr)
+        wrote_any = True
+    cap.release()
+    out.release()
+    if not wrote_any:
+        return None
+    return tmp
 def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
+    return yolo_inference_image(image, model_id, conf_threshold, iou_threshold, max_detection)
+MODEL_CHOICES = [
+    "yolo26n.pt", "yolo26s.pt", "yolo26m.pt", "yolo26l.pt", "yolo26x.pt",
+    "yolo26n-seg.pt", "yolo26s-seg.pt", "yolo26m-seg.pt", "yolo26l-seg.pt", "yolo26x-seg.pt",
+    "yolo26n-pose.pt", "yolo26s-pose.pt", "yolo26m-pose.pt", "yolo26l-pose.pt", "yolo26x-pose.pt",
+    "yolo26n-obb.pt", "yolo26s-obb.pt", "yolo26m-obb.pt", "yolo26l-obb.pt", "yolo26x-obb.pt",
+    "yolo26n-cls.pt", "yolo26s-cls.pt", "yolo26m-cls.pt", "yolo26l-cls.pt", "yolo26x-cls.pt",
+]
 with gr.Blocks() as app:
     gr.Markdown("# YOLO26")
+    gr.Markdown(
+        "Image or video inference with detection, segmentation, pose, oriented bounding boxes, and classification using the latest Ultralytics YOLO26 models."
+    )
     with gr.Accordion("Reference", open=False):
+        gr.Markdown(
+            """
         **BibTeX:**
         ```
         @software{yolo26_ultralytics,
           license = {AGPL-3.0}
         }
         ```
+        """
         )
+    with gr.Tabs() as media_tabs:
+        with gr.Tab("Image") as image_tab:
+            with gr.Row():
+                with gr.Column():
+                    image = gr.Image(type="pil", label="Image")
+                    model_id_img = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value="yolo26n.pt")
+                    conf_img = gr.Slider(0, 1, value=0.25, label="Confidence Threshold")
+                    iou_img = gr.Slider(0, 1, value=0.45, label="IoU Threshold")
+                    max_det_img = gr.Slider(1, 300, step=1, value=300, label="Max Detection")
+                    infer_image_button = gr.Button("Detect Objects", variant="primary")
+                with gr.Column():
+                    output_image = gr.Image(type="pil", show_label=False)
+                    gr.DeepLinkButton(variant="primary")
+            gr.Examples(
+                examples=[
+                    ["zidane.jpg", "yolo26s.pt", 0.25, 0.45, 300],
+                    ["bus.jpg", "yolo26m.pt", 0.25, 0.45, 300],
+                    ["yolo_vision.jpg", "yolo26x.pt", 0.25, 0.45, 300],
+                    ["Tricycle.jpg", "yolo26x-cls.pt", 0.25, 0.45, 300],
+                    ["tcganadolu.jpg", "yolo26m-obb.pt", 0.25, 0.45, 300],
+                    ["San Diego Airport.jpg", "yolo26x-seg.pt", 0.25, 0.45, 300],
+                    ["Theodore_Roosevelt.png", "yolo26l-pose.pt", 0.25, 0.45, 300],
                 ],
+                fn=yolo_inference_for_examples,
+                inputs=[image, model_id_img, conf_img, iou_img, max_det_img],
+                outputs=[output_image],
+                label="Examples",
             )
+        with gr.Tab("Video") as video_tab:
+            with gr.Row():
+                with gr.Column():
+                    video = gr.Video(label="Video")
+                    model_id_vid = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value="yolo26n.pt")
+                    conf_vid = gr.Slider(0, 1, value=0.25, label="Confidence Threshold")
+                    iou_vid = gr.Slider(0, 1, value=0.45, label="IoU Threshold")
+                    max_det_vid = gr.Slider(1, 300, step=1, value=300, label="Max Detection")
+                    infer_video_button = gr.Button("Detect Objects", variant="primary")
+                with gr.Column():
+                    output_video = gr.Video(show_label=False)
+                    gr.DeepLinkButton(variant="primary")
+    infer_image_button.click(
+        fn=yolo_inference_image,
+        inputs=[image, model_id_img, conf_img, iou_img, max_det_img],
+        outputs=[output_image],
     )
+    infer_video_button.click(
+        fn=yolo_inference_video,
+        inputs=[video, model_id_vid, conf_vid, iou_vid, max_det_vid],
+        outputs=[output_video],
     )
 if __name__ == "__main__":
+    app.launch(mcp_server=True, theme=gr.themes.Ocean(primary_hue="indigo", secondary_hue="blue"))