Spaces:

atalaydenknalbant
/

Yolov13

Running on Zero

App Files Files Community

atalaydenknalbant committed on Jul 15

Commit

cc582e0

verified ·

1 Parent(s): 6f6b144

Upload 10 files

Browse files

Files changed (11) hide show

.gitattributes +7 -0
README.md +18 -13
San Diego Airport.jpg +3 -0
Theodore_Roosevelt.png +3 -0
Tricycle.jpg +3 -0
app.py +194 -0
bus.jpg +3 -0
requirements.txt +3 -0
tcganadolu.jpg +3 -0
yolo_vision.jpg +3 -0
zidane.jpg +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+bus.jpg filter=lfs diff=lfs merge=lfs -text
+San[[:space:]]Diego[[:space:]]Airport.jpg filter=lfs diff=lfs merge=lfs -text
+tcganadolu.jpg filter=lfs diff=lfs merge=lfs -text
+Theodore_Roosevelt.png filter=lfs diff=lfs merge=lfs -text
+Tricycle.jpg filter=lfs diff=lfs merge=lfs -text
+yolo_vision.jpg filter=lfs diff=lfs merge=lfs -text
+zidane.jpg filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,13 +1,18 @@
----
-title: Yolo13
-emoji: 🔥
-colorFrom: green
-colorTo: green
-sdk: gradio
-sdk_version: 5.37.0
-app_file: app.py
-pinned: false
-short_description: Detect objects in images and videos
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Yolo13
+emoji: 👁
+colorFrom: pink
+colorTo: blue
+sdk: gradio
+sdk_version: 5.29.1
+app_file: app.py
+pinned: false
+tags:
+- Object Detection
+- Instance Segmentation
+- Pose/Keypoints
+- Oriented Detection
+- Image Classification
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

San Diego Airport.jpg ADDED Viewed

Git LFS Details

SHA256: 55b7890afdb58a1fd78912b980bf63ed9040915734aa3ae1e7d7785d540813d8
Pointer size: 131 Bytes
Size of remote file: 335 kB

Theodore_Roosevelt.png ADDED Viewed

Git LFS Details

SHA256: 2f26f6876212efaf4c2e5551cc618cc70210b31c98d28fb2c72387b609b2887c
Pointer size: 132 Bytes
Size of remote file: 1.79 MB

Tricycle.jpg ADDED Viewed

Git LFS Details

SHA256: dee0d670a1753d635550fc8b58b010063799fd005c0d5591e9207d9f9372a582
Pointer size: 131 Bytes
Size of remote file: 763 kB

app.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import spaces
+import gradio as gr
+from PIL import Image, ImageDraw, ImageFont
+from ultralytics import YOLO
+from huggingface_hub import hf_hub_download
+import cv2
+import tempfile
+def download_model(model_filename):
+    return hf_hub_download(repo_id="atalaydenknalbant/Yolov13", filename=model_filename)
+@spaces.GPU
+def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
+    model_path = download_model(model_id)
+    if input_type == "Image":
+        if image is None:
+            width, height = 640, 480
+            blank_image = Image.new("RGB", (width, height), color="white")
+            draw = ImageDraw.Draw(blank_image)
+            message = "No image provided"
+            font = ImageFont.load_default(size=40)
+            bbox = draw.textbbox((0, 0), message, font=font)
+            text_width = bbox[2] - bbox[0]
+            text_height = bbox[3] - bbox[1]
+            text_x = (width - text_width) / 2
+            text_y = (height - text_height) / 2
+            draw.text((text_x, text_y), message, fill="black", font=font)
+            return blank_image, None
+        model = YOLO(model_path)
+        results = model.predict(
+            source=image,
+            conf=conf_threshold,
+            iou=iou_threshold,
+            imgsz=640,
+            max_det=max_detection,
+            show_labels=True,
+            show_conf=True,
+        )
+        for r in results:
+            image_array = r.plot()
+            annotated_image = Image.fromarray(image_array[..., ::-1])
+        return annotated_image, None
+    elif input_type == "Video":
+        if video is None:
+            width, height = 640, 480
+            blank_image = Image.new("RGB", (width, height), color="white")
+            draw = ImageDraw.Draw(blank_image)
+            message = "No video provided"
+            font = ImageFont.load_default(size=40)
+            bbox = draw.textbbox((0, 0), message, font=font)
+            text_width = bbox[2] - bbox[0]
+            text_height = bbox[3] - bbox[1]
+            text_x = (width - text_width) / 2
+            text_y = (height - text_height) / 2
+            draw.text((text_x, text_y), message, fill="black", font=font)
+            temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+            out = cv2.VideoWriter(temp_video_file, fourcc, 1, (width, height))
+            frame = cv2.cvtColor(np.array(blank_image), cv2.COLOR_RGB2BGR)
+            out.write(frame)
+            out.release()
+            return None, temp_video_file
+        model = YOLO(model_path)
+        cap = cv2.VideoCapture(video)
+        fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
+        frames = []
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+            results = model.predict(
+                source=pil_frame,
+                conf=conf_threshold,
+                iou=iou_threshold,
+                imgsz=640,
+                max_det=max_detection,
+                show_labels=True,
+                show_conf=True,
+            )
+            for r in results:
+                annotated_frame_array = r.plot()
+                annotated_frame = cv2.cvtColor(annotated_frame_array, cv2.COLOR_BGR2RGB)
+            frames.append(annotated_frame)
+        cap.release()
+        if not frames:
+            return None, None
+        height_out, width_out, _ = frames[0].shape
+        temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        out = cv2.VideoWriter(temp_video_file, fourcc, fps, (width_out, height_out))
+        for f in frames:
+            f_bgr = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
+            out.write(f_bgr)
+        out.release()
+        return None, temp_video_file
+    return None, None
+def update_visibility(input_type):
+    if input_type == "Image":
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=true)
+def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
+    annotated_image, _ = yolo_inference(
+        input_type="Image",
+        image=image,
+        video=None,
+        model_id=model_id,
+        conf_threshold=conf_threshold,
+        iou_threshold=iou_threshold,
+        max_detection=max_detection
+    )
+    return gr.update(value="Image"), annotated_image
+# Build the Gradio UI: a header, a collapsible citation panel, then one row
+# with an input/controls column followed by an output column.
+with gr.Blocks() as app:
+    gr.Markdown("# Yolo13: Object Detection")
+    gr.Markdown("Upload an image or video for inference using the latest YOLOv13 models.")
+    # Citation details are kept in an accordion that starts closed.
+    with gr.Accordion("Paper and Citation", open=False):
+        gr.Markdown("""
+        This application is based on the research from the paper: **YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception**.
+        - **Authors:** Mengqi Lei, Siqi Li, Yihong Wu, et al.
+        - **Preprint Link:** [https://arxiv.org/abs/2506.17733](https://arxiv.org/abs/2506.17733)
+        **BibTeX:**
+        ```
+        @article{yolov13,
+          title={YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception},
+          author={Lei, Mengqi and Li, Siqi and Wu, Yihong and et al.},
+          journal={arXiv preprint arXiv:2506.17733},
+          year={2025}
+        }
+        ```
+        """)
+    with gr.Row():
+        # First column: media inputs and inference settings.
+        with gr.Column():
+            # The video input starts hidden because the default input type is "Image".
+            image = gr.Image(type="pil", label="Image", visible=True)
+            video = gr.Video(label="Video", visible=False)
+            input_type = gr.Radio(
+                choices=["Image", "Video"],
+                value="Image",
+                label="Input Type",
+            )
+            # The selected value is passed to yolo_inference as model_id.
+            model_id = gr.Dropdown(
+                label="Model Name",
+                choices=[
+                    'yolov13n.pt', 'yolov13s.pt', 'yolov13l.pt', 'yolov13x.pt',
+                ],
+                value="yolov13n.pt",
+            )
+            conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
+            iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
+            max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
+            infer_button = gr.Button("Detect Objects")
+        # Second column: annotated outputs; the video output starts hidden.
+        with gr.Column():
+            output_image = gr.Image(type="pil", label="Annotated Image", visible=True)
+            output_video = gr.Video(label="Annotated Video", visible=False)
+            gr.DeepLinkButton()
+    # Changing the input type calls update_visibility, whose four return values
+    # map onto the four components listed in `outputs`, in that order.
+    input_type.change(
+        fn=update_visibility,
+        inputs=input_type,
+        outputs=[image, video, output_image, output_video],
+    )
+    # The button runs yolo_inference; the `inputs` order matches its parameter
+    # order and its (image, video) return pair maps onto `outputs`.
+    infer_button.click(
+        fn=yolo_inference,
+        inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
+        outputs=[output_image, output_video],
+    )
+    # Each example row is [image file, model file, confidence, IoU, max detections],
+    # matching `inputs`. The handler also returns an update that sets the
+    # input type to "Image".
+    gr.Examples(
+        examples=[
+            ["zidane.jpg", "yolov13s.pt", 0.25, 0.45, 300],
+            ["bus.jpg", "yolov13l.pt", 0.25, 0.45, 300],
+            ["yolo_vision.jpg", "yolov13x.pt", 0.25, 0.45, 300],
+        ],
+        fn=yolo_inference_for_examples,
+        inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
+        outputs=[input_type, output_image],
+        label="Examples (Images)",
+    )
+# Launch the app only when this file is executed as a script.
+if __name__ == '__main__':
+    app.launch()