Spaces:

simar007
/

objectdetection

Sleeping

App Files Files Community

simar007 committed on Oct 29, 2025

Commit

01706d9

verified ·

1 Parent(s): f43a152

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -268

app.py DELETED Viewed

@@ -1,268 +0,0 @@
-import spaces
-import gradio as gr
-from PIL import Image, ImageDraw, ImageFont
-from ultralytics import YOLO
-from huggingface_hub import hf_hub_download
-import cv2
-import tempfile
-import numpy as np
-def download_model(model_filename):
-    """
-    Downloads a YOLO model from the Hugging Face Hub.
-    This function fetches a specified YOLO model file from the
-    'atalaydenknalbant/Yolov13' repository on the Hugging Face Hub.
-    Args:
-        model_filename (str): The name of the model file to download
-                              (e.g., 'yolov13n.pt').
-    Returns:
-        str: The local path to the downloaded model file.
-    """
-    return hf_hub_download(repo_id="atalaydenknalbant/Yolov13", filename=model_filename)
-@spaces.GPU
-def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
-    """
-    Performs object detection inference using a YOLOv13 model on either an image or a video.
-    This function downloads the specified YOLO model, then applies it to the
-    provided input. For images, it returns an annotated image. For videos, it
-    processes each frame and returns an annotated video. Error handling for
-    missing inputs is included, returning blank outputs with messages.
-    Args:
-        input_type (str): Specifies the input type, either "Image" or "Video".
-        image (PIL.Image.Image or None): The input image if `input_type` is "Image".
-                                         None otherwise.
-        video (str or None): The path to the input video file if `input_type` is "Video".
-                             None otherwise.
-        model_id (str): The identifier of the YOLO model to use (e.g., 'yolov13n.pt').
-        conf_threshold (float): The confidence threshold for object detection.
-                                Detections with lower confidence are discarded.
-        iou_threshold (float): The Intersection over Union (IoU) threshold for
-                               Non-Maximum Suppression (NMS).
-        max_detection (int): The maximum number of detections to return per image or frame.
-    Returns:
-        tuple: A tuple containing two elements:
-            - PIL.Image.Image or None: The annotated image if `input_type` was "Image",
-                                     otherwise None.
-            - str or None: The path to the annotated video file if `input_type` was "Video",
-                           otherwise None.
-    """
-    model_path = download_model(model_id)
-    if input_type == "Image":
-        if image is None:
-            width, height = 640, 480
-            blank_image = Image.new("RGB", (width, height), color="white")
-            draw = ImageDraw.Draw(blank_image)
-            message = "No image provided"
-            font = ImageFont.load_default(size=40)
-            bbox = draw.textbbox((0, 0), message, font=font)
-            text_width = bbox[2] - bbox[0]
-            text_height = bbox[3] - bbox[1]
-            text_x = (width - text_width) / 2
-            text_y = (height - text_height) / 2
-            draw.text((text_x, text_y), message, fill="black", font=font)
-            return blank_image, None
-        model = YOLO(model_path)
-        results = model.predict(
-            source=image,
-            conf=conf_threshold,
-            iou=iou_threshold,
-            imgsz=640,
-            max_det=max_detection,
-            show_labels=True,
-            show_conf=True,
-        )
-        for r in results:
-            image_array = r.plot()
-            annotated_image = Image.fromarray(image_array[..., ::-1])
-        return annotated_image, None
-    elif input_type == "Video":
-        if video is None:
-            width, height = 640, 480
-            blank_image = Image.new("RGB", (width, height), color="white")
-            draw = ImageDraw.Draw(blank_image)
-            message = "No video provided"
-            font = ImageFont.load_default(size=40)
-            bbox = draw.textbbox((0, 0), message, font=font)
-            text_width = bbox[2] - bbox[0]
-            text_height = bbox[3] - bbox[1]
-            text_x = (width - text_width) / 2
-            text_y = (height - text_height) / 2
-            draw.text((text_x, text_y), message, fill="black", font=font)
-            temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-            out = cv2.VideoWriter(temp_video_file, fourcc, 1, (width, height))
-            frame = cv2.cvtColor(np.array(blank_image), cv2.COLOR_RGB2BGR)
-            out.write(frame)
-            out.release()
-            return None, temp_video_file
-        model = YOLO(model_path)
-        cap = cv2.VideoCapture(video)
-        fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
-        frames = []
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            results = model.predict(
-                source=pil_frame,
-                conf=conf_threshold,
-                iou=iou_threshold,
-                imgsz=640,
-                max_det=max_detection,
-                show_labels=True,
-                show_conf=True,
-            )
-            for r in results:
-                annotated_frame_array = r.plot()
-                annotated_frame = cv2.cvtColor(annotated_frame_array, cv2.COLOR_BGR2RGB)
-            frames.append(annotated_frame)
-        cap.release()
-        if not frames:
-            return None, None
-        height_out, width_out, _ = frames[0].shape
-        temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-        out = cv2.VideoWriter(temp_video_file, fourcc, fps, (width_out, height_out))
-        for f in frames:
-            f_bgr = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
-            out.write(f_bgr)
-        out.release()
-        return None, temp_video_file
-    return None, None
-def update_visibility(input_type):
-    """
-    Adjusts the visibility of Gradio components based on the selected input type.
-    This function dynamically shows or hides the image and video input/output
-    components in the Gradio interface to ensure only relevant fields are visible.
-    Args:
-        input_type (str): The selected input type, either "Image" or "Video".
-    Returns:
-        tuple: A tuple of `gr.update` objects for the visibility of:
-               (image input, video input, image output, video output).
-    """
-    if input_type == "Image":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
-    else:
-        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
-def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
-    """
-    Wrapper function for `yolo_inference` specifically for Gradio examples that use images.
-    This function simplifies the `yolo_inference` call for the `gr.Examples` component,
-    ensuring only image-based inference is performed for predefined examples.
-    Args:
-        image (PIL.Image.Image): The input image for the example.
-        model_id (str): The identifier of the YOLO model to use.
-        conf_threshold (float): The confidence threshold.
-        iou_threshold (float): The IoU threshold.
-        max_detection (int): The maximum number of detections.
-    Returns:
-        PIL.Image.Image or None: The annotated image. Returns None if no image is processed.
-    """
-    annotated_image, _ = yolo_inference(
-        input_type="Image",
-        image=image,
-        video=None,
-        model_id=model_id,
-        conf_threshold=conf_threshold,
-        iou_threshold=iou_threshold,
-        max_detection=max_detection
-    )
-    return annotated_image
-# Build the Gradio interface: Ocean theme with blue primary and pink secondary hues.
-theme = gr.themes.Ocean(primary_hue="blue", secondary_hue="pink")
-with gr.Blocks(theme=theme) as app:
-    gr.Markdown("# Yolov13: Object Detection")
-    gr.Markdown("Upload an image or video for inference using the latest YOLOv13 models.")
-    gr.Markdown("📝 **Note:** Better-trained models will be deployed as they become available.")
-    # Collapsed-by-default panel with the paper reference and BibTeX entry.
-    with gr.Accordion("Paper and Citation", open=False):
-        gr.Markdown("""
-        This application is based on the research from the paper: **YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception**.
-        - **Authors:** Mengqi Lei, Siqi Li, Yihong Wu, et al.
-        - **Preprint Link:** [https://arxiv.org/abs/2506.17733](https://arxiv.org/abs/2506.17733)
-        **BibTeX:**
-        ```
-        @article{yolov13,
-          title={YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception},
-          author={Lei, Mengqi and Li, Siqi and Wu, Yihong and et al.},
-          journal={arXiv preprint arXiv:2506.17733},
-          year={2025}
-        }
-        ```
-        """)
-    with gr.Row():
-        # First column: inputs and inference controls.
-        with gr.Column():
-            # The image input starts visible and the video input hidden,
-            # matching the radio's default value of "Image".
-            image = gr.Image(type="pil", label="Image", visible=True)
-            video = gr.Video(label="Video", visible=False)
-            input_type = gr.Radio(
-                choices=["Image", "Video"],
-                value="Image",
-                label="Input Type",
-            )
-            model_id = gr.Dropdown(
-                label="Model Name",
-                choices=[
-                    'yolov13n.pt', 'yolov13s.pt', 'yolov13l.pt', 'yolov13x.pt',
-                ],
-                value="yolov13n.pt",
-            )
-            conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.35, label="Confidence Threshold")
-            iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
-            max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
-            infer_button = gr.Button("Detect Objects", variant="primary")
-        # Second column: outputs, with the same initial visibility as the inputs.
-        with gr.Column():
-            output_image = gr.Image(type="pil", show_label=False, show_share_button=False, visible=True)
-            output_video = gr.Video(show_label=False, show_share_button=False, visible=False)
-            gr.DeepLinkButton(variant="primary")
-    # Switch the visible input/output pair whenever the input type changes.
-    input_type.change(
-        fn=update_visibility,
-        inputs=input_type,
-        outputs=[image, video, output_image, output_video],
-    )
-    # Run inference on click; the returned pair fills the image and video outputs.
-    infer_button.click(
-        fn=yolo_inference,
-        inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
-        outputs=[output_image, output_video],
-    )
-    # Image-only examples, routed through the image-specific wrapper.
-    # Each row is: image file, model name, confidence, IoU, max detections.
-    gr.Examples(
-        examples=[
-            ["zidane.jpg", "yolov13s.pt", 0.35, 0.45, 300],
-            ["bus.jpg", "yolov13l.pt", 0.35, 0.45, 300],
-            ["yolo_vision.jpg", "yolov13x.pt", 0.35, 0.45, 300],
-        ],
-        fn=yolo_inference_for_examples,
-        inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
-        outputs=[output_image],
-        label="Examples (Images)",
-    )
-# Launch the app only when executed as a script; mcp_server=True is passed to launch().
-if __name__ == '__main__':
-    app.launch(mcp_server=True)