simar007 commited on
Commit
01706d9
·
verified ·
1 Parent(s): f43a152

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -268
app.py DELETED
@@ -1,268 +0,0 @@
1
- import spaces
2
- import gradio as gr
3
- from PIL import Image, ImageDraw, ImageFont
4
- from ultralytics import YOLO
5
- from huggingface_hub import hf_hub_download
6
- import cv2
7
- import tempfile
8
- import numpy as np
9
-
10
def download_model(model_filename):
    """Fetch a YOLO weight file from the Hugging Face Hub.

    Resolves the requested weight file inside the
    'atalaydenknalbant/Yolov13' model repository and returns the
    local cache path produced by `hf_hub_download`.

    Args:
        model_filename (str): Name of the weight file to fetch
            (e.g. 'yolov13n.pt').

    Returns:
        str: Local filesystem path of the downloaded model file.
    """
    repo = "atalaydenknalbant/Yolov13"
    return hf_hub_download(repo_id=repo, filename=model_filename)
26
def _placeholder_image(message, size=(640, 480)):
    """Render *message* centered on a white canvas (used when input is missing)."""
    width, height = size
    img = Image.new("RGB", (width, height), color="white")
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default(size=40)
    # textbbox gives (left, top, right, bottom) for the rendered string.
    bbox = draw.textbbox((0, 0), message, font=font)
    text_w = bbox[2] - bbox[0]
    text_h = bbox[3] - bbox[1]
    draw.text(((width - text_w) / 2, (height - text_h) / 2), message, fill="black", font=font)
    return img


@spaces.GPU
def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
    """
    Run YOLOv13 object detection on an image or a video.

    Downloads the requested model, then annotates either the given image
    or every frame of the given video. Missing inputs produce a blank
    placeholder output carrying an explanatory message instead of raising.

    Args:
        input_type (str): Either "Image" or "Video".
        image (PIL.Image.Image or None): Input image when `input_type` is "Image".
        video (str or None): Path to the input video when `input_type` is "Video".
        model_id (str): Model weight file name (e.g. 'yolov13n.pt').
        conf_threshold (float): Confidence threshold; lower-confidence
            detections are discarded.
        iou_threshold (float): IoU threshold used by non-maximum suppression.
        max_detection (int): Maximum detections per image/frame.

    Returns:
        tuple: (annotated PIL image or None, annotated video path or None) —
        exactly one element is populated depending on `input_type`;
        (None, None) when the input yields nothing to annotate.
    """
    model_path = download_model(model_id)

    if input_type == "Image":
        if image is None:
            return _placeholder_image("No image provided"), None

        model = YOLO(model_path)
        results = model.predict(
            source=image,
            conf=conf_threshold,
            iou=iou_threshold,
            imgsz=640,
            max_det=max_detection,
            show_labels=True,
            show_conf=True,
        )
        for r in results:
            # r.plot() returns a BGR ndarray; reverse the channel axis for PIL (RGB).
            return Image.fromarray(r.plot()[..., ::-1]), None
        return None, None

    elif input_type == "Video":
        if video is None:
            # Emit a 1-frame placeholder clip so the video output slot stays usable.
            blank = _placeholder_image("No video provided")
            temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(temp_video_file, fourcc, 1, blank.size)
            out.write(cv2.cvtColor(np.array(blank), cv2.COLOR_RGB2BGR))
            out.release()
            return None, temp_video_file

        model = YOLO(model_path)
        cap = cv2.VideoCapture(video)
        # Query FPS once; some containers report 0/NaN, so fall back to 25.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            fps = 25
        out = None
        temp_video_file = None
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                results = model.predict(
                    source=pil_frame,
                    conf=conf_threshold,
                    iou=iou_threshold,
                    imgsz=640,
                    max_det=max_detection,
                    show_labels=True,
                    show_conf=True,
                )
                for r in results:
                    # r.plot() is already BGR, which is what VideoWriter expects —
                    # write it directly instead of buffering all frames in memory.
                    annotated = r.plot()
                    if out is None:
                        # Open the writer lazily so an empty video yields (None, None).
                        h, w = annotated.shape[:2]
                        temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
                        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                        out = cv2.VideoWriter(temp_video_file, fourcc, fps, (w, h))
                    out.write(annotated)
        finally:
            # Release capture/writer even if inference raises mid-video.
            cap.release()
            if out is not None:
                out.release()

        if temp_video_file is None:
            return None, None
        return None, temp_video_file

    return None, None
147
def update_visibility(input_type):
    """Toggle which Gradio widgets are visible for the chosen input type.

    Shows the image input/output pair when "Image" is selected and the
    video input/output pair otherwise, so only relevant fields appear.

    Args:
        input_type (str): Either "Image" or "Video".

    Returns:
        tuple: `gr.update` visibility objects, in order:
            (image input, video input, image output, video output).
    """
    show_image = input_type == "Image"
    return (
        gr.update(visible=show_image),
        gr.update(visible=not show_image),
        gr.update(visible=show_image),
        gr.update(visible=not show_image),
    )
166
def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
    """Image-only adapter around `yolo_inference` for the `gr.Examples` widget.

    The examples gallery only supplies images, so this wrapper pins
    `input_type` to "Image", passes no video, and discards the (always
    None) video half of the result.

    Args:
        image (PIL.Image.Image): Example input image.
        model_id (str): Model weight file name to use.
        conf_threshold (float): Confidence threshold.
        iou_threshold (float): IoU threshold for NMS.
        max_detection (int): Maximum number of detections.

    Returns:
        PIL.Image.Image or None: The annotated image, or None if nothing
        was processed.
    """
    annotated, _unused_video = yolo_inference(
        input_type="Image",
        image=image,
        video=None,
        model_id=model_id,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
        max_detection=max_detection,
    )
    return annotated
194
# Shared visual theme for the whole Gradio app.
theme = gr.themes.Ocean(primary_hue="blue", secondary_hue="pink")

# Build the UI: header text, citation accordion, input/output columns,
# event wiring, and an image-only examples gallery.
with gr.Blocks(theme=theme) as app:
    gr.Markdown("# Yolov13: Object Detection")
    gr.Markdown("Upload an image or video for inference using the latest YOLOv13 models.")
    gr.Markdown("📝 **Note:** Better-trained models will be deployed as they become available.")
    with gr.Accordion("Paper and Citation", open=False):
        gr.Markdown("""
This application is based on the research from the paper: **YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception**.

- **Authors:** Mengqi Lei, Siqi Li, Yihong Wu, et al.
- **Preprint Link:** [https://arxiv.org/abs/2506.17733](https://arxiv.org/abs/2506.17733)

**BibTeX:**
```
@article{yolov13,
title={YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception},
author={Lei, Mengqi and Li, Siqi and Wu, Yihong and et al.},
journal={arXiv preprint arXiv:2506.17733},
year={2025}
}
```
""")

    with gr.Row():
        with gr.Column():
            # Input widgets. Image is visible initially; update_visibility
            # swaps image/video visibility when the radio selection changes.
            image = gr.Image(type="pil", label="Image", visible=True)
            video = gr.Video(label="Video", visible=False)
            input_type = gr.Radio(
                choices=["Image", "Video"],
                value="Image",
                label="Input Type",
            )
            model_id = gr.Dropdown(
                label="Model Name",
                choices=[
                    'yolov13n.pt', 'yolov13s.pt', 'yolov13l.pt', 'yolov13x.pt',
                ],
                value="yolov13n.pt",
            )
            conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.35, label="Confidence Threshold")
            iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
            max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
            infer_button = gr.Button("Detect Objects", variant="primary")
        with gr.Column():
            # Output widgets mirror the input pair: exactly one is visible
            # at a time, matching the selected input type.
            output_image = gr.Image(type="pil", show_label=False, show_share_button=False, visible=True)
            output_video = gr.Video(show_label=False, show_share_button=False, visible=False)
            gr.DeepLinkButton(variant="primary")

    # Swap the visible input/output widgets whenever the radio value changes.
    input_type.change(
        fn=update_visibility,
        inputs=input_type,
        outputs=[image, video, output_image, output_video],
    )

    # Main inference trigger; yolo_inference populates exactly one output.
    infer_button.click(
        fn=yolo_inference,
        inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
        outputs=[output_image, output_video],
    )

    # Image-only examples use the wrapper so no video arguments are needed.
    gr.Examples(
        examples=[
            ["zidane.jpg", "yolov13s.pt", 0.35, 0.45, 300],
            ["bus.jpg", "yolov13l.pt", 0.35, 0.45, 300],
            ["yolo_vision.jpg", "yolov13x.pt", 0.35, 0.45, 300],
        ],
        fn=yolo_inference_for_examples,
        inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
        outputs=[output_image],
        label="Examples (Images)",
    )

if __name__ == '__main__':
    # mcp_server=True additionally exposes the app's functions over MCP.
    app.launch(mcp_server=True)