John Ho committed on
Commit 099215d · 1 Parent(s): 3b261d0

cloned from SkalskiP/RF-DETR on huggingface Space
Files changed (7):
  1. .gitignore +5 -0
  2. README.md +14 -2
  3. app.py +252 -0
  4. requirements.txt +4 -0
  5. utils/__init__.py +0 -0
  6. utils/image.py +16 -0
  7. utils/video.py +26 -0
.gitignore CHANGED
@@ -1,3 +1,8 @@
+ # project specific
+ .idea/
+ venv/
+ *.pth
+
  # Byte-compiled / optimized / DLL files
  __pycache__/
  *.py[cod]
README.md CHANGED
@@ -1,2 +1,14 @@
- # hfs-rf-detr
- code for RF-DETR demo on HuggingFace Space
+ ---
+ title: RF-DETR
+ emoji: 🔥
+ colorFrom: yellow
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 5.22.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ short_description: 'SOTA real-time object detection model '
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,252 @@
+ import os
+ from typing import TypeVar
+
+ from tqdm import tqdm
+ import gradio as gr
+ import numpy as np
+ import supervision as sv
+ from PIL import Image
+ from rfdetr import RFDETRBase, RFDETRLarge
+ from rfdetr.detr import RFDETR
+ from rfdetr.util.coco_classes import COCO_CLASSES
+
+ from utils.image import calculate_resolution_wh
+ from utils.video import create_directory, generate_unique_name
+
+ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
+
+ MARKDOWN = """
+ # RF-DETR 🔥
+
+ [`[code]`](https://github.com/roboflow/rf-detr)
+ [`[blog]`](https://blog.roboflow.com/rf-detr)
+ [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
+
+ RF-DETR is a real-time, transformer-based object detection model architecture developed
+ by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
+ """
+
+ IMAGE_PROCESSING_EXAMPLES = [
+     ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
+     ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
+     ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
+ ]
+ VIDEO_PROCESSING_EXAMPLES = [
+     ["videos/people-walking.mp4", 0.3, 728, "large"],
+     ["videos/vehicles.mp4", 0.3, 728, "large"],
+ ]
+
+ COLOR = sv.ColorPalette.from_hex([
+     "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
+     "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
+ ])
+
+ MAX_VIDEO_LENGTH_SECONDS = 5
+ VIDEO_SCALE_FACTOR = 0.5
+ VIDEO_TARGET_DIRECTORY = "tmp"
+
+ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
+
+
+ def detect_and_annotate(
+     model: RFDETR,
+     image: ImageType,
+     confidence: float
+ ) -> ImageType:
+     detections = model.predict(image, threshold=confidence)
+
+     resolution_wh = calculate_resolution_wh(image)
+     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
+     thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
+
+     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
+     label_annotator = sv.LabelAnnotator(
+         color=COLOR,
+         text_color=sv.Color.BLACK,
+         text_scale=text_scale
+     )
+
+     labels = [
+         f"{COCO_CLASSES[class_id]} {confidence:.2f}"
+         for class_id, confidence
+         in zip(detections.class_id, detections.confidence)
+     ]
+
+     annotated_image = image.copy()
+     annotated_image = bbox_annotator.annotate(annotated_image, detections)
+     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
+     return annotated_image
+
+
+ def load_model(resolution: int, checkpoint: str) -> RFDETR:
+     if checkpoint == "base":
+         return RFDETRBase(resolution=resolution)
+     elif checkpoint == "large":
+         return RFDETRLarge(resolution=resolution)
+     raise ValueError("Checkpoint must be 'base' or 'large'.")
+
+
+ def image_processing_inference(
+     input_image: Image.Image,
+     confidence: float,
+     resolution: int,
+     checkpoint: str
+ ):
+     model = load_model(resolution=resolution, checkpoint=checkpoint)
+     return detect_and_annotate(model=model, image=input_image, confidence=confidence)
+
+
+ def video_processing_inference(
+     input_video: str,
+     confidence: float,
+     resolution: int,
+     checkpoint: str,
+     progress=gr.Progress(track_tqdm=True)
+ ):
+     model = load_model(resolution=resolution, checkpoint=checkpoint)
+
+     name = generate_unique_name()
+     output_video = os.path.join(VIDEO_TARGET_DIRECTORY, f"{name}.mp4")
+
+     video_info = sv.VideoInfo.from_video_path(input_video)
+     video_info.width = int(video_info.width * VIDEO_SCALE_FACTOR)
+     video_info.height = int(video_info.height * VIDEO_SCALE_FACTOR)
+
+     total = min(video_info.total_frames, video_info.fps * MAX_VIDEO_LENGTH_SECONDS)
+     frames_generator = sv.get_video_frames_generator(input_video, end=total)
+
+     with sv.VideoSink(output_video, video_info=video_info) as sink:
+         for frame in tqdm(frames_generator, total=total):
+             annotated_frame = detect_and_annotate(
+                 model=model,
+                 image=frame,
+                 confidence=confidence
+             )
+             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
+             sink.write_frame(annotated_frame)
+
+     return output_video
+
+ with gr.Blocks() as demo:
+     gr.Markdown(MARKDOWN)
+     with gr.Tab("Image"):
+         with gr.Row():
+             image_processing_input_image = gr.Image(
+                 label="Upload image",
+                 image_mode='RGB',
+                 type='pil',
+                 height=600
+             )
+             image_processing_output_image = gr.Image(
+                 label="Output image",
+                 image_mode='RGB',
+                 type='pil',
+                 height=600
+             )
+         with gr.Row():
+             with gr.Column():
+                 image_processing_confidence_slider = gr.Slider(
+                     label="Confidence",
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.05,
+                     value=0.5,
+                 )
+                 image_processing_resolution_slider = gr.Slider(
+                     label="Inference resolution",
+                     minimum=560,
+                     maximum=1120,
+                     step=56,
+                     value=728,
+                 )
+                 image_processing_checkpoint_dropdown = gr.Dropdown(
+                     label="Checkpoint",
+                     choices=["base", "large"],
+                     value="base"
+                 )
+             with gr.Column():
+                 image_processing_submit_button = gr.Button("Submit", variant="primary")
+
+         gr.Examples(
+             fn=image_processing_inference,
+             examples=IMAGE_PROCESSING_EXAMPLES,
+             inputs=[
+                 image_processing_input_image,
+                 image_processing_confidence_slider,
+                 image_processing_resolution_slider,
+                 image_processing_checkpoint_dropdown
+             ],
+             outputs=image_processing_output_image,
+             # run_on_click=True cannot be combined with cache_examples=True in Gradio
+             cache_examples=True
+         )
+
+         image_processing_submit_button.click(
+             image_processing_inference,
+             inputs=[
+                 image_processing_input_image,
+                 image_processing_confidence_slider,
+                 image_processing_resolution_slider,
+                 image_processing_checkpoint_dropdown
+             ],
+             outputs=image_processing_output_image,
+         )
+     with gr.Tab("Video"):
+         with gr.Row():
+             video_processing_input_video = gr.Video(
+                 label='Upload video',
+                 height=600
+             )
+             video_processing_output_video = gr.Video(
+                 label='Output video',
+                 height=600
+             )
+         with gr.Row():
+             with gr.Column():
+                 video_processing_confidence_slider = gr.Slider(
+                     label="Confidence",
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.05,
+                     value=0.5,
+                 )
+                 video_processing_resolution_slider = gr.Slider(
+                     label="Inference resolution",
+                     minimum=560,
+                     maximum=1120,
+                     step=56,
+                     value=728,
+                 )
+                 video_processing_checkpoint_dropdown = gr.Dropdown(
+                     label="Checkpoint",
+                     choices=["base", "large"],
+                     value="base"
+                 )
+             with gr.Column():
+                 video_processing_submit_button = gr.Button("Submit", variant="primary")
+
+         gr.Examples(
+             fn=video_processing_inference,
+             examples=VIDEO_PROCESSING_EXAMPLES,
+             inputs=[
+                 video_processing_input_video,
+                 video_processing_confidence_slider,
+                 video_processing_resolution_slider,
+                 video_processing_checkpoint_dropdown
+             ],
+             outputs=video_processing_output_video,
+             run_on_click=True
+         )
+
+         video_processing_submit_button.click(
+             video_processing_inference,
+             inputs=[
+                 video_processing_input_video,
+                 video_processing_confidence_slider,
+                 video_processing_resolution_slider,
+                 video_processing_checkpoint_dropdown
+             ],
+             outputs=video_processing_output_video
+         )
+
+ demo.launch(debug=False, show_error=True)
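
For reference, a minimal standalone sketch of the image inference path without the Gradio UI, assuming the rfdetr and supervision APIs used in app.py above and a hypothetical local test image dog.jpeg:

```python
import supervision as sv
from PIL import Image
from rfdetr import RFDETRBase

model = RFDETRBase(resolution=560)  # base checkpoint; weights download on first use
image = Image.open("dog.jpeg")      # hypothetical local test image

# predict() returns supervision Detections, which plug directly into annotators.
detections = model.predict(image, threshold=0.5)
annotated = sv.BoxAnnotator().annotate(image.copy(), detections)
annotated = sv.LabelAnnotator().annotate(annotated, detections)
annotated.save("dog_annotated.jpeg")
```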
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ gradio
+ spaces
+ rfdetr
+ tqdm
utils/__init__.py ADDED
File without changes
utils/image.py ADDED
@@ -0,0 +1,16 @@
+ from typing import Tuple, Union
+ from PIL import Image
+ import numpy as np
+
+ def calculate_resolution_wh(image: Union[Image.Image, np.ndarray]) -> Tuple[int, int]:
+
+     if isinstance(image, Image.Image):
+         return image.size
+     elif isinstance(image, np.ndarray):
+         if image.ndim >= 2:
+             h, w = image.shape[:2]
+             return w, h
+         else:
+             raise ValueError("Input numpy array image must have at least 2 dimensions (height, width).")
+     else:
+         raise TypeError("Input image must be a Pillow Image or a numpy array.")
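
Both branches above normalize to the same (width, height) order: PIL's .size is already (width, height), while a numpy array's shape leads with (height, width). A quick usage sketch, assuming the module layout from this commit:

```python
import numpy as np
from PIL import Image

from utils.image import calculate_resolution_wh

pil_image = Image.new("RGB", (1280, 720))               # PIL size is (width, height)
array_image = np.zeros((720, 1280, 3), dtype=np.uint8)  # numpy shape is (height, width, channels)

assert calculate_resolution_wh(pil_image) == (1280, 720)
assert calculate_resolution_wh(array_image) == (1280, 720)
```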
utils/video.py ADDED
@@ -0,0 +1,26 @@
+ import datetime
+ import os
+ import shutil
+ import uuid
+
+
+ def create_directory(directory_path: str) -> None:
+     if not os.path.exists(directory_path):
+         os.makedirs(directory_path)
+
+
+ def delete_directory(directory_path: str) -> None:
+     if not os.path.exists(directory_path):
+         raise FileNotFoundError(f"Directory '{directory_path}' does not exist.")
+
+     try:
+         shutil.rmtree(directory_path)
+     except PermissionError:
+         raise PermissionError(
+             f"Permission denied: Unable to delete '{directory_path}'.")
+
+
+ def generate_unique_name():
+     current_datetime = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+     unique_id = uuid.uuid4()
+     return f"{current_datetime}_{unique_id}"
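
app.py pairs these helpers to build a collision-free output path per request. A short usage sketch, assuming the module layout from this commit:

```python
import os

from utils.video import create_directory, generate_unique_name

create_directory(directory_path="tmp")  # no-op if "tmp" already exists
name = generate_unique_name()           # "<YYYYMMDDHHMMSS>_<uuid4>" timestamp-plus-UUID string
output_path = os.path.join("tmp", f"{name}.mp4")
print(output_path)
```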