gradio-runtime-fixes

#1
by onuralpszr - opened
Files changed (2) hide show
  1. app.py +28 -109
  2. requirements.txt +1 -1
app.py CHANGED
@@ -6,11 +6,7 @@ import gradio as gr
6
  import numpy as np
7
  import supervision as sv
8
  from PIL import Image
9
- from rfdetr import (
10
- RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge,
11
- RFDETRSegNano, RFDETRSegSmall, RFDETRSegMedium,
12
- RFDETRSegLarge, RFDETRSegXLarge, RFDETRSeg2XLarge,
13
- )
14
  from rfdetr.detr import RFDETR
15
  from rfdetr.util.coco_classes import COCO_CLASSES
16
 
@@ -21,27 +17,23 @@ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
21
 
22
  MARKDOWN = """
23
  # RF-DETR 🔥
 
24
  [`[code]`](https://github.com/roboflow/rf-detr)
25
  [`[blog]`](https://blog.roboflow.com/rf-detr)
26
  [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 
27
  RF-DETR is a real-time, transformer-based object detection model architecture developed
28
  by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
29
  """
30
 
31
  IMAGE_PROCESSING_EXAMPLES = [
32
- ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 1024, "medium (object detection)"],
33
- ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 1024, "medium (object detection)"],
34
- ['https://media.roboflow.com/supervision/image-examples/motorbike.png', 0.3, 1024, "medium (object detection)"],
35
- ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "nano (object detection)"],
36
- ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "nano (object detection)"],
37
- ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "nano (object detection)"],
38
- ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "medium (instance segmentation)"],
39
- ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "medium (instance segmentation)"],
40
- ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "medium (instance segmentation)"],
41
  ]
42
  VIDEO_PROCESSING_EXAMPLES = [
43
- ["videos/people-walking.mp4", 0.3, 1024, "medium (object detection)"],
44
- ["videos/vehicles.mp4", 0.3, 1024, "medium (object detection)"],
45
  ]
46
 
47
  COLOR = sv.ColorPalette.from_hex([
@@ -59,8 +51,7 @@ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
59
  def detect_and_annotate(
60
  model: RFDETR,
61
  image: ImageType,
62
- confidence: float,
63
- checkpoint: str = "medium (object detection)"
64
  ) -> ImageType:
65
  detections = model.predict(image, threshold=confidence)
66
 
@@ -68,7 +59,6 @@ def detect_and_annotate(
68
  text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
69
  thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
70
 
71
- mask_annotator = sv.MaskAnnotator(color=COLOR)
72
  bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
73
  label_annotator = sv.LabelAnnotator(
74
  color=COLOR,
@@ -81,71 +71,19 @@ def detect_and_annotate(
81
  for class_id, confidence
82
  in zip(detections.class_id, detections.confidence)
83
  ]
84
- print(detections)
85
  annotated_image = image.copy()
86
- if checkpoint in SEGMENTATION_CHECKPOINTS:
87
- annotated_image = mask_annotator.annotate(annotated_image, detections)
88
  annotated_image = bbox_annotator.annotate(annotated_image, detections)
89
  annotated_image = label_annotator.annotate(annotated_image, detections, labels)
90
  return annotated_image
91
 
92
 
93
  def load_model(resolution: int, checkpoint: str) -> RFDETR:
94
- if checkpoint == "nano (object detection)":
95
- return RFDETRNano(resolution=resolution)
96
- if checkpoint == "small (object detection)":
97
- return RFDETRSmall(resolution=resolution)
98
- if checkpoint == "medium (object detection)":
99
- return RFDETRMedium(resolution=resolution)
100
- if checkpoint == "base (object detection)":
101
  return RFDETRBase(resolution=resolution)
102
- if checkpoint == "large (object detection)":
103
  return RFDETRLarge(resolution=resolution)
104
- if checkpoint == "nano (instance segmentation)":
105
- return RFDETRSegNano(resolution=resolution)
106
- if checkpoint == "small (instance segmentation)":
107
- return RFDETRSegSmall(resolution=resolution)
108
- if checkpoint == "medium (instance segmentation)":
109
- return RFDETRSegMedium(resolution=resolution)
110
- if checkpoint == "large (instance segmentation)":
111
- return RFDETRSegLarge(resolution=resolution)
112
- if checkpoint == "xlarge (instance segmentation)":
113
- return RFDETRSegXLarge(resolution=resolution)
114
- if checkpoint == "2xlarge (instance segmentation)":
115
- return RFDETRSeg2XLarge(resolution=resolution)
116
- raise TypeError(f"Unknown checkpoint: {checkpoint}")
117
-
118
-
119
- SEGMENTATION_CHECKPOINTS = {
120
- "nano (instance segmentation)",
121
- "small (instance segmentation)",
122
- "medium (instance segmentation)",
123
- "large (instance segmentation)",
124
- "xlarge (instance segmentation)",
125
- "2xlarge (instance segmentation)",
126
- }
127
-
128
-
129
- def adjust_resolution(checkpoint: str, resolution: int) -> int:
130
- if checkpoint in SEGMENTATION_CHECKPOINTS:
131
- divisor = 24
132
- elif checkpoint in {"nano (object detection)", "small (object detection)", "medium (object detection)"}:
133
- divisor = 32
134
- elif checkpoint in {"base (object detection)", "large (object detection)"}:
135
- divisor = 56
136
- else:
137
- raise ValueError(f"Unknown checkpoint: {checkpoint}")
138
-
139
- remainder = resolution % divisor
140
- if remainder == 0:
141
- return resolution
142
- lower = resolution - remainder
143
- upper = lower + divisor
144
-
145
- if resolution - lower < upper - resolution:
146
- return lower
147
- else:
148
- return upper
149
 
150
 
151
  def image_processing_inference(
@@ -154,9 +92,8 @@ def image_processing_inference(
154
  resolution: int,
155
  checkpoint: str
156
  ):
157
- resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
158
  model = load_model(resolution=resolution, checkpoint=checkpoint)
159
- return detect_and_annotate(model=model, image=input_image, confidence=confidence, checkpoint=checkpoint)
160
 
161
 
162
  def video_processing_inference(
@@ -164,8 +101,8 @@ def video_processing_inference(
164
  confidence: float,
165
  resolution: int,
166
  checkpoint: str,
 
167
  ):
168
- resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
169
  model = load_model(resolution=resolution, checkpoint=checkpoint)
170
 
171
  name = generate_unique_name()
@@ -183,8 +120,7 @@ def video_processing_inference(
183
  annotated_frame = detect_and_annotate(
184
  model=model,
185
  image=frame,
186
- confidence=confidence,
187
- checkpoint=checkpoint
188
  )
189
  annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
190
  sink.write_frame(annotated_frame)
@@ -218,25 +154,15 @@ with gr.Blocks() as demo:
218
  )
219
  image_processing_resolution_slider = gr.Slider(
220
  label="Inference resolution",
221
- minimum=224,
222
- maximum=2240,
223
- step=1,
224
- value=896,
225
  )
226
  image_processing_checkpoint_dropdown = gr.Dropdown(
227
  label="Checkpoint",
228
- choices=[
229
- "nano (object detection)",
230
- "small (object detection)",
231
- "medium (object detection)",
232
- "nano (instance segmentation)",
233
- "small (instance segmentation)",
234
- "medium (instance segmentation)",
235
- "large (instance segmentation)",
236
- "xlarge (instance segmentation)",
237
- "2xlarge (instance segmentation)",
238
- ],
239
- value="medium (object detection)"
240
  )
241
  with gr.Column():
242
  image_processing_submit_button = gr.Button("Submit", value="primary")
@@ -251,6 +177,8 @@ with gr.Blocks() as demo:
251
  image_processing_checkpoint_dropdown
252
  ],
253
  outputs=image_processing_output_image,
 
 
254
  )
255
 
256
  image_processing_submit_button.click(
@@ -291,18 +219,8 @@ with gr.Blocks() as demo:
291
  )
292
  video_processing_checkpoint_dropdown = gr.Dropdown(
293
  label="Checkpoint",
294
- choices=[
295
- "nano (object detection)",
296
- "small (object detection)",
297
- "medium (object detection)",
298
- "nano (instance segmentation)",
299
- "small (instance segmentation)",
300
- "medium (instance segmentation)",
301
- "large (instance segmentation)",
302
- "xlarge (instance segmentation)",
303
- "2xlarge (instance segmentation)",
304
- ],
305
- value="medium (object detection)"
306
  )
307
  with gr.Column():
308
  video_processing_submit_button = gr.Button("Submit", value="primary")
@@ -316,7 +234,8 @@ with gr.Blocks() as demo:
316
  video_processing_resolution_slider,
317
  video_processing_checkpoint_dropdown
318
  ],
319
- outputs=video_processing_output_video
 
320
  )
321
 
322
  video_processing_submit_button.click(
 
6
  import numpy as np
7
  import supervision as sv
8
  from PIL import Image
9
+ from rfdetr import RFDETRBase, RFDETRLarge
 
 
 
 
10
  from rfdetr.detr import RFDETR
11
  from rfdetr.util.coco_classes import COCO_CLASSES
12
 
 
17
 
18
  MARKDOWN = """
19
  # RF-DETR 🔥
20
+
21
  [`[code]`](https://github.com/roboflow/rf-detr)
22
  [`[blog]`](https://blog.roboflow.com/rf-detr)
23
  [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
24
+
25
  RF-DETR is a real-time, transformer-based object detection model architecture developed
26
  by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
27
  """
28
 
29
  IMAGE_PROCESSING_EXAMPLES = [
30
+ ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
31
+ ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
32
+ ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 
 
 
 
 
 
33
  ]
34
  VIDEO_PROCESSING_EXAMPLES = [
35
+ ["videos/people-walking.mp4", 0.3, 728, "large"],
36
+ ["videos/vehicles.mp4", 0.3, 728, "large"],
37
  ]
38
 
39
  COLOR = sv.ColorPalette.from_hex([
 
51
  def detect_and_annotate(
52
  model: RFDETR,
53
  image: ImageType,
54
+ confidence: float
 
55
  ) -> ImageType:
56
  detections = model.predict(image, threshold=confidence)
57
 
 
59
  text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
60
  thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
61
 
 
62
  bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
63
  label_annotator = sv.LabelAnnotator(
64
  color=COLOR,
 
71
  for class_id, confidence
72
  in zip(detections.class_id, detections.confidence)
73
  ]
74
+
75
  annotated_image = image.copy()
 
 
76
  annotated_image = bbox_annotator.annotate(annotated_image, detections)
77
  annotated_image = label_annotator.annotate(annotated_image, detections, labels)
78
  return annotated_image
79
 
80
 
81
  def load_model(resolution: int, checkpoint: str) -> RFDETR:
82
+ if checkpoint == "base":
 
 
 
 
 
 
83
  return RFDETRBase(resolution=resolution)
84
+ elif checkpoint == "large":
85
  return RFDETRLarge(resolution=resolution)
86
+ raise TypeError("Checkpoint must be a base or large.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
 
89
  def image_processing_inference(
 
92
  resolution: int,
93
  checkpoint: str
94
  ):
 
95
  model = load_model(resolution=resolution, checkpoint=checkpoint)
96
+ return detect_and_annotate(model=model, image=input_image, confidence=confidence)
97
 
98
 
99
  def video_processing_inference(
 
101
  confidence: float,
102
  resolution: int,
103
  checkpoint: str,
104
+ progress=gr.Progress(track_tqdm=True)
105
  ):
 
106
  model = load_model(resolution=resolution, checkpoint=checkpoint)
107
 
108
  name = generate_unique_name()
 
120
  annotated_frame = detect_and_annotate(
121
  model=model,
122
  image=frame,
123
+ confidence=confidence
 
124
  )
125
  annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
126
  sink.write_frame(annotated_frame)
 
154
  )
155
  image_processing_resolution_slider = gr.Slider(
156
  label="Inference resolution",
157
+ minimum=560,
158
+ maximum=1120,
159
+ step=56,
160
+ value=728,
161
  )
162
  image_processing_checkpoint_dropdown = gr.Dropdown(
163
  label="Checkpoint",
164
+ choices=["base", "large"],
165
+ value="base"
 
 
 
 
 
 
 
 
 
 
166
  )
167
  with gr.Column():
168
  image_processing_submit_button = gr.Button("Submit", value="primary")
 
177
  image_processing_checkpoint_dropdown
178
  ],
179
  outputs=image_processing_output_image,
180
+ cache_examples=True,
181
+ run_on_click=True
182
  )
183
 
184
  image_processing_submit_button.click(
 
219
  )
220
  video_processing_checkpoint_dropdown = gr.Dropdown(
221
  label="Checkpoint",
222
+ choices=["base", "large"],
223
+ value="base"
 
 
 
 
 
 
 
 
 
 
224
  )
225
  with gr.Column():
226
  video_processing_submit_button = gr.Button("Submit", value="primary")
 
234
  video_processing_resolution_slider,
235
  video_processing_checkpoint_dropdown
236
  ],
237
+ outputs=video_processing_output_video,
238
+ run_on_click=True
239
  )
240
 
241
  video_processing_submit_button.click(
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  gradio
2
  spaces
3
- rfdetr==1.6.5.post2
4
  tqdm
 
1
  gradio
2
  spaces
3
+ rfdetr
4
  tqdm