Spaces:

Roboflow
/

RF-DETR

Running on T4

App Files Community

gradio-runtime-fixes

by onuralpszr - opened Jul 20, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+29

-110

Files changed (2) hide show

app.py +28 -109
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -6,11 +6,7 @@ import gradio as gr
 import numpy as np
 import supervision as sv
 from PIL import Image
-from rfdetr import (
-    RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge,
-    RFDETRSegNano, RFDETRSegSmall, RFDETRSegMedium,
-    RFDETRSegLarge, RFDETRSegXLarge, RFDETRSeg2XLarge,
-)
 from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
@@ -21,27 +17,23 @@ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
 MARKDOWN = """
 # RF-DETR 🔥
 [`[code]`](https://github.com/roboflow/rf-detr)
 [`[blog]`](https://blog.roboflow.com/rf-detr)
 [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 RF-DETR is a real-time, transformer-based object detection model architecture developed
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 IMAGE_PROCESSING_EXAMPLES = [
-    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 1024, "medium (object detection)"],
-    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 1024, "medium (object detection)"],
-    ['https://media.roboflow.com/supervision/image-examples/motorbike.png', 0.3, 1024, "medium (object detection)"],
-    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "nano (object detection)"],
-    ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "nano (object detection)"],
-    ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "nano (object detection)"],
-    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "medium (instance segmentation)"],
-    ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "medium (instance segmentation)"],
-    ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "medium (instance segmentation)"],
 ]
 VIDEO_PROCESSING_EXAMPLES = [
-    ["videos/people-walking.mp4", 0.3, 1024, "medium (object detection)"],
-    ["videos/vehicles.mp4", 0.3, 1024, "medium (object detection)"],
 ]
 COLOR = sv.ColorPalette.from_hex([
@@ -59,8 +51,7 @@ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
 def detect_and_annotate(
         model: RFDETR,
         image: ImageType,
-        confidence: float,
-        checkpoint: str = "medium (object detection)"
 ) -> ImageType:
     detections = model.predict(image, threshold=confidence)
@@ -68,7 +59,6 @@ def detect_and_annotate(
     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
     thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
-    mask_annotator = sv.MaskAnnotator(color=COLOR)
     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
@@ -81,71 +71,19 @@ def detect_and_annotate(
         for class_id, confidence
         in zip(detections.class_id, detections.confidence)
     ]
-    print(detections)
     annotated_image = image.copy()
-    if checkpoint in SEGMENTATION_CHECKPOINTS:
-        annotated_image = mask_annotator.annotate(annotated_image, detections)
     annotated_image = bbox_annotator.annotate(annotated_image, detections)
     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
     return annotated_image
 def load_model(resolution: int, checkpoint: str) -> RFDETR:
-    if checkpoint == "nano (object detection)":
-        return RFDETRNano(resolution=resolution)
-    if checkpoint == "small (object detection)":
-        return RFDETRSmall(resolution=resolution)
-    if checkpoint == "medium (object detection)":
-        return RFDETRMedium(resolution=resolution)
-    if checkpoint == "base (object detection)":
         return RFDETRBase(resolution=resolution)
-    if checkpoint == "large (object detection)":
         return RFDETRLarge(resolution=resolution)
-    if checkpoint == "nano (instance segmentation)":
-        return RFDETRSegNano(resolution=resolution)
-    if checkpoint == "small (instance segmentation)":
-        return RFDETRSegSmall(resolution=resolution)
-    if checkpoint == "medium (instance segmentation)":
-        return RFDETRSegMedium(resolution=resolution)
-    if checkpoint == "large (instance segmentation)":
-        return RFDETRSegLarge(resolution=resolution)
-    if checkpoint == "xlarge (instance segmentation)":
-        return RFDETRSegXLarge(resolution=resolution)
-    if checkpoint == "2xlarge (instance segmentation)":
-        return RFDETRSeg2XLarge(resolution=resolution)
-    raise TypeError(f"Unknown checkpoint: {checkpoint}")
-SEGMENTATION_CHECKPOINTS = {
-    "nano (instance segmentation)",
-    "small (instance segmentation)",
-    "medium (instance segmentation)",
-    "large (instance segmentation)",
-    "xlarge (instance segmentation)",
-    "2xlarge (instance segmentation)",
-}
-def adjust_resolution(checkpoint: str, resolution: int) -> int:
-    if checkpoint in SEGMENTATION_CHECKPOINTS:
-        divisor = 24
-    elif checkpoint in {"nano (object detection)", "small (object detection)", "medium (object detection)"}:
-        divisor = 32
-    elif checkpoint in {"base (object detection)", "large (object detection)"}:
-        divisor = 56
-    else:
-        raise ValueError(f"Unknown checkpoint: {checkpoint}")
-    remainder = resolution % divisor
-    if remainder == 0:
-        return resolution
-    lower = resolution - remainder
-    upper = lower + divisor
-    if resolution - lower < upper - resolution:
-        return lower
-    else:
-        return upper
 def image_processing_inference(
@@ -154,9 +92,8 @@ def image_processing_inference(
         resolution: int,
         checkpoint: str
 ):
-    resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
     model = load_model(resolution=resolution, checkpoint=checkpoint)
-    return detect_and_annotate(model=model, image=input_image, confidence=confidence, checkpoint=checkpoint)
 def video_processing_inference(
@@ -164,8 +101,8 @@ def video_processing_inference(
         confidence: float,
         resolution: int,
         checkpoint: str,
 ):
-    resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     name = generate_unique_name()
@@ -183,8 +120,7 @@ def video_processing_inference(
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
-                confidence=confidence,
-                checkpoint=checkpoint
             )
             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
@@ -218,25 +154,15 @@ with gr.Blocks() as demo:
                 )
                 image_processing_resolution_slider = gr.Slider(
                     label="Inference resolution",
-                    minimum=224,
-                    maximum=2240,
-                    step=1,
-                    value=896,
                 )
                 image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
-                    choices=[
-                        "nano (object detection)",
-                        "small (object detection)",
-                        "medium (object detection)",
-                        "nano (instance segmentation)",
-                        "small (instance segmentation)",
-                        "medium (instance segmentation)",
-                        "large (instance segmentation)",
-                        "xlarge (instance segmentation)",
-                        "2xlarge (instance segmentation)",
-                    ],
-                    value="medium (object detection)"
                 )
             with gr.Column():
                 image_processing_submit_button = gr.Button("Submit", value="primary")
@@ -251,6 +177,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
         )
         image_processing_submit_button.click(
@@ -291,18 +219,8 @@ with gr.Blocks() as demo:
                 )
                 video_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
-                    choices=[
-                        "nano (object detection)",
-                        "small (object detection)",
-                        "medium (object detection)",
-                        "nano (instance segmentation)",
-                        "small (instance segmentation)",
-                        "medium (instance segmentation)",
-                        "large (instance segmentation)",
-                        "xlarge (instance segmentation)",
-                        "2xlarge (instance segmentation)",
-                    ],
-                    value="medium (object detection)"
                 )
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
@@ -316,7 +234,8 @@ with gr.Blocks() as demo:
                 video_processing_resolution_slider,
                 video_processing_checkpoint_dropdown
             ],
-            outputs=video_processing_output_video
         )
         video_processing_submit_button.click(

 import numpy as np
 import supervision as sv
 from PIL import Image
+from rfdetr import RFDETRBase, RFDETRLarge
 from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
 MARKDOWN = """
 # RF-DETR 🔥
 [`[code]`](https://github.com/roboflow/rf-detr)
 [`[blog]`](https://blog.roboflow.com/rf-detr)
 [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 RF-DETR is a real-time, transformer-based object detection model architecture developed
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 IMAGE_PROCESSING_EXAMPLES = [
+    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
+    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
+    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
 VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
 ]
 COLOR = sv.ColorPalette.from_hex([
 def detect_and_annotate(
         model: RFDETR,
         image: ImageType,
+        confidence: float
 ) -> ImageType:
     detections = model.predict(image, threshold=confidence)
     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
     thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
         for class_id, confidence
         in zip(detections.class_id, detections.confidence)
     ]
     annotated_image = image.copy()
     annotated_image = bbox_annotator.annotate(annotated_image, detections)
     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
     return annotated_image
 def load_model(resolution: int, checkpoint: str) -> RFDETR:
+    if checkpoint == "base":
         return RFDETRBase(resolution=resolution)
+    elif checkpoint == "large":
         return RFDETRLarge(resolution=resolution)
+    raise TypeError("Checkpoint must be a base or large.")
 def image_processing_inference(
         resolution: int,
         checkpoint: str
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
+    return detect_and_annotate(model=model, image=input_image, confidence=confidence)
 def video_processing_inference(
         confidence: float,
         resolution: int,
         checkpoint: str,
+        progress=gr.Progress(track_tqdm=True)
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     name = generate_unique_name()
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
+                confidence=confidence
             )
             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
                 )
                 image_processing_resolution_slider = gr.Slider(
                     label="Inference resolution",
+                    minimum=560,
+                    maximum=1120,
+                    step=56,
+                    value=728,
                 )
                 image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
                 )
             with gr.Column():
                 image_processing_submit_button = gr.Button("Submit", value="primary")
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
+            cache_examples=True,
+            run_on_click=True
         )
         image_processing_submit_button.click(
                 )
                 video_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
                 )
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
                 video_processing_resolution_slider,
                 video_processing_checkpoint_dropdown
             ],
+            outputs=video_processing_output_video,
+            run_on_click=True
         )
         video_processing_submit_button.click(

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 gradio
 spaces
-rfdetr==1.6.5.post2
 tqdm

 gradio
 spaces
+rfdetr
 tqdm