# Spaces: Running on Zero
import gradio as gr
import spaces
import supervision as sv
import torch
from transformers import (
    AutoImageProcessor,
    RfDetrForInstanceSegmentation,
    RfDetrForObjectDetection,
)
| def _is_segmentation_model(model_basename: str) -> bool: | |
| return "seg" in model_basename | |
def infer(model_name: str, image, confidence_threshold: float):
    """Run an RF-DETR checkpoint on ``image`` and draw its predictions.

    Args:
        model_name: Repository basename under the ``stevenbucaille`` Hub
            namespace (e.g. ``"rf-detr-base"``). Basenames for which
            ``_is_segmentation_model`` returns True are loaded as
            instance-segmentation models, all others as object-detection
            models.
        image: Input image exposing a PIL-style ``size`` attribute
            (``(width, height)``).
        confidence_threshold: Score threshold forwarded to the processor's
            post-processing step.

    Returns:
        The annotated image as returned by the supervision annotators:
        masks plus labels for segmentation models, boxes plus labels for
        detection models.

    Raises:
        gr.Error: If no image was provided.
    """
    # Surface a readable message in the UI instead of an AttributeError
    # when the handler is triggered without an image.
    if image is None:
        raise gr.Error("Please provide an input image.")

    # Dynamically scale label text with the image width.
    width, height = image.size
    text_scale = (width / 1000) * 0.5
    text_thickness = max(1, int(round(width / 500)))

    hub_model_id = f"stevenbucaille/{model_name}"
    segmentation = _is_segmentation_model(model_name)

    processor = AutoImageProcessor.from_pretrained(hub_model_id)
    if segmentation:
        model = RfDetrForInstanceSegmentation.from_pretrained(hub_model_id)
    else:
        model = RfDetrForObjectDetection.from_pretrained(hub_model_id)

    # NOTE(review): the model is never moved explicitly; inputs simply follow
    # whatever device its parameters are already on - confirm this is intended.
    inputs = processor(images=image, return_tensors="pt")
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing expects target sizes as (height, width).
    if segmentation:
        results = processor.post_process_instance_segmentation(
            outputs,
            target_sizes=[(height, width)],
            threshold=confidence_threshold,
        )[0]
    else:
        results = processor.post_process_object_detection(
            outputs,
            target_sizes=torch.tensor([(height, width)]),
            threshold=confidence_threshold,
        )[0]

    detections = sv.Detections.from_transformers(
        transformers_results=results, id2label=model.config.id2label
    )
    labels = [
        f"{class_name} {confidence:.2f}"
        for class_name, confidence in zip(
            detections["class_name"], detections.confidence
        )
    ]

    # Masks for segmentation models, boxes otherwise; labels are drawn on top
    # in both cases.
    if segmentation:
        shape_annotator = sv.MaskAnnotator(
            color_lookup=sv.ColorLookup.CLASS,
            opacity=0.5,
        )
    else:
        shape_annotator = sv.BoxAnnotator()
    label_annotator = sv.LabelAnnotator(
        text_padding=4,
        text_scale=text_scale,
        text_thickness=text_thickness,
        smart_position=True,
    )

    annotated = shape_annotator.annotate(image, detections)
    return label_annotator.annotate(annotated, detections, labels)
# Gradio UI: model / threshold / image inputs on the left, annotated output on
# the right, wired to `infer` through the "Infer" button.
# NOTE(review): nesting was reconstructed from a flattened source - confirm the
# intended placement of `gr.Examples` (kept at Blocks level, below the row).
with gr.Blocks() as demo:
    gr.Markdown("# RF-DETR Object Detection")
    gr.Markdown(
        "RF-DETR is a transformer-based object detection model that is trained on the Objects365 and COCO datasets."
    )
    gr.Markdown(
        "This space is a demo of the RF-DETR model. You can select a model and an image and see the results."
    )
    with gr.Row():
        with gr.Column():
            # Choices are Hub repository basenames; `infer` prefixes them with
            # the "stevenbucaille/" namespace.
            model = gr.Radio(
                [
                    "rf-detr-base",
                    "rf-detr-base-2",
                    "rf-detr-large",
                    "rf-detr-medium",
                    "rf-detr-nano",
                    "rf-detr-seg-large",
                    "rf-detr-seg-medium",
                    "rf-detr-seg-nano",
                    "rf-detr-seg-preview",
                    "rf-detr-seg-small",
                    "rf-detr-seg-xlarge",
                    "rf-detr-seg-xxlarge",
                    "rf-detr-segmentation",
                    "rf-detr-small",
                ],
                value="rf-detr-base",
                label="Model",
            )
            confidence_threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.3,
                step=0.1,
                label="Confidence Threshold",
            )
            input_image = gr.Image(label="Input Image", type="pil")
            send_btn = gr.Button("Infer", variant="primary")
        with gr.Column():
            output_image = gr.Image(label="Output Image", type="pil")
    gr.Examples(
        examples=[
            "samples/cats.jpg",
            "samples/detectron2.png",
            "samples/cat.jpg",
            "samples/hotdog.jpg",
        ],
        inputs=input_image,
    )
    # Event listeners must be registered inside the Blocks context.
    send_btn.click(
        fn=infer,
        inputs=[model, input_image, confidence_threshold],
        outputs=[output_image],
    )

demo.launch(debug=True)