gl-ai / object_detection.py
graceliying82
some adjustment
fe2ec56
import functools

import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont
from transformers import DetrImageProcessor, DetrForObjectDetection
_DETR_CHECKPOINT = "facebook/detr-resnet-50"
_DETR_REVISION = "no_timm"
_DETECTION_THRESHOLD = 0.9


@functools.lru_cache(maxsize=1)
def _load_detr():
    """Load the DETR processor and model once and reuse them on later calls."""
    processor = DetrImageProcessor.from_pretrained(_DETR_CHECKPOINT, revision=_DETR_REVISION)
    model = DetrForObjectDetection.from_pretrained(_DETR_CHECKPOINT, revision=_DETR_REVISION)
    return processor, model


def render_result_in_image(image):
    """
    Detect objects in the input image and draw them on a copy of it.

    Args:
        image (PIL.Image): Input image. ``None`` is passed through unchanged.

    Returns:
        PIL.Image: Copy of the input with bounding boxes and labels drawn,
        or ``None`` when no image was given.
    """
    # NOTE(review): a UI layer such as Gradio presumably passes None when the
    # image input is empty; return early instead of failing in the processor.
    if image is None:
        return None

    # Loading the checkpoint is expensive, so it is cached after the first call.
    processor, model = _load_detr()

    inputs = processor(images=image, return_tensors="pt")
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    # Keep only detections whose score exceeds the threshold.
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=_DETECTION_THRESHOLD
    )[0]

    # Copy the mapping as-is so non-contiguous class ids cannot raise KeyError.
    id2label = dict(model.config.id2label)

    # Draw on a copy so the caller's image is left untouched.
    return render_result_in_image_helper(image.copy(), results, id2label)
def render_result_in_image_helper(
    image,
    results,
    id2label,
    *,
    box_color="red",
    box_width=20,
    text_color="white",
    font=None,
):
    """
    Helper function to render detected objects in the input image.

    The image is drawn on in place and the same object is returned; pass a
    copy if the original must be preserved.

    Args:
        image (PIL.Image): Image to draw on.
        results (dict): Detection results containing 'scores', 'labels', and
            'boxes', iterated in lockstep. Each label must support ``.item()``
            and each box ``.tolist()`` yielding four coordinates.
        id2label (dict): Mapping from class indices to class labels.
        box_color: Outline colour of the bounding boxes.
        box_width (int): Outline width of the bounding boxes, in pixels.
        text_color: Fill colour of the label text.
        font: Font used for the label text; defaults to PIL's built-in font.

    Returns:
        PIL.Image: The same image with bounding boxes and labels drawn.
    """
    draw = ImageDraw.Draw(image)
    if font is None:
        font = ImageFont.load_default()

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        # Convert once to plain Python numbers so PIL never sees tensor objects.
        x_min, y_min, x_max, y_max = box.tolist()

        # Draw bounding box
        draw.rectangle([x_min, y_min, x_max, y_max], outline=box_color, width=box_width)

        # Draw label at the top-left corner of the box
        label_text = f"{id2label[label.item()]}: {float(score):.4f}"
        draw.text((x_min, y_min), label_text, fill=text_color, font=font)

    return image