# Spaces status: Runtime error
"""Zero-shot object detection with OWLv2: detect, annotate, and save.

Loads an image from disk, queries the ``google/owlv2-large-patch14-ensemble``
checkpoint with free-text labels, prints each detection returned by the
processor's post-processing step, and writes an annotated copy of the image
to ``output_img.jpg``.
"""
import os

import requests
import torch
from PIL import Image, ImageDraw, ImageFont
from transformers import Owlv2Processor, Owlv2ForObjectDetection

# Load the model and processor
processor = Owlv2Processor.from_pretrained("google/owlv2-large-patch14-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-large-patch14-ensemble")

# Load image from local file
image_path = "image.jpg"  # Replace with your image path
# Convert to RGB up front: palette/alpha/greyscale inputs would otherwise fail
# when the annotated copy is written out as JPEG. The context manager closes
# the underlying file handle once the pixel data has been copied.
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")

# Define what you want to detect (one list of text queries per image)
text_labels = [["a person with a hat"]]

# Process the image and text
inputs = processor(text=text_labels, images=image, return_tensors="pt")
# Inference only: skip building the autograd graph to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)

# Target image sizes (height, width) to rescale box predictions [batch_size, 2]
target_sizes = torch.tensor([(image.height, image.width)])

# Convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
results = processor.post_process_grounded_object_detection(
    outputs=outputs, target_sizes=target_sizes, threshold=0.1, text_labels=text_labels
)

# Retrieve predictions for the first (and only) image
result = results[0]
boxes, scores, text_labels_detected = result["boxes"], result["scores"], result["text_labels"]

# Draw on a copy so the original image stays untouched
output_image = image.copy()
draw = ImageDraw.Draw(output_image)

# Prefer Arial when it is installed; otherwise fall back to Pillow's built-in font
try:
    font = ImageFont.truetype("arial.ttf", 16)
except OSError:
    font = ImageFont.load_default()

# Colors cycled across detections
colors = ["red", "blue", "green", "orange", "purple", "yellow", "pink", "cyan"]

print("Detection Results:")
print("-" * 50)

if len(boxes) == 0:
    print("No objects detected.")

# Draw bounding boxes and labels
for i, (box, score, text_label) in enumerate(zip(boxes, scores, text_labels_detected)):
    # Use a distinct comprehension variable so the detection index `i` is not shadowed.
    box = [round(coord, 2) for coord in box.tolist()]
    confidence = round(score.item(), 3)
    print(f"Detected {text_label} with confidence {confidence} at location {box}")

    xmin, ymin, xmax, ymax = box
    color = colors[i % len(colors)]

    # Draw bounding box
    draw.rectangle([xmin, ymin, xmax, ymax], outline=color, width=3)

    # Place the label just above the box, clamped so it stays on the canvas
    # when the box touches the top edge of the image.
    label_text = f"{text_label}: {confidence}"
    label_origin = (xmin, max(ymin - 25, 0))

    # Filled background sized to the rendered text, with 2px padding
    bbox = draw.textbbox(label_origin, label_text, font=font)
    draw.rectangle([bbox[0] - 2, bbox[1] - 2, bbox[2] + 2, bbox[3] + 2], fill=color)
    draw.text(label_origin, label_text, fill="white", font=font)

# Save the output image
output_path = "output_img.jpg"
output_image.save(output_path)
print(f"\nOutput image saved as: {output_path}")