Spaces:
Sleeping
Sleeping
File size: 2,201 Bytes
10f0dc2 0352ab9 10f0dc2 83db32b 10f0dc2 83db32b 0352ab9 83db32b 0352ab9 10f0dc2 b6eb957 0352ab9 10f0dc2 0352ab9 10f0dc2 83db32b 10f0dc2 b6eb957 0352ab9 b6eb957 83db32b 10f0dc2 b6eb957 10f0dc2 83db32b 10f0dc2 83db32b 10f0dc2 0352ab9 10f0dc2 b6eb957 50ee613 83db32b 10f0dc2 83db32b 10f0dc2 b6eb957 0352ab9 10f0dc2 b6eb957 0352ab9 83db32b 0352ab9 83db32b 10f0dc2 f94528a 83db32b 0352ab9 83db32b 50ee613 b6eb957 10f0dc2 0352ab9 10f0dc2 b6eb957 d01d490 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | import torch
import gradio as gr
from transformers import Owlv2Processor, Owlv2ForObjectDetection
import cv2
import spaces
# ===============================
# DEVICE
# ===============================
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The processor and the detection model are loaded from the same checkpoint;
# only the model is moved onto the selected device.
processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble")
model = model.to(device)
# ===============================
# MAIN FUNCTION
# ===============================
@spaces.GPU
def query_image(img, text_queries, score_threshold):
    """Detect the requested classes in an image and outline each detection.

    Args:
        img: Image as a NumPy array; ``img.shape[:2]`` is used as the
            (height, width) that box coordinates are scaled to.
        text_queries: Comma-separated class names, e.g. ``"cat, dog"``.
        score_threshold: Minimum detection score for a box to be kept.

    Returns:
        A tuple ``(annotated, output_boxes)``: a copy of ``img`` with one
        green rectangle per kept detection, and a list of
        ``[x1, y1, x2, y2]`` integer box coordinates.

    Raises:
        gr.Error: If no image or no non-empty class name was supplied.
    """
    if img is None:
        raise gr.Error("Please upload an image.")

    # Trim whitespace and drop the blanks left by inputs such as "cat, dog,".
    queries = [part.strip() for part in (text_queries or "").split(",")]
    queries = [query for query in queries if query]
    if not queries:
        raise gr.Error("Please enter at least one class name.")

    inputs = processor(
        text=queries,
        images=img,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Move the tensors used by post-processing to the CPU.
    outputs.logits = outputs.logits.cpu()
    outputs.pred_boxes = outputs.pred_boxes.cpu()

    # (height, width) of the input image, one row per image in the batch.
    target_sizes = torch.tensor([img.shape[:2]])

    # Pass the requested threshold through to the post-processor. Left unset,
    # the post-processor applies its own default cut-off first (0.1 in the
    # transformers documentation -- confirm for the installed version), so
    # lower slider values would silently have no effect.
    results = processor.post_process_grounded_object_detection(
        outputs=outputs,
        threshold=score_threshold,
        target_sizes=target_sizes,
    )[0]

    # Draw on a copy so the caller's array is left untouched.
    annotated = img.copy()
    output_boxes = []
    for box in results["boxes"]:
        x1, y1, x2, y2 = map(int, box.tolist())
        output_boxes.append([x1, y1, x2, y2])
        # Rectangle only, no label text.
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)

    return annotated, output_boxes
# ===============================
# GRADIO UI
# ===============================
demo = gr.Interface(
    fn=query_image,
    title="OWLv2 Bounding Box Coordinates Only",
    # Input components map, in order, onto query_image's three parameters.
    inputs=[
        gr.Image(type="numpy"),
        gr.Textbox(label="Classes (comma separated)"),
        gr.Slider(minimum=0, maximum=1, value=0.1),
    ],
    # Output components map, in order, onto query_image's two return values.
    outputs=[
        gr.Image(label="Bounding Boxes"),
        gr.JSON(label="Coordinates Only"),
    ],
)
# Start the Gradio server for the interface defined above.
demo.launch()