Spaces:

Nadun102
/

zeropredict

Sleeping

App Files Files Community

Nadun102 committed on Apr 9

Commit

d01d490

verified ·

1 Parent(s): f94528a

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -35

app.py CHANGED Viewed

@@ -1,16 +1,12 @@
 import torch
 import gradio as gr
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
-import spaces
-# --------------------------
 # Device
-# --------------------------
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# --------------------------
 # Load model
-# --------------------------
 model = Owlv2ForObjectDetection.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 ).to(device)
@@ -19,75 +15,85 @@ processor = Owlv2Processor.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 )
-# --------------------------
-# Detection
-# --------------------------
-@spaces.GPU
 def query_image(img, text_queries, score_threshold):
-    text_queries = [q.strip() for q in text_queries.split(",")]
-    # Correct size
     h, w = img.shape[:2]
     target_sizes = torch.tensor([[h, w]])
     inputs = processor(
-        text=text_queries,
         images=img,
         return_tensors="pt"
     ).to(device)
     with torch.no_grad():
         outputs = model(**inputs)
     outputs.logits = outputs.logits.cpu()
     outputs.pred_boxes = outputs.pred_boxes.cpu()
-    # ✅ FIXED FUNCTION NAME
     results = processor.post_process_grounded_object_detection(
         outputs=outputs,
-        target_sizes=target_sizes
     )
     boxes = results[0]["boxes"]
     scores = results[0]["scores"]
     labels = results[0]["labels"]
-    detections = []
     for box, score, label in zip(boxes, scores, labels):
-        if score < score_threshold:
-            continue
-        x1, y1, x2, y2 = box.tolist()
-        detections.append({
-            "box": [round(x1,2), round(y1,2), round(x2,2), round(y2,2)],
-            "label": text_queries[label.item()],
-            "score": round(float(score), 3)
-        })
-    return img, detections
-# --------------------------
 # UI
-# --------------------------
 demo = gr.Interface(
     fn=query_image,
     inputs=[
         gr.Image(type="numpy"),
-        gr.Textbox(value="person, car, dog"),
-        gr.Slider(0, 1, value=0.2)
     ],
     outputs=gr.AnnotatedImage(),
-    title="OWLv2 Detection",
-    description="Enter objects like: person, car, dog"
 )
-# --------------------------
-# Run
-# --------------------------
-if __name__ == "__main__":
-    demo.launch()

 import torch
 import gradio as gr
+import cv2
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
 # Device: use CUDA when torch reports it as available, otherwise fall back to CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load model: the OWLv2 detection checkpoint is loaded once at module level
 # and moved to `device`, so every request reuses the same instance.
 model = Owlv2ForObjectDetection.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 ).to(device)
     "google/owlv2-base-patch16-ensemble"
 )
+# ------------------------------
+# MAIN FUNCTION
+# ------------------------------
 def query_image(img, text_queries, score_threshold):
     """Detect the comma-separated objects named in ``text_queries`` inside ``img``.

     Args:
         img: Image as a NumPy array (the UI passes ``gr.Image(type="numpy")``);
             ``None`` when the user has not uploaded anything.
         text_queries: Comma-separated object names, e.g. ``"person, car"``.
             Blank entries (empty input, trailing commas) are ignored.
         score_threshold: Minimum confidence forwarded to the processor's
             post-processing step as ``threshold``.

     Returns:
         A ``(image, annotations)`` pair for ``gr.AnnotatedImage``: a copy of
         the input with boxes and captions drawn on it, and a list of
         ``([x1, y1, x2, y2], caption)`` entries, one per kept detection.

     Raises:
         gr.Error: If no image was provided or no non-empty query was entered.
     """
     if img is None:
         # Without this guard the function dies on `img.shape` with an
         # AttributeError that is meaningless to the end user.
         raise gr.Error("Please upload an image.")

     # Drop blank entries so they are never sent to the model as queries.
     queries = [q.strip() for q in (text_queries or "").split(",") if q.strip()]
     if not queries:
         raise gr.Error("Please enter at least one object name.")

     h, w = img.shape[:2]
     target_sizes = torch.tensor([[h, w]])

     inputs = processor(
         text=queries,
         images=img,
         return_tensors="pt"
     ).to(device)

     with torch.no_grad():
         outputs = model(**inputs)

     # Post-processing runs against the CPU `target_sizes` tensor.
     outputs.logits = outputs.logits.cpu()
     outputs.pred_boxes = outputs.pred_boxes.cpu()

     results = processor.post_process_grounded_object_detection(
         outputs=outputs,
         target_sizes=target_sizes,
         threshold=score_threshold
     )
     detections = results[0]

     # Draw on a copy: the input array belongs to the caller.
     canvas = img.copy()
     annotated_labels = []
     for box, score, label in zip(
         detections["boxes"], detections["scores"], detections["labels"]
     ):
         x1, y1, x2, y2 = (int(v) for v in box.tolist())
         # Clamp to the image: out-of-range (especially negative) coordinates
         # would otherwise be interpreted as wrap-around slices downstream.
         x1, x2 = max(0, min(x1, w)), max(0, min(x2, w))
         y1, y2 = max(0, min(y1, h)), max(0, min(y2, h))
         if x2 <= x1 or y2 <= y1:
             # Nothing of this box is left inside the image.
             continue

         text = f"{queries[label.item()]} ({float(score):.2f})"

         cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
         # Keep the caption baseline on-canvas for boxes touching the top edge.
         cv2.putText(canvas, text, (x1, max(y1 - 10, 12)),
                     cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                     (0, 255, 0), 2)

         # gr.AnnotatedImage takes (box, label) pairs only.
         annotated_labels.append((
             [x1, y1, x2, y2],
             text
         ))

     return canvas, annotated_labels
+# ------------------------------
 # UI
+# ------------------------------
 # Gradio UI: an image, a comma-separated list of object names and a confidence
 # threshold go in; query_image's (image, annotations) pair is rendered by
 # gr.AnnotatedImage.
 demo = gr.Interface(
     fn=query_image,
     inputs=[
         gr.Image(type="numpy"),
         gr.Textbox(label="Objects (comma separated)"),
         gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
     ],
     outputs=gr.AnnotatedImage(),
     title="OWLv2 Object Detection (Fixed)",
 )
 # Launch only when executed as a script, so importing this module (tests,
 # tooling) does not start a web server as a side effect.
 if __name__ == "__main__":
     demo.launch()