Spaces:

Nadun102
/

zeropredict

Sleeping

App Files Community

Nadun102 committed on Apr 9

Commit

f94528a

verified ·

1 Parent(s): 64fb345

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -32

app.py CHANGED Viewed

@@ -4,12 +4,12 @@ from transformers import Owlv2Processor, Owlv2ForObjectDetection
 import spaces
 # --------------------------
-# Device setup
 # --------------------------
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # --------------------------
-# Load OWLv2 model
 # --------------------------
 model = Owlv2ForObjectDetection.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
@@ -20,35 +20,31 @@ processor = Owlv2Processor.from_pretrained(
 )
 # --------------------------
-# Detection function
 # --------------------------
 @spaces.GPU
 def query_image(img, text_queries, score_threshold):
-    # Convert query string to list
     text_queries = [q.strip() for q in text_queries.split(",")]
-    # ✅ FIX: Use actual image size
     h, w = img.shape[:2]
     target_sizes = torch.tensor([[h, w]])
-    # Prepare inputs
     inputs = processor(
         text=text_queries,
         images=img,
         return_tensors="pt"
     ).to(device)
-    # Run model
     with torch.no_grad():
         outputs = model(**inputs)
-    # Move outputs to CPU
     outputs.logits = outputs.logits.cpu()
     outputs.pred_boxes = outputs.pred_boxes.cpu()
-    # Post-process predictions
-    results = processor.post_process_object_detection(
         outputs=outputs,
         target_sizes=target_sizes
     )
@@ -67,7 +63,7 @@ def query_image(img, text_queries, score_threshold):
         x1, y1, x2, y2 = box.tolist()
         detections.append({
-            "box": [round(x1, 2), round(y1, 2), round(x2, 2), round(y2, 2)],
             "label": text_queries[label.item()],
             "score": round(float(score), 3)
         })
@@ -76,35 +72,22 @@ def query_image(img, text_queries, score_threshold):
 # --------------------------
-# Gradio UI
 # --------------------------
 demo = gr.Interface(
     fn=query_image,
     inputs=[
-        gr.Image(type="numpy", label="Upload Image"),
-        gr.Textbox(
-            label="Enter objects (comma separated)",
-            value="person, car, dog"
-        ),
-        gr.Slider(
-            minimum=0,
-            maximum=1,
-            value=0.2,
-            step=0.01,
-            label="Score Threshold"
-        )
     ],
-    outputs=gr.AnnotatedImage(label="Detection Results"),
-    title="OWLv2 Zero-Shot Object Detection",
-    description=(
-        "Upload an image and type objects to detect.\n\n"
-        "Example: 'person, car, dog'\n\n"
-        "Tip: Use natural phrases like 'photo of a car' for better results."
-    )
 )
 # --------------------------
-# Run app
 # --------------------------
 if __name__ == "__main__":
     demo.launch()

 import spaces
 # --------------------------
+# Device
 # --------------------------
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # --------------------------
+# Load model
 # --------------------------
 model = Owlv2ForObjectDetection.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 )
 # --------------------------
+# Detection
 # --------------------------
 @spaces.GPU
 def query_image(img, text_queries, score_threshold):
     text_queries = [q.strip() for q in text_queries.split(",")]
+    # Correct size
     h, w = img.shape[:2]
     target_sizes = torch.tensor([[h, w]])
     inputs = processor(
         text=text_queries,
         images=img,
         return_tensors="pt"
     ).to(device)
     with torch.no_grad():
         outputs = model(**inputs)
     outputs.logits = outputs.logits.cpu()
     outputs.pred_boxes = outputs.pred_boxes.cpu()
+    # ✅ FIXED FUNCTION NAME
+    results = processor.post_process_grounded_object_detection(
         outputs=outputs,
         target_sizes=target_sizes
     )
         x1, y1, x2, y2 = box.tolist()
         detections.append({
+            "box": [round(x1,2), round(y1,2), round(x2,2), round(y2,2)],
             "label": text_queries[label.item()],
             "score": round(float(score), 3)
         })
 # --------------------------
+# UI
 # --------------------------
 demo = gr.Interface(
     fn=query_image,
     inputs=[
+        gr.Image(type="numpy"),
+        gr.Textbox(value="person, car, dog"),
+        gr.Slider(0, 1, value=0.2)
     ],
+    outputs=gr.AnnotatedImage(),
+    title="OWLv2 Detection",
+    description="Enter objects like: person, car, dog"
 )
 # --------------------------
+# Run
 # --------------------------
 if __name__ == "__main__":
     demo.launch()