Spaces:

Nadun102
/

zeropredict

Sleeping

App Files Files Community

Nadun102 committed on Apr 9

Commit

83db32b

verified ·

1 Parent(s): 4bd4f2d

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -50

app.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import torch
 import gradio as gr
 import cv2
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
-# Device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Load model
 model = Owlv2ForObjectDetection.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 ).to(device)
@@ -15,85 +17,118 @@ processor = Owlv2Processor.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 )
-# ------------------------------
 # MAIN FUNCTION
-# ------------------------------
 def query_image(img, text_queries, score_threshold):
-    # Convert text input
-    queries = [q.strip() for q in text_queries.split(",")]
-    # Get image size
-    h, w = img.shape[:2]
-    target_sizes = torch.tensor([[h, w]])
-    # Preprocess
     inputs = processor(
-        text=queries,
         images=img,
         return_tensors="pt"
     ).to(device)
-    # Inference
     with torch.no_grad():
         outputs = model(**inputs)
-    # Move to CPU
-    outputs.logits = outputs.logits.cpu()
-    outputs.pred_boxes = outputs.pred_boxes.cpu()
-    # ✅ CORRECT FUNCTION
     results = processor.post_process_grounded_object_detection(
         outputs=outputs,
-        target_sizes=target_sizes,
-        threshold=score_threshold
-    )
-    boxes = results[0]["boxes"]
-    scores = results[0]["scores"]
-    labels = results[0]["labels"]
-    annotated_labels = []
     # Draw boxes
     for box, score, label in zip(boxes, scores, labels):
-        x1, y1, x2, y2 = [int(i) for i in box.tolist()]
-        class_name = queries[label.item()]
-        confidence = float(score)
-        # Label text
-        text = f"{class_name} ({confidence:.2f})"
         # Draw on image
         cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
-        cv2.putText(img, text, (x1, y1-10),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
-                    (0,255,0), 2)
-        # ✅ IMPORTANT: Only (box, label)
-        annotated_labels.append((
-            [x1, y1, x2, y2],
-            text
-        ))
-    return img, annotated_labels
-# ------------------------------
-# UI
-# ------------------------------
 demo = gr.Interface(
     fn=query_image,
     inputs=[
         gr.Image(type="numpy"),
-        gr.Textbox(label="Objects (comma separated)"),
-        gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
     ],
-    outputs=gr.AnnotatedImage(),
-    title="OWLv2 Object Detection (Fixed)",
 )
-# Launch
 demo.launch()

 import torch
 import gradio as gr
+import numpy as np
 import cv2
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
+# ===============================
+# DEVICE
+# ===============================
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = Owlv2ForObjectDetection.from_pretrained(
     "google/owlv2-base-patch16-ensemble"
 ).to(device)
     "google/owlv2-base-patch16-ensemble"
 )
+# ===============================
+# YOUR PREPROCESSING
+# ===============================
def advanced_preprocessing(img_array: np.ndarray,
                           crop_ratio=(0.25, 0.75),
                           target_size=(512, 512),
                           grayscale=True,
                           tile=(1, 1)):
    """Crop, resize, optionally desaturate, contrast-stretch and tile an image.

    Args:
        img_array: Image as an ``(H, W)``, ``(H, W, 1)``, ``(H, W, 3)`` or
            ``(H, W, 4)`` array. Colour input is converted as RGB / RGBA.
        crop_ratio: ``(start, end)`` fractions; the same pair is applied to
            both the width and the height.
        target_size: ``(width, height)`` handed to ``cv2.resize``.
        grayscale: When true, collapse the image to gray and replicate it
            back to three channels.
        tile: ``(rows, cols)`` repetitions of the final image.

    Returns:
        A new three-channel array; the input array is not modified.

    Raises:
        ValueError: If ``img_array`` is ``None`` or the crop selects no pixels.
    """
    if img_array is None:
        raise ValueError("img_array must be an image array, got None")

    # Normalise the channel layout first so every later step can rely on an
    # (H, W, 3) array instead of failing on grayscale or RGBA input.
    if img_array.ndim == 2 or img_array.shape[2] == 1:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
    elif img_array.shape[2] == 4:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)

    height, width = img_array.shape[:2]
    start, end = crop_ratio
    left, right = int(start * width), int(end * width)
    top, bottom = int(start * height), int(end * height)
    cropped = img_array[top:bottom, left:right]
    if cropped.size == 0:
        # cv2.resize would otherwise fail with an opaque assertion error.
        raise ValueError(
            f"crop_ratio={crop_ratio!r} selects an empty region of a "
            f"{width}x{height} image"
        )

    resized = cv2.resize(cropped, target_size)

    if grayscale:
        gray = cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY)
        resized = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

    # Min-max stretch each channel independently to the 0-255 range.
    stretched = np.zeros_like(resized)
    for channel in range(resized.shape[2]):
        stretched[:, :, channel] = cv2.normalize(
            resized[:, :, channel], None, 0, 255, cv2.NORM_MINMAX
        )

    if tuple(tile) != (1, 1):
        stretched = np.tile(stretched, (tile[0], tile[1], 1))

    return stretched
+# ===============================
 # MAIN FUNCTION
+# ===============================
 def query_image(img, text_queries, score_threshold):
     """Run OWLv2 zero-shot detection on an image and annotate the hits.

     Args:
         img: Image array from the Gradio image input, or ``None`` when the
             form was submitted without an image.
         text_queries: Comma-separated class names to look for.
         score_threshold: Minimum score a detection needs to be kept.

     Returns:
         ``(image, detections)``. ``image`` is the preprocessed image with
         boxes and labels drawn on it; ``detections`` is a list of dicts
         with ``"box"``, ``"label"`` and ``"score"`` keys. Box coordinates
         refer to the preprocessed image, not the upload. Returns
         ``(None, [])`` when no image was supplied.
     """
     if img is None:
         # Nothing to analyse; avoid crashing on ``None.shape``.
         return None, []

     # preprocess
     img = advanced_preprocessing(img)

     # Drop blank entries (empty box, stray or trailing commas) so the model
     # is never prompted with an empty string.
     text_queries = [
         q.strip() for q in (text_queries or "").split(",") if q.strip()
     ]
     if not text_queries:
         return img, []

     inputs = processor(
         text=text_queries,
         images=img,
         return_tensors="pt"
     ).to(device)

     with torch.no_grad():
         outputs = model(**inputs)

     # Boxes are rescaled to the preprocessed image, which is what gets drawn
     # on and returned.
     target_sizes = torch.tensor([img.shape[:2]])

     # Pass the slider value through: without it the post-processor applies
     # its own default cut-off first, so lower slider values have no effect.
     results = processor.post_process_grounded_object_detection(
         outputs=outputs,
         target_sizes=target_sizes,
         threshold=score_threshold
     )[0]

     output_data = []

     # Draw boxes
     for box, score, label in zip(
         results["boxes"], results["scores"], results["labels"]
     ):
         x1, y1, x2, y2 = map(int, box.tolist())
         class_name = text_queries[label.item()]
         conf = float(score)

         # Save structured output
         output_data.append({
             "box": [x1, y1, x2, y2],
             "label": class_name,
             "score": round(conf, 3)
         })

         # Draw on image
         cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
         cv2.putText(
             img,
             f"{class_name} {conf:.2f}",
             (x1, y1 - 5),
             cv2.FONT_HERSHEY_SIMPLEX,
             0.5,
             (0, 255, 0),
             2
         )

     return img, output_data
+# ===============================
+# GRADIO UI
+# ===============================
 # Gradio front end: an image, a comma-separated class list and a score
 # threshold go in; the annotated image and the detections as JSON come out.
 demo = gr.Interface(
     fn=query_image,
     inputs=[
         gr.Image(type="numpy"),
         gr.Textbox(label="Classes (comma separated)"),
         # Labelled explicitly so the control states what the number means.
         gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
     ],
     outputs=[
         gr.Image(label="Result"),
         gr.JSON(label="Detections")
     ],
     title="Correct Bounding Box Detection (OWLv2)"
 )
 demo.launch()