Nadun102 committed on
Commit
d01d490
·
verified ·
1 Parent(s): f94528a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -35
app.py CHANGED
@@ -1,16 +1,12 @@
1
  import torch
2
  import gradio as gr
 
3
  from transformers import Owlv2Processor, Owlv2ForObjectDetection
4
- import spaces
5
 
6
- # --------------------------
7
  # Device
8
- # --------------------------
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
 
11
- # --------------------------
12
  # Load model
13
- # --------------------------
14
  model = Owlv2ForObjectDetection.from_pretrained(
15
  "google/owlv2-base-patch16-ensemble"
16
  ).to(device)
@@ -19,75 +15,85 @@ processor = Owlv2Processor.from_pretrained(
19
  "google/owlv2-base-patch16-ensemble"
20
  )
21
 
22
- # --------------------------
23
- # Detection
24
- # --------------------------
25
- @spaces.GPU
26
  def query_image(img, text_queries, score_threshold):
27
 
28
- text_queries = [q.strip() for q in text_queries.split(",")]
 
29
 
30
- # Correct size
31
  h, w = img.shape[:2]
32
  target_sizes = torch.tensor([[h, w]])
33
 
 
34
  inputs = processor(
35
- text=text_queries,
36
  images=img,
37
  return_tensors="pt"
38
  ).to(device)
39
 
 
40
  with torch.no_grad():
41
  outputs = model(**inputs)
42
 
 
43
  outputs.logits = outputs.logits.cpu()
44
  outputs.pred_boxes = outputs.pred_boxes.cpu()
45
 
46
- # ✅ FIXED FUNCTION NAME
47
  results = processor.post_process_grounded_object_detection(
48
  outputs=outputs,
49
- target_sizes=target_sizes
 
50
  )
51
 
52
  boxes = results[0]["boxes"]
53
  scores = results[0]["scores"]
54
  labels = results[0]["labels"]
55
 
56
- detections = []
57
 
 
58
  for box, score, label in zip(boxes, scores, labels):
59
 
60
- if score < score_threshold:
61
- continue
62
 
63
- x1, y1, x2, y2 = box.tolist()
 
64
 
65
- detections.append({
66
- "box": [round(x1,2), round(y1,2), round(x2,2), round(y2,2)],
67
- "label": text_queries[label.item()],
68
- "score": round(float(score), 3)
69
- })
70
 
71
- return img, detections
 
 
 
 
72
 
 
 
 
 
 
73
 
74
- # --------------------------
 
 
 
75
  # UI
76
- # --------------------------
77
  demo = gr.Interface(
78
  fn=query_image,
79
  inputs=[
80
  gr.Image(type="numpy"),
81
- gr.Textbox(value="person, car, dog"),
82
- gr.Slider(0, 1, value=0.2)
83
  ],
84
  outputs=gr.AnnotatedImage(),
85
- title="OWLv2 Detection",
86
- description="Enter objects like: person, car, dog"
87
  )
88
 
89
- # --------------------------
90
- # Run
91
- # --------------------------
92
- if __name__ == "__main__":
93
- demo.launch()
 
1
  import torch
2
  import gradio as gr
3
+ import cv2
4
  from transformers import Owlv2Processor, Owlv2ForObjectDetection
 
5
 
 
6
  # Device
 
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
 
9
  # Load model
 
10
  model = Owlv2ForObjectDetection.from_pretrained(
11
  "google/owlv2-base-patch16-ensemble"
12
  ).to(device)
 
15
  "google/owlv2-base-patch16-ensemble"
16
  )
17
 
18
+ # ------------------------------
19
+ # MAIN FUNCTION
20
+ # ------------------------------
 
21
  def query_image(img, text_queries, score_threshold):
22
 
23
+ # Convert text input
24
+ queries = [q.strip() for q in text_queries.split(",")]
25
 
26
+ # Get image size
27
  h, w = img.shape[:2]
28
  target_sizes = torch.tensor([[h, w]])
29
 
30
+ # Preprocess
31
  inputs = processor(
32
+ text=queries,
33
  images=img,
34
  return_tensors="pt"
35
  ).to(device)
36
 
37
+ # Inference
38
  with torch.no_grad():
39
  outputs = model(**inputs)
40
 
41
+ # Move to CPU
42
  outputs.logits = outputs.logits.cpu()
43
  outputs.pred_boxes = outputs.pred_boxes.cpu()
44
 
45
+ # ✅ CORRECT FUNCTION
46
  results = processor.post_process_grounded_object_detection(
47
  outputs=outputs,
48
+ target_sizes=target_sizes,
49
+ threshold=score_threshold
50
  )
51
 
52
  boxes = results[0]["boxes"]
53
  scores = results[0]["scores"]
54
  labels = results[0]["labels"]
55
 
56
+ annotated_labels = []
57
 
58
+ # Draw boxes
59
  for box, score, label in zip(boxes, scores, labels):
60
 
61
+ x1, y1, x2, y2 = [int(i) for i in box.tolist()]
 
62
 
63
+ class_name = queries[label.item()]
64
+ confidence = float(score)
65
 
66
+ # Label text
67
+ text = f"{class_name} ({confidence:.2f})"
 
 
 
68
 
69
+ # Draw on image
70
+ cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
71
+ cv2.putText(img, text, (x1, y1-10),
72
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5,
73
+ (0,255,0), 2)
74
 
75
+ # ✅ IMPORTANT: Only (box, label)
76
+ annotated_labels.append((
77
+ [x1, y1, x2, y2],
78
+ text
79
+ ))
80
 
81
+ return img, annotated_labels
82
+
83
+
84
+ # ------------------------------
85
  # UI
86
+ # ------------------------------
87
  demo = gr.Interface(
88
  fn=query_image,
89
  inputs=[
90
  gr.Image(type="numpy"),
91
+ gr.Textbox(label="Objects (comma separated)"),
92
+ gr.Slider(0, 1, value=0.2, label="Confidence Threshold")
93
  ],
94
  outputs=gr.AnnotatedImage(),
95
+ title="OWLv2 Object Detection (Fixed)",
 
96
  )
97
 
98
+ # Launch
99
+ demo.launch()