Nadun102 committed on
Commit
50ee613
·
verified ·
1 Parent(s): 83db32b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -30
app.py CHANGED
@@ -18,7 +18,7 @@ processor = Owlv2Processor.from_pretrained(
18
  )
19
 
20
  # ===============================
21
- # YOUR PREPROCESSING
22
  # ===============================
23
  def advanced_preprocessing(img_array: np.ndarray,
24
  crop_ratio=(0.25, 0.75),
@@ -40,8 +40,8 @@ def advanced_preprocessing(img_array: np.ndarray,
40
 
41
  img_stretch = np.zeros_like(img_resized)
42
  for c in range(3):
43
- img_stretch[:,:,c] = cv2.normalize(
44
- img_resized[:,:,c], None, 0, 255, cv2.NORM_MINMAX
45
  )
46
 
47
  if tile != (1,1):
@@ -51,13 +51,14 @@ def advanced_preprocessing(img_array: np.ndarray,
51
 
52
 
53
  # ===============================
54
- # MAIN FUNCTION
55
  # ===============================
56
  def query_image(img, text_queries, score_threshold):
57
 
58
- # preprocess
59
  img = advanced_preprocessing(img)
60
 
 
61
  text_queries = [q.strip() for q in text_queries.split(",")]
62
 
63
  inputs = processor(
@@ -69,7 +70,6 @@ def query_image(img, text_queries, score_threshold):
69
  with torch.no_grad():
70
  outputs = model(**inputs)
71
 
72
- # IMPORTANT FIX
73
  target_sizes = torch.tensor([img.shape[:2]])
74
 
75
  results = processor.post_process_grounded_object_detection(
@@ -79,37 +79,22 @@ def query_image(img, text_queries, score_threshold):
79
 
80
  boxes = results["boxes"]
81
  scores = results["scores"]
82
- labels = results["labels"]
83
 
84
  output_data = []
85
 
86
- # Draw boxes
87
- for box, score, label in zip(boxes, scores, labels):
 
88
  if score < score_threshold:
89
  continue
90
 
91
  x1, y1, x2, y2 = map(int, box.tolist())
92
- class_name = text_queries[label.item()]
93
- conf = float(score)
94
 
95
- # Save structured output
96
- output_data.append({
97
- "box": [x1, y1, x2, y2],
98
- "label": class_name,
99
- "score": round(conf, 3)
100
- })
101
 
102
- # Draw on image
103
  cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
104
- cv2.putText(
105
- img,
106
- f"{class_name} {conf:.2f}",
107
- (x1, y1-5),
108
- cv2.FONT_HERSHEY_SIMPLEX,
109
- 0.5,
110
- (0,255,0),
111
- 2
112
- )
113
 
114
  return img, output_data
115
 
@@ -125,10 +110,10 @@ demo = gr.Interface(
125
  gr.Slider(0, 1, value=0.2)
126
  ],
127
  outputs=[
128
- gr.Image(label="Result"),
129
- gr.JSON(label="Detections")
130
  ],
131
- title="Correct Bounding Box Detection (OWLv2)"
132
  )
133
 
134
  demo.launch()
 
18
  )
19
 
20
  # ===============================
21
+ # PREPROCESSING
22
  # ===============================
23
  def advanced_preprocessing(img_array: np.ndarray,
24
  crop_ratio=(0.25, 0.75),
 
40
 
41
  img_stretch = np.zeros_like(img_resized)
42
  for c in range(3):
43
+ img_stretch[:, :, c] = cv2.normalize(
44
+ img_resized[:, :, c], None, 0, 255, cv2.NORM_MINMAX
45
  )
46
 
47
  if tile != (1,1):
 
51
 
52
 
53
  # ===============================
54
+ # MAIN FUNCTION (ONLY BOXES)
55
  # ===============================
56
  def query_image(img, text_queries, score_threshold):
57
 
58
+ # preprocess image
59
  img = advanced_preprocessing(img)
60
 
61
+ # still needed internally (model requirement)
62
  text_queries = [q.strip() for q in text_queries.split(",")]
63
 
64
  inputs = processor(
 
70
  with torch.no_grad():
71
  outputs = model(**inputs)
72
 
 
73
  target_sizes = torch.tensor([img.shape[:2]])
74
 
75
  results = processor.post_process_grounded_object_detection(
 
79
 
80
  boxes = results["boxes"]
81
  scores = results["scores"]
 
82
 
83
  output_data = []
84
 
85
+ # ONLY bounding boxes
86
+ for box, score in zip(boxes, scores):
87
+
88
  if score < score_threshold:
89
  continue
90
 
91
  x1, y1, x2, y2 = map(int, box.tolist())
 
 
92
 
93
+ # store only coordinates
94
+ output_data.append([x1, y1, x2, y2])
 
 
 
 
95
 
96
+ # draw rectangle ONLY (no labels)
97
  cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
 
 
 
 
 
 
 
 
 
98
 
99
  return img, output_data
100
 
 
110
  gr.Slider(0, 1, value=0.2)
111
  ],
112
  outputs=[
113
+ gr.Image(label="Bounding Boxes"),
114
+ gr.JSON(label="Coordinates Only")
115
  ],
116
+ title="Bounding Box Coordinates Only (OWLv2)"
117
  )
118
 
119
  demo.launch()