iammraat committed on
Commit
019f2ad
·
verified ·
1 Parent(s): b364284

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -22
app.py CHANGED
@@ -171,13 +171,14 @@ model_inputs = session.get_inputs()
171
  input_names = [i.name for i in model_inputs]
172
  output_names = [o.name for o in session.get_outputs()]
173
 
 
 
174
  LABELS = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
175
 
176
  def preprocess_image(image, target_size=(800, 800)):
177
- # Original dimensions
178
- orig_h, orig_w = image.shape[:2]
179
 
180
- # 1. Resize (Warping to 800x800 is required by this graph)
181
  img_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
182
 
183
  # 2. Normalize
@@ -190,13 +191,13 @@ def preprocess_image(image, target_size=(800, 800)):
190
  img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
191
 
192
  # 4. Prepare Metadata Inputs
193
- # scale_factor = resized_shape / original_shape
194
- scale_factor = np.array([target_size[0] / orig_h, target_size[1] / orig_w], dtype=np.float32).reshape(1, 2)
195
 
196
- # --- CRITICAL FIX: im_shape must be the ORIGINAL image size ---
197
- # This tells the model the valid area to keep boxes.
198
- # If we put 800x800 here, it clips valid boxes on large documents.
199
- im_shape = np.array([orig_h, orig_w], dtype=np.float32).reshape(1, 2)
 
 
200
 
201
  return img_data, scale_factor, im_shape
202
 
@@ -219,27 +220,28 @@ def analyze_layout(input_image):
219
  elif 'shape' in name:
220
  inputs[name] = im_shape
221
 
222
- # Run ONNX
223
  outputs = session.run(output_names, inputs)
224
-
225
- # --- PARSE RESULTS ---
226
  detections = outputs[0]
227
  if len(detections.shape) == 3:
228
  detections = detections[0]
229
 
 
 
 
 
 
 
230
  viz_image = image_np.copy()
231
  log = []
232
 
233
- # DEBUG: Print max score to check if model is working at all
234
- if len(detections) > 0:
235
- max_score = np.max(detections[:, 1])
236
- print(f"DEBUG: Max confidence score found: {max_score}")
237
 
238
  for det in detections:
239
  score = det[1]
240
 
241
- # Lowered threshold to 0.2 to catch faint detections
242
- if score < 0.2: continue
243
 
244
  class_id = int(det[0])
245
  bbox = det[2:]
@@ -247,7 +249,6 @@ def analyze_layout(input_image):
247
  # Map labels
248
  label_name = LABELS.get(class_id, f"Class {class_id}")
249
 
250
- # Draw Box
251
  try:
252
  x1, y1, x2, y2 = map(int, bbox)
253
 
@@ -273,9 +274,8 @@ def analyze_layout(input_image):
273
 
274
  return viz_image, "\n".join(log)
275
 
276
- with gr.Blocks(title="ONNX Layout Analysis") as demo:
277
- gr.Markdown("## ⚡ Fast V3 Layout Analysis (ONNX)")
278
- gr.Markdown(f"Running `{onnx_filename}` via ONNX Runtime.")
279
 
280
  with gr.Row():
281
  with gr.Column():
 
171
  input_names = [i.name for i in model_inputs]
172
  output_names = [o.name for o in session.get_outputs()]
173
 
174
+ print(f"Model expects inputs: {input_names}")
175
+
176
  LABELS = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
177
 
178
  def preprocess_image(image, target_size=(800, 800)):
179
+ h, w = image.shape[:2]
 
180
 
181
+ # 1. Resize
182
  img_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
183
 
184
  # 2. Normalize
 
191
  img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
192
 
193
  # 4. Prepare Metadata Inputs
 
 
194
 
195
+ # Scale Factor: Ratio of resized / original
196
+ scale_factor = np.array([target_size[0] / h, target_size[1] / w], dtype=np.float32).reshape(1, 2)
197
+
198
+ # --- DEBUG CHANGE: Try passing target_size as im_shape ---
199
+ # Some exports want the INPUT size (800,800), not the ORIGINAL size.
200
+ im_shape = np.array([target_size[0], target_size[1]], dtype=np.float32).reshape(1, 2)
201
 
202
  return img_data, scale_factor, im_shape
203
 
 
220
  elif 'shape' in name:
221
  inputs[name] = im_shape
222
 
 
223
  outputs = session.run(output_names, inputs)
 
 
224
  detections = outputs[0]
225
  if len(detections.shape) == 3:
226
  detections = detections[0]
227
 
228
+ # --- RAW DEBUG LOGGING ---
229
+ print(f"\n[DEBUG] Raw Detections Shape: {detections.shape}")
230
+ print(f"[DEBUG] Top 3 Raw Detections (Class, Score, BBox):")
231
+ for i in range(min(3, len(detections))):
232
+ print(f" {detections[i]}")
233
+
234
  viz_image = image_np.copy()
235
  log = []
236
 
237
+ # Sort by score descending to find the best ones
238
+ # detections = detections[detections[:, 1].argsort()[::-1]]
 
 
239
 
240
  for det in detections:
241
  score = det[1]
242
 
243
+ # Lower threshold strictly for debugging
244
+ if score < 0.3: continue
245
 
246
  class_id = int(det[0])
247
  bbox = det[2:]
 
249
  # Map labels
250
  label_name = LABELS.get(class_id, f"Class {class_id}")
251
 
 
252
  try:
253
  x1, y1, x2, y2 = map(int, bbox)
254
 
 
274
 
275
  return viz_image, "\n".join(log)
276
 
277
+ with gr.Blocks(title="ONNX Layout Analysis (Debug)") as demo:
278
+ gr.Markdown("## ⚡ Layout Analysis (Debug Mode)")
 
279
 
280
  with gr.Row():
281
  with gr.Column():