iammraat committed on
Commit
019f2ad
·
verified ·
1 Parent(s): b364284

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -22
app.py CHANGED
@@ -171,13 +171,14 @@ model_inputs = session.get_inputs()
171
  input_names = [i.name for i in model_inputs]
172
  output_names = [o.name for o in session.get_outputs()]
173
 
 
 
174
  LABELS = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
175
 
176
  def preprocess_image(image, target_size=(800, 800)):
177
- # Original dimensions
178
- orig_h, orig_w = image.shape[:2]
179
 
180
- # 1. Resize (Warping to 800x800 is required by this graph)
181
  img_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
182
 
183
  # 2. Normalize
@@ -190,13 +191,13 @@ def preprocess_image(image, target_size=(800, 800)):
190
  img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
191
 
192
  # 4. Prepare Metadata Inputs
193
- # scale_factor = resized_shape / original_shape
194
- scale_factor = np.array([target_size[0] / orig_h, target_size[1] / orig_w], dtype=np.float32).reshape(1, 2)
195
 
196
- # --- CRITICAL FIX: im_shape must be the ORIGINAL image size ---
197
- # This tells the model the valid area to keep boxes.
198
- # If we put 800x800 here, it clips valid boxes on large documents.
199
- im_shape = np.array([orig_h, orig_w], dtype=np.float32).reshape(1, 2)
 
 
200
 
201
  return img_data, scale_factor, im_shape
202
 
@@ -219,27 +220,28 @@ def analyze_layout(input_image):
219
  elif 'shape' in name:
220
  inputs[name] = im_shape
221
 
222
- # Run ONNX
223
  outputs = session.run(output_names, inputs)
224
-
225
- # --- PARSE RESULTS ---
226
  detections = outputs[0]
227
  if len(detections.shape) == 3:
228
  detections = detections[0]
229
 
 
 
 
 
 
 
230
  viz_image = image_np.copy()
231
  log = []
232
 
233
- # DEBUG: Print max score to check if model is working at all
234
- if len(detections) > 0:
235
- max_score = np.max(detections[:, 1])
236
- print(f"DEBUG: Max confidence score found: {max_score}")
237
 
238
  for det in detections:
239
  score = det[1]
240
 
241
- # Lowered threshold to 0.2 to catch faint detections
242
- if score < 0.2: continue
243
 
244
  class_id = int(det[0])
245
  bbox = det[2:]
@@ -247,7 +249,6 @@ def analyze_layout(input_image):
247
  # Map labels
248
  label_name = LABELS.get(class_id, f"Class {class_id}")
249
 
250
- # Draw Box
251
  try:
252
  x1, y1, x2, y2 = map(int, bbox)
253
 
@@ -273,9 +274,8 @@ def analyze_layout(input_image):
273
 
274
  return viz_image, "\n".join(log)
275
 
276
- with gr.Blocks(title="ONNX Layout Analysis") as demo:
277
- gr.Markdown("## ⚡ Fast V3 Layout Analysis (ONNX)")
278
- gr.Markdown(f"Running `{onnx_filename}` via ONNX Runtime.")
279
 
280
  with gr.Row():
281
  with gr.Column():
 
171
  input_names = [i.name for i in model_inputs]
172
  output_names = [o.name for o in session.get_outputs()]
173
 
174
+ print(f"Model expects inputs: {input_names}")
175
+
176
  LABELS = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
177
 
178
  def preprocess_image(image, target_size=(800, 800)):
179
+ h, w = image.shape[:2]
 
180
 
181
+ # 1. Resize
182
  img_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
183
 
184
  # 2. Normalize
 
191
  img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
192
 
193
  # 4. Prepare Metadata Inputs
 
 
194
 
195
+ # Scale Factor: Ratio of resized / original
196
+ scale_factor = np.array([target_size[0] / h, target_size[1] / w], dtype=np.float32).reshape(1, 2)
197
+
198
+ # --- DEBUG CHANGE: Try passing target_size as im_shape ---
199
+ # Some exports want the INPUT size (800,800), not the ORIGINAL size.
200
+ im_shape = np.array([target_size[0], target_size[1]], dtype=np.float32).reshape(1, 2)
201
 
202
  return img_data, scale_factor, im_shape
203
 
 
220
  elif 'shape' in name:
221
  inputs[name] = im_shape
222
 
 
223
  outputs = session.run(output_names, inputs)
 
 
224
  detections = outputs[0]
225
  if len(detections.shape) == 3:
226
  detections = detections[0]
227
 
228
+ # --- RAW DEBUG LOGGING ---
229
+ print(f"\n[DEBUG] Raw Detections Shape: {detections.shape}")
230
+ print(f"[DEBUG] Top 3 Raw Detections (Class, Score, BBox):")
231
+ for i in range(min(3, len(detections))):
232
+ print(f" {detections[i]}")
233
+
234
  viz_image = image_np.copy()
235
  log = []
236
 
237
+ # Sort by score descending to find the best ones
238
+ # detections = detections[detections[:, 1].argsort()[::-1]]
 
 
239
 
240
  for det in detections:
241
  score = det[1]
242
 
243
+ # Lower threshold strictly for debugging
244
+ if score < 0.3: continue
245
 
246
  class_id = int(det[0])
247
  bbox = det[2:]
 
249
  # Map labels
250
  label_name = LABELS.get(class_id, f"Class {class_id}")
251
 
 
252
  try:
253
  x1, y1, x2, y2 = map(int, bbox)
254
 
 
274
 
275
  return viz_image, "\n".join(log)
276
 
277
+ with gr.Blocks(title="ONNX Layout Analysis (Debug)") as demo:
278
+ gr.Markdown("## ⚡ Layout Analysis (Debug Mode)")
 
279
 
280
  with gr.Row():
281
  with gr.Column():