Update app.py
Browse files
app.py
CHANGED
|
@@ -171,13 +171,14 @@ model_inputs = session.get_inputs()
|
|
| 171 |
input_names = [i.name for i in model_inputs]
|
| 172 |
output_names = [o.name for o in session.get_outputs()]
|
| 173 |
|
|
|
|
|
|
|
| 174 |
LABELS = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
|
| 175 |
|
| 176 |
def preprocess_image(image, target_size=(800, 800)):
|
| 177 |
-
|
| 178 |
-
orig_h, orig_w = image.shape[:2]
|
| 179 |
|
| 180 |
-
# 1. Resize
|
| 181 |
img_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
|
| 182 |
|
| 183 |
# 2. Normalize
|
|
@@ -190,13 +191,13 @@ def preprocess_image(image, target_size=(800, 800)):
|
|
| 190 |
img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
|
| 191 |
|
| 192 |
# 4. Prepare Metadata Inputs
|
| 193 |
-
# scale_factor = resized_shape / original_shape
|
| 194 |
-
scale_factor = np.array([target_size[0] / orig_h, target_size[1] / orig_w], dtype=np.float32).reshape(1, 2)
|
| 195 |
|
| 196 |
-
#
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
| 200 |
|
| 201 |
return img_data, scale_factor, im_shape
|
| 202 |
|
|
@@ -219,27 +220,28 @@ def analyze_layout(input_image):
|
|
| 219 |
elif 'shape' in name:
|
| 220 |
inputs[name] = im_shape
|
| 221 |
|
| 222 |
-
# Run ONNX
|
| 223 |
outputs = session.run(output_names, inputs)
|
| 224 |
-
|
| 225 |
-
# --- PARSE RESULTS ---
|
| 226 |
detections = outputs[0]
|
| 227 |
if len(detections.shape) == 3:
|
| 228 |
detections = detections[0]
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
viz_image = image_np.copy()
|
| 231 |
log = []
|
| 232 |
|
| 233 |
-
#
|
| 234 |
-
|
| 235 |
-
max_score = np.max(detections[:, 1])
|
| 236 |
-
print(f"DEBUG: Max confidence score found: {max_score}")
|
| 237 |
|
| 238 |
for det in detections:
|
| 239 |
score = det[1]
|
| 240 |
|
| 241 |
-
#
|
| 242 |
-
if score < 0.
|
| 243 |
|
| 244 |
class_id = int(det[0])
|
| 245 |
bbox = det[2:]
|
|
@@ -247,7 +249,6 @@ def analyze_layout(input_image):
|
|
| 247 |
# Map labels
|
| 248 |
label_name = LABELS.get(class_id, f"Class {class_id}")
|
| 249 |
|
| 250 |
-
# Draw Box
|
| 251 |
try:
|
| 252 |
x1, y1, x2, y2 = map(int, bbox)
|
| 253 |
|
|
@@ -273,9 +274,8 @@ def analyze_layout(input_image):
|
|
| 273 |
|
| 274 |
return viz_image, "\n".join(log)
|
| 275 |
|
| 276 |
-
with gr.Blocks(title="ONNX Layout Analysis") as demo:
|
| 277 |
-
gr.Markdown("## ⚡
|
| 278 |
-
gr.Markdown(f"Running `{onnx_filename}` via ONNX Runtime.")
|
| 279 |
|
| 280 |
with gr.Row():
|
| 281 |
with gr.Column():
|
|
|
|
| 171 |
input_names = [i.name for i in model_inputs]
|
| 172 |
output_names = [o.name for o in session.get_outputs()]
|
| 173 |
|
| 174 |
+
print(f"Model expects inputs: {input_names}")
|
| 175 |
+
|
| 176 |
LABELS = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
|
| 177 |
|
| 178 |
def preprocess_image(image, target_size=(800, 800)):
|
| 179 |
+
h, w = image.shape[:2]
|
|
|
|
| 180 |
|
| 181 |
+
# 1. Resize
|
| 182 |
img_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
|
| 183 |
|
| 184 |
# 2. Normalize
|
|
|
|
| 191 |
img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
|
| 192 |
|
| 193 |
# 4. Prepare Metadata Inputs
|
|
|
|
|
|
|
| 194 |
|
| 195 |
+
# Scale Factor: Ratio of resized / original
|
| 196 |
+
scale_factor = np.array([target_size[0] / h, target_size[1] / w], dtype=np.float32).reshape(1, 2)
|
| 197 |
+
|
| 198 |
+
# --- DEBUG CHANGE: Try passing target_size as im_shape ---
|
| 199 |
+
# Some exports want the INPUT size (800,800), not the ORIGINAL size.
|
| 200 |
+
im_shape = np.array([target_size[0], target_size[1]], dtype=np.float32).reshape(1, 2)
|
| 201 |
|
| 202 |
return img_data, scale_factor, im_shape
|
| 203 |
|
|
|
|
| 220 |
elif 'shape' in name:
|
| 221 |
inputs[name] = im_shape
|
| 222 |
|
|
|
|
| 223 |
outputs = session.run(output_names, inputs)
|
|
|
|
|
|
|
| 224 |
detections = outputs[0]
|
| 225 |
if len(detections.shape) == 3:
|
| 226 |
detections = detections[0]
|
| 227 |
|
| 228 |
+
# --- RAW DEBUG LOGGING ---
|
| 229 |
+
print(f"\n[DEBUG] Raw Detections Shape: {detections.shape}")
|
| 230 |
+
print(f"[DEBUG] Top 3 Raw Detections (Class, Score, BBox):")
|
| 231 |
+
for i in range(min(3, len(detections))):
|
| 232 |
+
print(f" {detections[i]}")
|
| 233 |
+
|
| 234 |
viz_image = image_np.copy()
|
| 235 |
log = []
|
| 236 |
|
| 237 |
+
# Sort by score descending to find the best ones
|
| 238 |
+
# detections = detections[detections[:, 1].argsort()[::-1]]
|
|
|
|
|
|
|
| 239 |
|
| 240 |
for det in detections:
|
| 241 |
score = det[1]
|
| 242 |
|
| 243 |
+
# Lower threshold strictly for debugging
|
| 244 |
+
if score < 0.3: continue
|
| 245 |
|
| 246 |
class_id = int(det[0])
|
| 247 |
bbox = det[2:]
|
|
|
|
| 249 |
# Map labels
|
| 250 |
label_name = LABELS.get(class_id, f"Class {class_id}")
|
| 251 |
|
|
|
|
| 252 |
try:
|
| 253 |
x1, y1, x2, y2 = map(int, bbox)
|
| 254 |
|
|
|
|
| 274 |
|
| 275 |
return viz_image, "\n".join(log)
|
| 276 |
|
| 277 |
+
with gr.Blocks(title="ONNX Layout Analysis (Debug)") as demo:
|
| 278 |
+
gr.Markdown("## ⚡ Layout Analysis (Debug Mode)")
|
|
|
|
| 279 |
|
| 280 |
with gr.Row():
|
| 281 |
with gr.Column():
|