iammraat committed on
Commit e55fda2 · verified · 1 Parent(s): 593f815

Update app.py

Files changed (1)
  1. app.py +85 -45
app.py CHANGED
@@ -1,65 +1,105 @@
  import gradio as gr
  import cv2
  import numpy as np
- from paddleocr import PPStructureV3 # Explicitly import the class that exists

- # --- INITIALIZATION ---
- # We do NOT pass a custom model path. We let PPStructureV3 download its own default model.
- # This avoids the "ValueError: Unknown argument" crashes.
- layout_engine = PPStructureV3(
-     use_doc_orientation_classify=True, # Standard V3 argument for orientation
-     enable_mkldnn=False # CRITICAL: Keeps CPU from crashing
- )

  def analyze_layout(input_image):
      if input_image is None:
          return None, "No image uploaded"

      image_np = np.array(input_image)
-
-     # Run Inference
-     try:
-         # V3 returns a generator, so we convert to list immediately
-         results = list(layout_engine(image_np))
-     except Exception as e:
-         return image_np, f"Error running layout analysis: {e}"

      viz_image = image_np.copy()
-     detections_text = []

-     if not results:
-         return viz_image, "No layout detected."
-
-     # --- VISUALIZATION ---
-     for region in results:
-         if not isinstance(region, dict): continue
-
-         # V3 usually puts the box in 'layout_bbox' or 'bbox'
-         box = region.get('layout_bbox') or region.get('bbox')
-         label = region.get('label', 'unknown')

-         if box is None: continue

-         try:
-             x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
-
-             # Color coding
-             color = (0, 255, 0) # Green (Default)
-             if label == 'title': color = (0, 0, 255) # Red
-             elif label == 'figure': color = (255, 0, 0) # Blue
-             elif label == 'table': color = (255, 255, 0) # Cyan

-             cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
-             cv2.putText(viz_image, str(label), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
-             detections_text.append(f"Found {label} at {box}")
-         except Exception:
-             pass

-     return viz_image, "\n".join(detections_text)

- with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:
-     gr.Markdown("## 📄 PP-DocLayoutV3 Explorer")
-     gr.Markdown("Auto-downloading the latest V3 weights for structure analysis.")

      with gr.Row():
          with gr.Column():
@@ -68,7 +108,7 @@ with gr.Blocks(title="PP-DocLayoutV3 Explorer") as demo:

          with gr.Column():
              output_img = gr.Image(label="Layout Visualization")
-             output_log = gr.Textbox(label="Detected Regions", lines=10)

      submit_btn.click(fn=analyze_layout, inputs=input_img, outputs=[output_img, output_log])
 
  import gradio as gr
  import cv2
  import numpy as np
+ import onnxruntime as ort
+ from huggingface_hub import hf_hub_download

+ # --- STEP 1: Download the ONNX Model ---
+ print("Downloading ONNX model...")
+ model_path = hf_hub_download(repo_id="alex-dinh/PP-DocLayoutV3-ONNX", filename="model.onnx")
+ print(f"Model downloaded to: {model_path}")
+
+ # --- STEP 2: Initialize ONNX Engine ---
+ # This loads the AI "brain" directly without needing Paddle
+ session = ort.InferenceSession(model_path)
+ input_names = [i.name for i in session.get_inputs()]
+ output_names = [o.name for o in session.get_outputs()]
+
+ # Define labels map (Standard for PP-DocLayout)
+ LABELS = {1: "Text", 2: "Title", 3: "List", 4: "Table", 5: "Figure"}
+
+ def preprocess_image(image, target_size=(800, 800)):
+     """
+     Prepares the image exactly how the AI expects it (Resize -> Normalize).
+     """
+     h, w = image.shape[:2]
+
+     # 1. Resize
+     # We do NOT keep aspect ratio for the input blob, but we keep scales to fix boxes later
+     img_resized = cv2.resize(image, target_size)
+
+     # 2. Normalize (Standard ImageNet mean/std)
+     img_data = img_resized.astype(np.float32) / 255.0
+     mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+     std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+     img_data = (img_data - mean) / std
+
+     # 3. Transpose to (Batch, Channel, Height, Width)
+     img_data = img_data.transpose(2, 0, 1)[None, :, :, :]
+
+     # Calculate scale factors to map detections back to original image
+     scale_factor = np.array([target_size[0] / h, target_size[1] / w], dtype=np.float32).reshape(1, 2)
+
+     return img_data, scale_factor

  def analyze_layout(input_image):
      if input_image is None:
          return None, "No image uploaded"

+     # Convert PIL to Numpy/OpenCV
      image_np = np.array(input_image)
+     orig_h, orig_w = image_np.shape[:2]

+     # --- INFERENCE ---
+     input_blob, scale_factor = preprocess_image(image_np)
+
+     # ONNX Runtime inputs
+     inputs = {
+         input_names[0]: input_blob,   # The image data
+         input_names[1]: scale_factor  # The resize scale
+     }
+
+     # Run!
+     outputs = session.run(output_names, inputs)
+
+     # --- POST-PROCESSING ---
+     # Output format is typically [Batch, N, 6] -> [Class, Score, X1, Y1, X2, Y2]
+     detections = outputs[0]
+
      viz_image = image_np.copy()
+     log = []

+     for det in detections:
+         class_id = int(det[0])
+         score = det[1]
+         bbox = det[2:]
+
+         if score < 0.5: continue # Filter weak detections
+
+         # Map labels
+         label_name = LABELS.get(class_id, "Unknown")

+         # Coordinates
+         x1, y1, x2, y2 = map(int, bbox)

+         # Color coding
+         color = (0, 255, 0) # Green
+         if label_name == "Title": color = (0, 0, 255)
+         elif label_name == "Table": color = (255, 255, 0)
+         elif label_name == "Figure": color = (255, 0, 0)

+         # Draw
+         cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)
+         cv2.putText(viz_image, f"{label_name} {score:.2f}", (x1, y1-10),
+                     cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+         log.append(f"Found {label_name} at [{x1}, {y1}, {x2}, {y2}] (Conf: {score:.2f})")

+     return viz_image, "\n".join(log)

+ with gr.Blocks(title="ONNX Layout Analysis") as demo:
+     gr.Markdown("## Fast V3 Layout Analysis (ONNX)")
+     gr.Markdown("Uses **PP-DocLayoutV3** via ONNX Runtime. No Paddle dependencies.")

      with gr.Row():
          with gr.Column():

          with gr.Column():
              output_img = gr.Image(label="Layout Visualization")
+             output_log = gr.Textbox(label="Detections", lines=10)

      submit_btn.click(fn=analyze_layout, inputs=input_img, outputs=[output_img, output_log])
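For reference, a quick way to sanity-check the exported model outside Gradio is to ask the session for its declared inputs and outputs and run it once on a dummy blob. The sketch below reuses the repo_id and filename from the commit but otherwise makes assumptions: it presumes the two-input (image blob plus scale factor) signature used in app.py and a first output shaped like rows of [class, score, x1, y1, x2, y2]. The actual tensor names, input count, and output layout depend on how the ONNX file was exported, so treat this as an illustration rather than a verified recipe.

import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download

# Assumed to match the commit; adjust repo_id/filename if the export differs.
model_path = hf_hub_download(repo_id="alex-dinh/PP-DocLayoutV3-ONNX", filename="model.onnx")
session = ort.InferenceSession(model_path)

# Confirm input/output names, order, and shapes before wiring up preprocessing.
for inp in session.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)

# Dummy 800x800 blob plus the scale factor for a hypothetical 1000x750 source image.
h, w = 1000, 750
blob = np.random.rand(1, 3, 800, 800).astype(np.float32)
scale = np.array([[800 / h, 800 / w]], dtype=np.float32)

# Assumes the model takes exactly two inputs in (image, scale_factor) order, as app.py does.
feeds = {session.get_inputs()[0].name: blob,
         session.get_inputs()[1].name: scale}
outputs = session.run(None, feeds)
print("first output shape:", outputs[0].shape)  # app.py reads this as [class, score, x1, y1, x2, y2] rows

If the printed input names or shapes differ from what preprocess_image produces (for example, a model that also expects an im_shape tensor), the feeds dictionary in analyze_layout would need to be adjusted accordingly.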