alex-dinh commited on
Commit
5e30a26
·
verified ·
1 Parent(s): 070f2d2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +89 -11
README.md CHANGED
@@ -1,11 +1,89 @@
1
- ---
2
- base_model: paddlepaddle/PP-DocLayoutV2
3
- tags:
4
- - ocr
5
- - onnx
6
- - layout-detection
7
- - paddle
8
- license: apache-2.0
9
- ---
10
-
11
- This model is an ONNX version of [`paddlepaddle/PP-DocLayoutV2`](https://huggingface.co/PaddlePaddle/PP-DocLayoutV2), created with [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: paddlepaddle/PP-DocLayoutV2
3
+ tags:
4
+ - ocr
5
+ - onnx
6
+ - layout-detection
7
+ - paddle
8
+ license: apache-2.0
9
+ ---
10
+
11
+ This model is an ONNX version of [`paddlepaddle/PP-DocLayoutV2`](https://huggingface.co/PaddlePaddle/PP-DocLayoutV2), created with [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX).
12
+ ***
13
+
14
+ Example Python code to run this model:
15
+
16
+
17
+ ```python
18
+ # Install dependencies:
19
+ # pip install numpy opencv-python onnxruntime
20
+
21
+ import numpy as np
22
+ import cv2
23
+ import onnxruntime as ort
24
+
25
def preprocess_image_doclayout(image, target_input_size=(800, 800)):
    """Preprocess a BGR image for PP-DocLayoutV2 inference.

    Args:
        image: H x W x 3 uint8 BGR image (as returned by cv2.imread).
        target_input_size: (height, width) the model expects; default 800x800.

    Returns:
        Tuple (input_blob, scale_h, scale_w) where input_blob is a float32
        NCHW array of shape (1, 3, target_h, target_w), and scale_h/scale_w
        map original-image coordinates to resized-image coordinates.
    """
    orig_h, orig_w = image.shape[:2]
    target_h, target_w = target_input_size

    # Scale factors for mapping detections back to the original image.
    scale_h = target_h / orig_h
    scale_w = target_w / orig_w

    # Resize directly to the fixed target size (aspect ratio NOT preserved).
    # Note: the previous int(orig_h * scale_h) could truncate to target_h - 1
    # due to float round-off, producing e.g. a 799x800 blob while the model's
    # shape input hard-codes 800x800 — resizing to the target avoids that.
    resized = cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

    # Convert BGR -> RGB and scale pixel values to [0, 1].
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    input_blob = rgb.astype(np.float32) / 255.0

    # ImageNet mean/std normalization.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    input_blob = (input_blob - mean) / std

    # HWC -> CHW, then add the batch dimension.
    input_blob = input_blob.transpose(2, 0, 1)[np.newaxis, ...]

    return input_blob, scale_h, scale_w
51
+
52
+
53
def run_doclayout_onnx():
    """Run PP-DocLayoutV2 layout detection on a single image and print boxes.

    Loads the ONNX model, preprocesses one image, feeds the model's three
    inputs (image shape, image blob, scale factors), then prints detections
    with confidence > 0.5, sorted by their left edge.
    """
    model = ort.InferenceSession('path/to/PP-DocLayoutV2.onnx')  # update to your ONNX model path
    input_names = [i.name for i in model.get_inputs()]
    output_names = [o.name for o in model.get_outputs()]

    image_path = 'path/to/input_image.png'  # update to your input image path
    image = cv2.imread(image_path)
    input_blob, scale_h, scale_w = preprocess_image_doclayout(image)

    # onnxruntime requires numpy ndarrays of the dtype the model declares.
    # Plain Python lists (as previously used for the scale factors) either
    # fail outright or convert to float64 and are rejected as a type mismatch.
    im_shape = np.array([[800, 800]], dtype=np.float32)              # shape (1, 2)
    scale_factor = np.array([[scale_h, scale_w]], dtype=np.float32)  # shape (1, 2)
    input_feed = {input_names[0]: im_shape,
                  input_names[1]: input_blob,
                  input_names[2]: scale_factor}

    # Output shape (300, 8); first 6 values per row are
    # [label_index, score, xmin, ymin, xmax, ymax].
    output = model.run(output_names, input_feed)[0]

    # Filter out low-confidence boxes.
    boxes = output[output[:, 1] > 0.5]
    print('--- DocLayoutV2 ONNX Output: ---')
    # Sort by xmin (column 2) for a stable, readable listing.
    print_doclayout_res(boxes[np.argsort(boxes[:, 2])])
74
+
75
+
76
def print_doclayout_res(boxes):
    """Print detected layout boxes as a tab-separated table.

    Accepts either an ndarray whose rows are
    [cls_id, score, xmin, ymin, xmax, ymax], or an iterable of dicts with
    'cls_id', 'score' and 'coordinate' keys (dicts are ordered by left edge).
    """
    print('cls_id\tscore\txmin\tymin\txmax\tymax')
    if not isinstance(boxes, np.ndarray):
        # Dict-style results: sort by the left edge of each box first.
        for entry in sorted(boxes, key=lambda e: e['coordinate'][0]):
            x0, y0, x1, y1 = entry['coordinate']
            line = (f"{entry['cls_id']:.0f}\t{entry['score']:.3f}"
                    f"\t{x0:.2f}\t{y0:.2f}\t{x1:.2f}\t{y1:.2f}")
            print(line)
    else:
        for row in boxes:
            cells = [f"{row[0]:.0f}", f"{row[1]:.3f}"]
            cells.extend(f"{v:.2f}" for v in row[2:6])
            print("\t".join(cells))
86
+
87
# Script entry point: run the demo only when executed directly,
# not when this example is imported as a module.
if __name__ == '__main__':
    run_doclayout_onnx()
89
+ ```