Update README.md
Browse files
README.md
CHANGED
|
@@ -1,11 +1,89 @@
|
|
| 1 |
-
---
|
| 2 |
-
base_model: paddlepaddle/PP-DocLayoutV2
|
| 3 |
-
tags:
|
| 4 |
-
- ocr
|
| 5 |
-
- onnx
|
| 6 |
-
- layout-detection
|
| 7 |
-
- paddle
|
| 8 |
-
license: apache-2.0
|
| 9 |
-
---
|
| 10 |
-
|
| 11 |
-
This model is an ONNX version of [`paddlepaddle/PP-DocLayoutV2`](https://huggingface.co/PaddlePaddle/PP-DocLayoutV2), created with [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: PaddlePaddle/PP-DocLayoutV2
|
| 3 |
+
tags:
|
| 4 |
+
- ocr
|
| 5 |
+
- onnx
|
| 6 |
+
- layout-detection
|
| 7 |
+
- paddle
|
| 8 |
+
license: apache-2.0
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
This model is an ONNX version of [`paddlepaddle/PP-DocLayoutV2`](https://huggingface.co/PaddlePaddle/PP-DocLayoutV2), created with [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX).
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
Example Python code to run this model:
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
```python
|
| 18 |
+
# Install dependencies:
|
| 19 |
+
# pip install numpy opencv-python onnxruntime
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
import cv2
|
| 23 |
+
import onnxruntime as ort
|
| 24 |
+
|
| 25 |
+
def preprocess_image_doclayout(image, target_input_size=(800, 800)):
    """Prepare a BGR image for PP-DocLayoutV2 ONNX inference.

    The image is resized to ``target_input_size`` (aspect ratio is NOT
    preserved), converted to RGB, normalized with ImageNet statistics,
    and laid out as a (1, 3, H, W) float32 tensor.

    Args:
        image: (H, W, 3) BGR uint8 array, e.g. the output of ``cv2.imread``.
        target_input_size: (height, width) the model expects; default 800x800.

    Returns:
        Tuple ``(input_blob, scale_h, scale_w)`` where ``input_blob`` has
        shape (1, 3, target_h, target_w) and the scale factors map model
        coordinates back to the original image size.
    """
    orig_h, orig_w = image.shape[:2]

    # Scale factors are still reported to the caller for coordinate mapping.
    target_h, target_w = target_input_size
    scale_h = target_h / orig_h
    scale_w = target_w / orig_w

    # Resize straight to the target size. The previous round-trip
    # int(orig_h * (target_h / orig_h)) could lose a pixel to float
    # rounding (e.g. 799 instead of 800), breaking the fixed-size input.
    resized = cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

    # OpenCV loads BGR; the model expects RGB in [0, 1].
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    input_blob = rgb.astype(np.float32) / 255.0

    # ImageNet channel-wise normalization.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    input_blob = (input_blob - mean) / std

    # HWC -> CHW, then add the batch dimension.
    input_blob = input_blob.transpose(2, 0, 1)[np.newaxis, ...]

    return input_blob, scale_h, scale_w
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def run_doclayout_onnx(model_path='path/to/PP-DocLayoutV2.onnx',
                       image_path='path/to/input_image.png',
                       score_threshold=0.5):
    """Run PP-DocLayoutV2 ONNX inference on one image and print the boxes.

    Args:
        model_path: Path to the exported ONNX model file.
        image_path: Path to the input image.
        score_threshold: Minimum confidence a detection must reach to be kept.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
    """
    model = ort.InferenceSession(model_path)
    input_names = [i.name for i in model.get_inputs()]
    output_names = [o.name for o in model.get_outputs()]

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread silently returns None on a missing/unreadable file;
        # fail loudly instead of crashing later in preprocessing.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # Keep the target size in one place so the shape input always matches
    # what preprocessing actually produced.
    target_size = (800, 800)
    input_blob, scale_h, scale_w = preprocess_image_doclayout(image, target_size)

    # The model takes three inputs: the (h, w) the image was resized to,
    # the image tensor itself, and the resize scale factors.
    preprocess_shape = [np.array(target_size, dtype=np.float32)]
    input_feed = {input_names[0]: preprocess_shape,
                  input_names[1]: input_blob,
                  input_names[2]: [[scale_h, scale_w]]}

    # Output shape=(300, 8); first 6 columns are
    # [label_index, score, xmin, ymin, xmax, ymax].
    # NOTE(review): last 2 columns are unused here — confirm their meaning.
    output = model.run(output_names, input_feed)[0]

    # Keep confident detections only, then print them sorted by xmin.
    boxes = output[output[:, 1] > score_threshold]
    print('--- DocLayoutV2 ONNX Output: ---')
    print_doclayout_res(boxes[np.argsort(boxes[:, 2])])
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def print_doclayout_res(boxes):
    """Print detections as a tab-separated table.

    Accepts either an (N, >=6) ndarray whose rows start with
    [cls_id, score, xmin, ymin, xmax, ymax] (printed in given order), or an
    iterable of dicts with 'cls_id', 'score' and 'coordinate' keys
    (printed sorted by xmin).
    """
    print('cls_id\tscore\txmin\tymin\txmax\tymax')
    if isinstance(boxes, np.ndarray):
        # ndarray rows are assumed pre-sorted by the caller.
        rows = ((b[0], b[1], b[2], b[3], b[4], b[5]) for b in boxes)
    else:
        ordered = sorted(boxes, key=lambda item: item['coordinate'][0])
        rows = ((b['cls_id'], b['score'], *b['coordinate']) for b in ordered)
    for cls_id, score, xmin, ymin, xmax, ymax in rows:
        print(f"{cls_id:.0f}\t{score:.3f}\t{xmin:.2f}\t{ymin:.2f}\t{xmax:.2f}\t{ymax:.2f}")
|
| 86 |
+
|
| 87 |
+
# Script entry point: run the layout-detection demo when executed directly.
if __name__ == '__main__':
    run_doclayout_onnx()
|
| 89 |
+
```
|