ningpp committed on
Commit
931c329
·
verified ·
1 Parent(s): 17f4af2

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +22 -3
  2. config.json +107 -0
  3. inference.yml +100 -0
  4. model.onnx +3 -0
  5. preprocessor_config.json +36 -0
README.md CHANGED
@@ -1,3 +1,22 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ pipeline_tag: object-detection
4
+ tags:
5
+ - PaddleOCR
6
+ - PaddlePaddle
7
+ - image-segmentation
8
+ - ocr
9
+ - layout
10
+ - layout_detection
11
+ language:
12
+ - en
13
+ - zh
14
+ - multilingual
15
+ library_name: onnxruntime
16
+ ---
17
+
18
+ # ONNX model for [PP-DocLayoutV3_safetensors](https://huggingface.co/PaddlePaddle/PP-DocLayoutV3_safetensors)
19
+
20
+ ## Try with [ningpp/flux](https://github.com/ningpp/flux)
21
+
22
+ Flux is a Java-based OCR project.
config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "silu",
4
+ "anchor_image_size": null,
5
+ "architectures": [
6
+ "PPDocLayoutV3ForObjectDetection"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "backbone": null,
10
+ "backbone_config": {
11
+ "model_type": "hgnet_v2",
12
+ "arch": "L",
13
+ "return_idx": [0, 1, 2, 3],
14
+ "freeze_stem_only": true,
15
+ "freeze_at": 0,
16
+ "freeze_norm": true,
17
+ "lr_mult_list": [0, 0.05, 0.05, 0.05, 0.05],
18
+ "out_features": ["stage1", "stage2", "stage3", "stage4"]
19
+ },
20
+ "backbone_kwargs": null,
21
+ "batch_norm_eps": 1e-05,
22
+ "box_noise_scale": 1.0,
23
+ "d_model": 256,
24
+ "decoder_activation_function": "relu",
25
+ "decoder_attention_heads": 8,
26
+ "decoder_ffn_dim": 1024,
27
+ "decoder_in_channels": [
28
+ 256,
29
+ 256,
30
+ 256
31
+ ],
32
+ "decoder_layers": 6,
33
+ "decoder_n_points": 4,
34
+ "disable_custom_kernels": true,
35
+ "dropout": 0.0,
36
+ "encode_proj_layers": [
37
+ 2
38
+ ],
39
+ "encoder_activation_function": "gelu",
40
+ "encoder_attention_heads": 8,
41
+ "encoder_ffn_dim": 1024,
42
+ "encoder_hidden_dim": 256,
43
+ "encoder_in_channels": [
44
+ 512,
45
+ 1024,
46
+ 2048
47
+ ],
48
+ "encoder_layers": 1,
49
+ "eos_coefficient": 0.0001,
50
+ "eval_size": null,
51
+ "feature_strides": [
52
+ 8,
53
+ 16,
54
+ 32
55
+ ],
56
+ "hidden_expansion": 1.0,
57
+ "id2label": {
58
+ "0": "abstract",
59
+ "1": "algorithm",
60
+ "2": "aside_text",
61
+ "3": "chart",
62
+ "4": "content",
63
+ "5": "formula",
64
+ "6": "doc_title",
65
+ "7": "figure_title",
66
+ "8": "footer",
67
+ "9": "footer",
68
+ "10": "footnote",
69
+ "11": "formula_number",
70
+ "12": "header",
71
+ "13": "header",
72
+ "14": "image",
73
+ "15": "formula",
74
+ "16": "number",
75
+ "17": "paragraph_title",
76
+ "18": "reference",
77
+ "19": "reference_content",
78
+ "20": "seal",
79
+ "21": "table",
80
+ "22": "text",
81
+ "23": "text",
82
+ "24": "vision_footnote"
83
+ },
84
+ "initializer_range": 0.01,
85
+ "is_encoder_decoder": true,
86
+ "label2id": {},
87
+ "label_noise_ratio": 0.5,
88
+ "layer_norm_eps": 1e-05,
89
+ "learn_initial_query": false,
90
+ "matcher_alpha": 0.25,
91
+ "matcher_bbox_cost": 5.0,
92
+ "matcher_class_cost": 2.0,
93
+ "matcher_gamma": 2.0,
94
+ "matcher_giou_cost": 2.0,
95
+ "model_type": "pp_doclayout_v3",
96
+ "normalize_before": false,
97
+ "num_denoising": 100,
98
+ "num_feature_levels": 3,
99
+ "num_queries": 300,
100
+ "positional_encoding_temperature": 10000,
101
+ "torch_dtype": "float32",
102
+ "use_pretrained_backbone": false,
103
+ "use_timm_backbone": false,
104
+ "global_pointer_head_size": 64,
105
+ "mask_feature_channels": [64, 64],
106
+ "x4_feat_dim": 128
107
+ }
inference.yml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: paddle
2
+ draw_threshold: 0.5
3
+ metric: COCO
4
+ use_dynamic_shape: false
5
+ Global:
6
+ model_name: PP-DocLayoutV3
7
+ arch: DETR
8
+ min_subgraph_size: 3
9
+ Preprocess:
10
+ - interp: 2
11
+ keep_ratio: false
12
+ target_size:
13
+ - 800
14
+ - 800
15
+ type: Resize
16
+ - mean:
17
+ - 0.0
18
+ - 0.0
19
+ - 0.0
20
+ norm_type: none
21
+ std:
22
+ - 1.0
23
+ - 1.0
24
+ - 1.0
25
+ type: NormalizeImage
26
+ - type: Permute
27
+ label_list:
28
+ - abstract
29
+ - algorithm
30
+ - aside_text
31
+ - chart
32
+ - content
33
+ - display_formula
34
+ - doc_title
35
+ - figure_title
36
+ - footer
37
+ - footer_image
38
+ - footnote
39
+ - formula_number
40
+ - header
41
+ - header_image
42
+ - image
43
+ - inline_formula
44
+ - number
45
+ - paragraph_title
46
+ - reference
47
+ - reference_content
48
+ - seal
49
+ - table
50
+ - text
51
+ - vertical_text
52
+ - vision_footnote
53
+ Hpi:
54
+ backend_configs:
55
+ paddle_infer:
56
+ trt_dynamic_shapes: &id001
57
+ image:
58
+ - - 1
59
+ - 3
60
+ - 800
61
+ - 800
62
+ - - 1
63
+ - 3
64
+ - 800
65
+ - 800
66
+ - - 8
67
+ - 3
68
+ - 800
69
+ - 800
70
+ scale_factor:
71
+ - - 1
72
+ - 2
73
+ - - 1
74
+ - 2
75
+ - - 8
76
+ - 2
77
+ trt_dynamic_shape_input_data:
78
+ scale_factor:
79
+ - - 2
80
+ - 2
81
+ - - 1
82
+ - 1
83
+ - - 0.67
84
+ - 0.67
85
+ - 0.67
86
+ - 0.67
87
+ - 0.67
88
+ - 0.67
89
+ - 0.67
90
+ - 0.67
91
+ - 0.67
92
+ - 0.67
93
+ - 0.67
94
+ - 0.67
95
+ - 0.67
96
+ - 0.67
97
+ - 0.67
98
+ - 0.67
99
+ tensorrt:
100
+ dynamic_shapes: *id001
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fdd41ef5d97509e7b654d4d52bfd7eb741d7a746b3227fbf0ac767873b3c569
3
+ size 133666194
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0,
21
+ 0,
22
+ 0
23
+ ],
24
+ "image_processor_type": "PPDocLayoutV3ImageProcessor",
25
+ "image_std": [
26
+ 1,
27
+ 1,
28
+ 1
29
+ ],
30
+ "resample": 3,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 800,
34
+ "width": 800
35
+ }
36
+ }