Upload 5 files

Browse files

Files changed (5) hide show

README.md +22 -3
config.json +107 -0
inference.yml +100 -0
model.onnx +3 -0
preprocessor_config.json +36 -0

README.md CHANGED Viewed

@@ -1,3 +1,22 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+pipeline_tag: object-detection
+tags:
+- PaddleOCR
+- PaddlePaddle
+- image-segmentation
+- ocr
+- layout
+- layout_detection
+language:
+- en
+- zh
+- multilingual
+library_name: onnxruntime
+---
+# ONNX model for [PP-DocLayoutV3_safetensors](https://huggingface.co/PaddlePaddle/PP-DocLayoutV3_safetensors)
+## Try it with [ningpp/flux](https://github.com/ningpp/flux)
+Flux is a Java-based OCR project.

config.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "silu",
+  "anchor_image_size": null,
+  "architectures": [
+    "PPDocLayoutV3ForObjectDetection"
+  ],
+  "attention_dropout": 0.0,
+  "backbone": null,
+  "backbone_config": {
+    "model_type": "hgnet_v2",
+    "arch": "L",
+    "return_idx": [0, 1, 2, 3],
+    "freeze_stem_only": true,
+    "freeze_at": 0,
+    "freeze_norm": true,
+    "lr_mult_list": [0, 0.05, 0.05, 0.05, 0.05],
+    "out_features": ["stage1", "stage2", "stage3", "stage4"]
+  },
+  "backbone_kwargs": null,
+  "batch_norm_eps": 1e-05,
+  "box_noise_scale": 1.0,
+  "d_model": 256,
+  "decoder_activation_function": "relu",
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 1024,
+  "decoder_in_channels": [
+    256,
+    256,
+    256
+  ],
+  "decoder_layers": 6,
+  "decoder_n_points": 4,
+  "disable_custom_kernels": true,
+  "dropout": 0.0,
+  "encode_proj_layers": [
+    2
+  ],
+  "encoder_activation_function": "gelu",
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 1024,
+  "encoder_hidden_dim": 256,
+  "encoder_in_channels": [
+    512,
+    1024,
+    2048
+  ],
+  "encoder_layers": 1,
+  "eos_coefficient": 0.0001,
+  "eval_size": null,
+  "feature_strides": [
+    8,
+    16,
+    32
+  ],
+  "hidden_expansion": 1.0,
+  "id2label": {
+    "0": "abstract",
+    "1": "algorithm",
+    "2": "aside_text",
+    "3": "chart",
+    "4": "content",
+    "5": "formula",
+    "6": "doc_title",
+    "7": "figure_title",
+    "8": "footer",
+    "9": "footer",
+    "10": "footnote",
+    "11": "formula_number",
+    "12": "header",
+    "13": "header",
+    "14": "image",
+    "15": "formula",
+    "16": "number",
+    "17": "paragraph_title",
+    "18": "reference",
+    "19": "reference_content",
+    "20": "seal",
+    "21": "table",
+    "22": "text",
+    "23": "text",
+    "24": "vision_footnote"
+  },
+  "initializer_range": 0.01,
+  "is_encoder_decoder": true,
+  "label2id": {},
+  "label_noise_ratio": 0.5,
+  "layer_norm_eps": 1e-05,
+  "learn_initial_query": false,
+  "matcher_alpha": 0.25,
+  "matcher_bbox_cost": 5.0,
+  "matcher_class_cost": 2.0,
+  "matcher_gamma": 2.0,
+  "matcher_giou_cost": 2.0,
+  "model_type": "pp_doclayout_v3",
+  "normalize_before": false,
+  "num_denoising": 100,
+  "num_feature_levels": 3,
+  "num_queries": 300,
+  "positional_encoding_temperature": 10000,
+  "torch_dtype": "float32",
+  "use_pretrained_backbone": false,
+  "use_timm_backbone": false,
+  "global_pointer_head_size": 64,
+  "mask_feature_channels": [64, 64],
+  "x4_feat_dim": 128
+}

inference.yml ADDED Viewed

	@@ -0,0 +1,100 @@

+mode: paddle
+draw_threshold: 0.5
+metric: COCO
+use_dynamic_shape: false
+Global:
+  model_name: PP-DocLayoutV3
+arch: DETR
+min_subgraph_size: 3
+Preprocess:
+- interp: 2
+  keep_ratio: false
+  target_size:
+  - 800
+  - 800
+  type: Resize
+- mean:
+  - 0.0
+  - 0.0
+  - 0.0
+  norm_type: none
+  std:
+  - 1.0
+  - 1.0
+  - 1.0
+  type: NormalizeImage
+- type: Permute
+label_list:
+- abstract
+- algorithm
+- aside_text
+- chart
+- content
+- display_formula
+- doc_title
+- figure_title
+- footer
+- footer_image
+- footnote
+- formula_number
+- header
+- header_image
+- image
+- inline_formula
+- number
+- paragraph_title
+- reference
+- reference_content
+- seal
+- table
+- text
+- vertical_text
+- vision_footnote
+Hpi:
+  backend_configs:
+    paddle_infer:
+      trt_dynamic_shapes: &id001
+        image:
+        - - 1
+          - 3
+          - 800
+          - 800
+        - - 1
+          - 3
+          - 800
+          - 800
+        - - 8
+          - 3
+          - 800
+          - 800
+        scale_factor:
+        - - 1
+          - 2
+        - - 1
+          - 2
+        - - 8
+          - 2
+      trt_dynamic_shape_input_data:
+        scale_factor:
+        - - 2
+          - 2
+        - - 1
+          - 1
+        - - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+          - 0.67
+    tensorrt:
+      dynamic_shapes: *id001

model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fdd41ef5d97509e7b654d4d52bfd7eb741d7a746b3227fbf0ac767873b3c569
+size 133666194

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format"
+  ],
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0,
+    0,
+    0
+  ],
+  "image_processor_type": "PPDocLayoutV3ImageProcessor",
+  "image_std": [
+    1,
+    1,
+    1
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 800,
+    "width": 800
+  }
+}