Autopus
/

global_kdda_index_v2

PyTorch

layoutlmv2

Model card Files Files and versions

xet

Community

Autopus committed on Aug 14, 2024

Commit

6f87f8f

verified ·

1 Parent(s): e0a7e63

Upload 3 files

Browse files

Files changed (3) hide show

README.md +101 -3
config.json +166 -0
pytorch_model.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,101 @@
----
-license: apache-2.0
----

+---
+title: KDDA Global Model - Invoices
+emoji: 🐨
+---
+# Configuration
+`title`: _string_
+Display title for the Space
+`emoji`: _string_
+Space emoji (only a single emoji character is allowed)
+`colorFrom`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`colorTo`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`sdk`: _string_
+Can be either `gradio` or `streamlit`
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
+Path is relative to the root of the repository.
+`pinned`: _boolean_
+Whether the Space stays on top of your list.
+# Custom LayoutLM Model for Invoice Processing
+This repository hosts a custom fine-tuned version of the [LayoutLMv2](https://huggingface.co/microsoft/layoutlmv2-base-uncased) model for extracting key information from invoices. The model identifies and extracts fields such as amounts, dates, and names from invoice documents.
+## Model Overview
+This model is based on the LayoutLMv2 architecture and has been fine-tuned on a custom dataset of invoices. It is capable of performing token classification to extract the following entities:
+- **Amount Including Tax**
+- **Due Date**
+- **Reference Number**
+- **Customer Name**
+- **Vendor Name**
+- **Issue Date**
+- **Amount**
+The model uses a custom set of labels to identify and classify these entities within the invoice documents.
+## Label Mapping
+The model has been trained with the following `label2id` and `id2label` mappings:
+### `label2id` Mapping
+```json
+{
+    "I-Amount Including tax": 0,
+    "I-Due Date": 1,
+    "I-Reference Number": 2,
+    "B-Amount Including tax": 3,
+    "I-Customer Name": 4,
+    "O": 5,
+    "I-Issue Date": 6,
+    "B-Amount": 7,
+    "B-Vendor Name": 8,
+    "I-Vendor Name": 9,
+    "B-Customer Name": 10,
+    "B-Due Date": 11,
+    "I-Amount": 12,
+    "B-Reference Number": 13,
+    "B-Issue Date": 14
+}
+```
+### `id2label` Mapping
+```python
+{
+    0: "I-Amount Including tax",
+    1: "I-Due Date",
+    2: "I-Reference Number",
+    3: "B-Amount Including tax",
+    4: "I-Customer Name",
+    5: "O",
+    6: "I-Issue Date",
+    7: "B-Amount",
+    8: "B-Vendor Name",
+    9: "I-Vendor Name",
+    10: "B-Customer Name",
+    11: "B-Due Date",
+    12: "I-Amount",
+    13: "B-Reference Number",
+    14: "B-Issue Date"
+}
+```
+## Citation
+```bibtex
+@article{Xu2020LayoutLMv2MP,
+  title={LayoutLMv2: Multi-modal Pre-training for Visually-Rich Document Understanding},
+  author={Yang Xu and Yiheng Xu and Tengchao Lv and Lei Cui and Furu Wei and Guoxin Wang and Yijuan Lu and Dinei Flor{\^e}ncio and Cha Zhang and Wanxiang Che and Min Zhang and Lidong Zhou},
+  journal={ArXiv},
+  year={2020},
+  volume={abs/2012.14740}
+}
+```

config.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+  "_name_or_path": "/content/drive/MyDrive/ner/Train_10/Checkpoints",
+  "architectures": [
+    "LayoutLMv2ForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "convert_sync_batchnorm": true,
+  "coordinate_size": 128,
+  "detectron2_config_args": {
+    "MODEL.ANCHOR_GENERATOR.SIZES": [
+      [
+        32
+      ],
+      [
+        64
+      ],
+      [
+        128
+      ],
+      [
+        256
+      ],
+      [
+        512
+      ]
+    ],
+    "MODEL.BACKBONE.NAME": "build_resnet_fpn_backbone",
+    "MODEL.FPN.IN_FEATURES": [
+      "res2",
+      "res3",
+      "res4",
+      "res5"
+    ],
+    "MODEL.MASK_ON": true,
+    "MODEL.PIXEL_STD": [
+      57.375,
+      57.12,
+      58.395
+    ],
+    "MODEL.POST_NMS_TOPK_TEST": 1000,
+    "MODEL.RESNETS.ASPECT_RATIOS": [
+      [
+        0.5,
+        1.0,
+        2.0
+      ]
+    ],
+    "MODEL.RESNETS.DEPTH": 101,
+    "MODEL.RESNETS.NUM_GROUPS": 32,
+    "MODEL.RESNETS.OUT_FEATURES": [
+      "res2",
+      "res3",
+      "res4",
+      "res5"
+    ],
+    "MODEL.RESNETS.SIZES": [
+      [
+        32
+      ],
+      [
+        64
+      ],
+      [
+        128
+      ],
+      [
+        256
+      ],
+      [
+        512
+      ]
+    ],
+    "MODEL.RESNETS.STRIDE_IN_1X1": false,
+    "MODEL.RESNETS.WIDTH_PER_GROUP": 8,
+    "MODEL.ROI_BOX_HEAD.NAME": "FastRCNNConvFCHead",
+    "MODEL.ROI_BOX_HEAD.NUM_FC": 2,
+    "MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION": 14,
+    "MODEL.ROI_HEADS.IN_FEATURES": [
+      "p2",
+      "p3",
+      "p4",
+      "p5"
+    ],
+    "MODEL.ROI_HEADS.NAME": "StandardROIHeads",
+    "MODEL.ROI_HEADS.NUM_CLASSES": 5,
+    "MODEL.ROI_MASK_HEAD.NAME": "MaskRCNNConvUpsampleHead",
+    "MODEL.ROI_MASK_HEAD.NUM_CONV": 4,
+    "MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION": 7,
+    "MODEL.RPN.IN_FEATURES": [
+      "p2",
+      "p3",
+      "p4",
+      "p5",
+      "p6"
+    ],
+    "MODEL.RPN.POST_NMS_TOPK_TRAIN": 1000,
+    "MODEL.RPN.PRE_NMS_TOPK_TEST": 1000,
+    "MODEL.RPN.PRE_NMS_TOPK_TRAIN": 2000
+  },
+  "fast_qkv": true,
+  "gradient_checkpointing": false,
+  "has_relative_attention_bias": true,
+  "has_spatial_attention_bias": true,
+  "has_visual_segment_embedding": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14"
+  },
+  "image_feature_pool_shape": [
+    7,
+    7,
+    256
+  ],
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "layer_norm_eps": 1e-12,
+  "max_2d_position_embeddings": 1024,
+  "max_position_embeddings": 512,
+  "max_rel_2d_pos": 256,
+  "max_rel_pos": 128,
+  "model_type": "layoutlmv2",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "rel_2d_pos_bins": 64,
+  "rel_pos_bins": 32,
+  "shape_size": 128,
+  "torch_dtype": "float32",
+  "transformers_version": "4.10.0.dev0",
+  "type_vocab_size": 2,
+  "vocab_size": 30522
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cfefbe82fb24c5651c60693b88177701fce1d50de5df5644c1f32194e8f3990
+size 802294051