Update README.md
README.md (changed)
**A Quick Example**

The snippet below runs document VQA with ERNIE-Layout: OCR text and boxes are tokenized, the document image is turned into pixel values, and everything is fed to `ErnieLayoutForQuestionAnswering`.

```python
from networks.modeling_erine_layout import ErnieLayoutConfig, ErnieLayoutForQuestionAnswering
from networks.feature_extractor import ErnieFeatureExtractor
from networks.tokenizer import ErnieLayoutTokenizer
from networks.model_util import ernie_qa_tokenize, prepare_context_info
from PIL import Image


pretrain_torch_model_or_path = "path/to/pretrained-model"

# initialize tokenizer
tokenizer = ErnieLayoutTokenizer.from_pretrained(pretrained_model_name_or_path=pretrain_torch_model_or_path)

# OCR text spans and one bounding box (x1, y1, x2, y2) per span
context = ['This is an example document', 'All ocr boxes are inserted into this list']
layout = [[381, 91, 505, 115], [738, 96, 804, 122]]

# initialize feature extractor
feature_extractor = ErnieFeatureExtractor()

# open the image of the document
pil_image = Image.open("/path/to/image").convert("RGB")

# tokenize context & question
context_encodings, = prepare_context_info(tokenizer, context, layout)
question = "what is it?"
tokenized_res = ernie_qa_tokenize(tokenizer, question, context_encodings)

# process the image and add its pixel values to the model inputs
tokenized_res['pixel_values'] = feature_extractor(pil_image)

# answer start and end index
tokenized_res['start_positions'] = 6
tokenized_res['end_positions'] = 12

# initialize config
config = ErnieLayoutConfig.from_pretrained(pretrained_model_name_or_path=pretrain_torch_model_or_path)
config.num_classes = 2  # start and end

# initialize ERNIE for VQA
model = ErnieLayoutForQuestionAnswering.from_pretrained(
    pretrained_model_name_or_path=pretrain_torch_model_or_path,
    config=config,
)

output = model(**tokenized_res)
```
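To turn the raw outputs into an answer string, a minimal decoding sketch is shown below. It assumes `ErnieLayoutForQuestionAnswering` returns Hugging Face-style `start_logits` and `end_logits` and that `tokenized_res` carries `input_ids`; check the model's actual output fields before relying on it.

```python
import torch

# Hypothetical decoding step: pick the highest-scoring start/end token
# (assumes HF-style `start_logits` / `end_logits` on the output object).
start_idx = int(torch.argmax(output.start_logits))
end_idx = int(torch.argmax(output.end_logits))

# Map the predicted span back to text (assumes `input_ids` is present in tokenized_res).
input_ids = torch.as_tensor(tokenized_res['input_ids']).flatten()
answer = tokenizer.decode(input_ids[start_idx:end_idx + 1], skip_special_tokens=True)
print(answer)
```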