Inference-comparison-APP-Document-Understanding

Runtime error

pierreguillou committed on Apr 4, 2023

Commit

d2924de

1 Parent(s): 2618251

Update files/functions.py

Files changed (1) hide show

files/functions.py CHANGED Viewed

@@ -68,27 +68,36 @@ label2color = {
 # bounding boxes start and end of a sequence
 cls_box = [0, 0, 0, 0]
-sep_box = [1000, 1000, 1000, 1000]
-# model
-model_id = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
-# tokenizer
-tokenizer_id = "xlm-roberta-base"
 # (tokenization) The maximum length of a feature (sequence)
-if str(384) in model_id:
-  max_length = 384
-elif str(512) in model_id:
-  max_length = 512
 else:
-  print("Error with max_length of chunks!")
 # (tokenization) overlap
 doc_stride = 128 # The authorized overlap between two parts of the context when splitting is needed.
 # max PDF page images that will be displayed
-max_imgboxes = 2
 # get files
 examples_dir = 'files/'
@@ -97,7 +106,7 @@ from huggingface_hub import hf_hub_download
 files = ["example.pdf", "blank.pdf", "blank.png", "languages_iso.csv", "languages_tesseract.csv", "wo_content.png"]
 for file_name in files:
     path_to_file = hf_hub_download(
-        repo_id = "pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v2",
         filename = "files/" + file_name,
         repo_type = "space"
         )

 # bounding boxes start and end of a sequence
 cls_box = [0, 0, 0, 0]
+sep_box_lilt = cls_box
+sep_box_layoutxlm = [1000, 1000, 1000, 1000]
+# models
+model_id_lilt = "pierreguillou/lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
+model_id_layoutxlm = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
+# tokenizer for LayoutXLM
+tokenizer_id_layoutxlm = "xlm-roberta-base"
 # (tokenization) The maximum length of a feature (sequence)
+if str(384) in model_id_lilt:
+  max_length_lilt = 384
+elif str(512) in model_id_lilt:
+  max_length_lilt = 512
+else:
+  print("Error with max_length_lilt of chunks!")
+if str(384) in model_id_layoutxlm:
+  max_length_layoutxlm = 384
+elif str(512) in model_id_layoutxlm:
+  max_length_layoutxlm = 512
 else:
+  print("Error with max_length_layoutxlm of chunks!")
 # (tokenization) overlap
 doc_stride = 128 # The authorized overlap between two parts of the context when splitting is needed.
 # max PDF page images that will be displayed
+max_imgboxes = 1
 # get files
 examples_dir = 'files/'
 files = ["example.pdf", "blank.pdf", "blank.png", "languages_iso.csv", "languages_tesseract.csv", "wo_content.png"]
 for file_name in files:
     path_to_file = hf_hub_download(
+        repo_id = "pierreguillou/Inference-comparison-APP-Document-Understanding-at-paragraphlevel-v1",
         filename = "files/" + file_name,
         repo_type = "space"
         )