AntonV HF Staff committed on
Commit
b9e8bc7
·
verified ·
1 Parent(s): 0842879

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +5 -29
  2. tokenizer_config.json +9 -6
special_tokens_map.json CHANGED
@@ -1008,7 +1008,11 @@
1008
  "<|LOC_SEP|>",
1009
  "<|CROP_COL_SEP|>",
1010
  "<|CROP_ROW_SEP|>",
1011
- "<|IMAGE_SEP|>"
 
 
 
 
1012
  ],
1013
  "bos_token": {
1014
  "content": "<s>",
@@ -1031,20 +1035,6 @@
1031
  "rstrip": false,
1032
  "single_word": false
1033
  },
1034
- "header_end_token": {
1035
- "content": "<mask:7>",
1036
- "lstrip": false,
1037
- "normalized": false,
1038
- "rstrip": false,
1039
- "single_word": false
1040
- },
1041
- "header_start_token": {
1042
- "content": "<mask:6>",
1043
- "lstrip": false,
1044
- "normalized": false,
1045
- "rstrip": false,
1046
- "single_word": false
1047
- },
1048
  "mask_token": {
1049
  "content": "<mask:1>",
1050
  "lstrip": false,
@@ -1066,20 +1056,6 @@
1066
  "rstrip": false,
1067
  "single_word": false
1068
  },
1069
- "sys_end_token": {
1070
- "content": "<mask:5>",
1071
- "lstrip": false,
1072
- "normalized": false,
1073
- "rstrip": false,
1074
- "single_word": false
1075
- },
1076
- "sys_start_token": {
1077
- "content": "<mask:4>",
1078
- "lstrip": false,
1079
- "normalized": false,
1080
- "rstrip": false,
1081
- "single_word": false
1082
- },
1083
  "unk_token": {
1084
  "content": "<unk>",
1085
  "lstrip": false,
 
1008
  "<|LOC_SEP|>",
1009
  "<|CROP_COL_SEP|>",
1010
  "<|CROP_ROW_SEP|>",
1011
+ "<|IMAGE_SEP|>",
1012
+ "<mask:4>",
1013
+ "<mask:5>",
1014
+ "<mask:6>",
1015
+ "<mask:7>"
1016
  ],
1017
  "bos_token": {
1018
  "content": "<s>",
 
1035
  "rstrip": false,
1036
  "single_word": false
1037
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1038
  "mask_token": {
1039
  "content": "<mask:1>",
1040
  "lstrip": false,
 
1056
  "rstrip": false,
1057
  "single_word": false
1058
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1059
  "unk_token": {
1060
  "content": "<unk>",
1061
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -9245,22 +9245,25 @@
9245
  "<|LOC_SEP|>",
9246
  "<|CROP_COL_SEP|>",
9247
  "<|CROP_ROW_SEP|>",
9248
- "<|IMAGE_SEP|>"
 
 
 
 
9249
  ],
9250
  "bos_token": "<s>",
9251
  "clean_up_tokenization_spaces": false,
9252
  "cls_token": "<|begin_of_sentence|>",
9253
  "eos_token": "</s>",
9254
  "extra_special_tokens": {},
9255
- "header_end_token": "<mask:7>",
9256
- "header_start_token": "<mask:6>",
9257
  "mask_token": "<mask:1>",
9258
  "model_max_length": 131072,
9259
  "pad_token": "<unk>",
9260
  "sep_token": "<|end_of_sentence|>",
 
9261
  "spaces_between_special_tokens": false,
9262
- "sys_end_token": "<mask:5>",
9263
- "sys_start_token": "<mask:4>",
9264
  "tokenizer_class": "Ernie4_5Tokenizer",
9265
- "unk_token": "<unk>"
 
9266
  }
 
9245
  "<|LOC_SEP|>",
9246
  "<|CROP_COL_SEP|>",
9247
  "<|CROP_ROW_SEP|>",
9248
+ "<|IMAGE_SEP|>",
9249
+ "<mask:4>",
9250
+ "<mask:5>",
9251
+ "<mask:6>",
9252
+ "<mask:7>"
9253
  ],
9254
  "bos_token": "<s>",
9255
  "clean_up_tokenization_spaces": false,
9256
  "cls_token": "<|begin_of_sentence|>",
9257
  "eos_token": "</s>",
9258
  "extra_special_tokens": {},
9259
+ "legacy": true,
 
9260
  "mask_token": "<mask:1>",
9261
  "model_max_length": 131072,
9262
  "pad_token": "<unk>",
9263
  "sep_token": "<|end_of_sentence|>",
9264
+ "sp_model_kwargs": {},
9265
  "spaces_between_special_tokens": false,
 
 
9266
  "tokenizer_class": "Ernie4_5Tokenizer",
9267
+ "unk_token": "<unk>",
9268
+ "use_default_system_prompt": false
9269
  }