Upload tokenizer
Browse files
- special_tokens_map.json +5 -29
- tokenizer_config.json +9 -6
special_tokens_map.json
CHANGED
|
@@ -1008,7 +1008,11 @@
|
|
| 1008 |
"<|LOC_SEP|>",
|
| 1009 |
"<|CROP_COL_SEP|>",
|
| 1010 |
"<|CROP_ROW_SEP|>",
|
| 1011 |
-
"<|IMAGE_SEP|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1012 |
],
|
| 1013 |
"bos_token": {
|
| 1014 |
"content": "<s>",
|
|
@@ -1031,20 +1035,6 @@
|
|
| 1031 |
"rstrip": false,
|
| 1032 |
"single_word": false
|
| 1033 |
},
|
| 1034 |
-
"header_end_token": {
|
| 1035 |
-
"content": "<mask:7>",
|
| 1036 |
-
"lstrip": false,
|
| 1037 |
-
"normalized": false,
|
| 1038 |
-
"rstrip": false,
|
| 1039 |
-
"single_word": false
|
| 1040 |
-
},
|
| 1041 |
-
"header_start_token": {
|
| 1042 |
-
"content": "<mask:6>",
|
| 1043 |
-
"lstrip": false,
|
| 1044 |
-
"normalized": false,
|
| 1045 |
-
"rstrip": false,
|
| 1046 |
-
"single_word": false
|
| 1047 |
-
},
|
| 1048 |
"mask_token": {
|
| 1049 |
"content": "<mask:1>",
|
| 1050 |
"lstrip": false,
|
|
@@ -1066,20 +1056,6 @@
|
|
| 1066 |
"rstrip": false,
|
| 1067 |
"single_word": false
|
| 1068 |
},
|
| 1069 |
-
"sys_end_token": {
|
| 1070 |
-
"content": "<mask:5>",
|
| 1071 |
-
"lstrip": false,
|
| 1072 |
-
"normalized": false,
|
| 1073 |
-
"rstrip": false,
|
| 1074 |
-
"single_word": false
|
| 1075 |
-
},
|
| 1076 |
-
"sys_start_token": {
|
| 1077 |
-
"content": "<mask:4>",
|
| 1078 |
-
"lstrip": false,
|
| 1079 |
-
"normalized": false,
|
| 1080 |
-
"rstrip": false,
|
| 1081 |
-
"single_word": false
|
| 1082 |
-
},
|
| 1083 |
"unk_token": {
|
| 1084 |
"content": "<unk>",
|
| 1085 |
"lstrip": false,
|
|
|
|
| 1008 |
"<|LOC_SEP|>",
|
| 1009 |
"<|CROP_COL_SEP|>",
|
| 1010 |
"<|CROP_ROW_SEP|>",
|
| 1011 |
+
"<|IMAGE_SEP|>",
|
| 1012 |
+
"<mask:4>",
|
| 1013 |
+
"<mask:5>",
|
| 1014 |
+
"<mask:6>",
|
| 1015 |
+
"<mask:7>"
|
| 1016 |
],
|
| 1017 |
"bos_token": {
|
| 1018 |
"content": "<s>",
|
|
|
|
| 1035 |
"rstrip": false,
|
| 1036 |
"single_word": false
|
| 1037 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1038 |
"mask_token": {
|
| 1039 |
"content": "<mask:1>",
|
| 1040 |
"lstrip": false,
|
|
|
|
| 1056 |
"rstrip": false,
|
| 1057 |
"single_word": false
|
| 1058 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1059 |
"unk_token": {
|
| 1060 |
"content": "<unk>",
|
| 1061 |
"lstrip": false,
|
tokenizer_config.json
CHANGED
|
@@ -9245,22 +9245,25 @@
|
|
| 9245 |
"<|LOC_SEP|>",
|
| 9246 |
"<|CROP_COL_SEP|>",
|
| 9247 |
"<|CROP_ROW_SEP|>",
|
| 9248 |
-
"<|IMAGE_SEP|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9249 |
],
|
| 9250 |
"bos_token": "<s>",
|
| 9251 |
"clean_up_tokenization_spaces": false,
|
| 9252 |
"cls_token": "<|begin_of_sentence|>",
|
| 9253 |
"eos_token": "</s>",
|
| 9254 |
"extra_special_tokens": {},
|
| 9255 |
-
"header_end_token": "<mask:7>",
|
| 9256 |
-
"header_start_token": "<mask:6>",
|
| 9257 |
"mask_token": "<mask:1>",
|
| 9258 |
"model_max_length": 131072,
|
| 9259 |
"pad_token": "<unk>",
|
| 9260 |
"sep_token": "<|end_of_sentence|>",
|
|
|
|
| 9261 |
"spaces_between_special_tokens": false,
|
| 9262 |
-
"sys_end_token": "<mask:5>",
|
| 9263 |
-
"sys_start_token": "<mask:4>",
|
| 9264 |
"tokenizer_class": "Ernie4_5Tokenizer",
|
| 9265 |
-
"unk_token": "<unk>"
|
|
|
|
| 9266 |
}
|
|
|
|
| 9245 |
"<|LOC_SEP|>",
|
| 9246 |
"<|CROP_COL_SEP|>",
|
| 9247 |
"<|CROP_ROW_SEP|>",
|
| 9248 |
+
"<|IMAGE_SEP|>",
|
| 9249 |
+
"<mask:4>",
|
| 9250 |
+
"<mask:5>",
|
| 9251 |
+
"<mask:6>",
|
| 9252 |
+
"<mask:7>"
|
| 9253 |
],
|
| 9254 |
"bos_token": "<s>",
|
| 9255 |
"clean_up_tokenization_spaces": false,
|
| 9256 |
"cls_token": "<|begin_of_sentence|>",
|
| 9257 |
"eos_token": "</s>",
|
| 9258 |
"extra_special_tokens": {},
|
| 9259 |
+
"legacy": true,
|
|
|
|
| 9260 |
"mask_token": "<mask:1>",
|
| 9261 |
"model_max_length": 131072,
|
| 9262 |
"pad_token": "<unk>",
|
| 9263 |
"sep_token": "<|end_of_sentence|>",
|
| 9264 |
+
"sp_model_kwargs": {},
|
| 9265 |
"spaces_between_special_tokens": false,
|
|
|
|
|
|
|
| 9266 |
"tokenizer_class": "Ernie4_5Tokenizer",
|
| 9267 |
+
"unk_token": "<unk>",
|
| 9268 |
+
"use_default_system_prompt": false
|
| 9269 |
}
|