Upload tokenizer
Browse files
- added_tokens.json +1 -2
- special_tokens_map.json +1 -0
- spiece.model +2 -2
- tokenizer_config.json +20 -8
added_tokens.json
CHANGED
|
@@ -98,6 +98,5 @@
|
|
| 98 |
"<extra_id_97>": 1002,
|
| 99 |
"<extra_id_98>": 1001,
|
| 100 |
"<extra_id_99>": 1000,
|
| 101 |
-
"<extra_id_9>": 1090
|
| 102 |
-
"<pad>": 1100
|
| 103 |
}
|
|
|
|
| 98 |
"<extra_id_97>": 1002,
|
| 99 |
"<extra_id_98>": 1001,
|
| 100 |
"<extra_id_99>": 1000,
|
| 101 |
+
"<extra_id_9>": 1090
|
|
|
|
| 102 |
}
|
special_tokens_map.json
CHANGED
|
@@ -101,6 +101,7 @@
|
|
| 101 |
"<extra_id_98>",
|
| 102 |
"<extra_id_99>"
|
| 103 |
],
|
|
|
|
| 104 |
"eos_token": {
|
| 105 |
"content": "</s>",
|
| 106 |
"lstrip": false,
|
|
|
|
| 101 |
"<extra_id_98>",
|
| 102 |
"<extra_id_99>"
|
| 103 |
],
|
| 104 |
+
"bos_token": "<s>",
|
| 105 |
"eos_token": {
|
| 106 |
"content": "</s>",
|
| 107 |
"lstrip": false,
|
spiece.model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d862dc059c77d41f3108bbbd7578cc6d4f80fe3306247647c77c6747a39ef717
|
| 3 |
+
size 253584
|
tokenizer_config.json
CHANGED
|
@@ -2,6 +2,14 @@
|
|
| 2 |
"add_prefix_space": true,
|
| 3 |
"added_tokens_decoder": {
|
| 4 |
"0": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"content": "<unk>",
|
| 6 |
"lstrip": false,
|
| 7 |
"normalized": false,
|
|
@@ -10,6 +18,14 @@
|
|
| 10 |
"special": true
|
| 11 |
},
|
| 12 |
"2": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"content": "</s>",
|
| 14 |
"lstrip": false,
|
| 15 |
"normalized": false,
|
|
@@ -816,14 +832,6 @@
|
|
| 816 |
"rstrip": true,
|
| 817 |
"single_word": false,
|
| 818 |
"special": true
|
| 819 |
-
},
|
| 820 |
-
"1100": {
|
| 821 |
-
"content": "<pad>",
|
| 822 |
-
"lstrip": false,
|
| 823 |
-
"normalized": false,
|
| 824 |
-
"rstrip": false,
|
| 825 |
-
"single_word": false,
|
| 826 |
-
"special": true
|
| 827 |
}
|
| 828 |
},
|
| 829 |
"additional_special_tokens": [
|
|
@@ -928,12 +936,16 @@
|
|
| 928 |
"<extra_id_98>",
|
| 929 |
"<extra_id_99>"
|
| 930 |
],
|
|
|
|
| 931 |
"clean_up_tokenization_spaces": true,
|
|
|
|
|
|
|
| 932 |
"eos_token": "</s>",
|
| 933 |
"extra_ids": 100,
|
| 934 |
"legacy": true,
|
| 935 |
"model_max_length": 1000000000000000019884624838656,
|
| 936 |
"pad_token": "<pad>",
|
|
|
|
| 937 |
"sp_model_kwargs": {},
|
| 938 |
"tokenizer_class": "T5Tokenizer",
|
| 939 |
"unk_token": "<unk>"
|
|
|
|
| 2 |
"add_prefix_space": true,
|
| 3 |
"added_tokens_decoder": {
|
| 4 |
"0": {
|
| 5 |
+
"content": "<pad>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"1": {
|
| 13 |
"content": "<unk>",
|
| 14 |
"lstrip": false,
|
| 15 |
"normalized": false,
|
|
|
|
| 18 |
"special": true
|
| 19 |
},
|
| 20 |
"2": {
|
| 21 |
+
"content": "<s>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"3": {
|
| 29 |
"content": "</s>",
|
| 30 |
"lstrip": false,
|
| 31 |
"normalized": false,
|
|
|
|
| 832 |
"rstrip": true,
|
| 833 |
"single_word": false,
|
| 834 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 835 |
}
|
| 836 |
},
|
| 837 |
"additional_special_tokens": [
|
|
|
|
| 936 |
"<extra_id_98>",
|
| 937 |
"<extra_id_99>"
|
| 938 |
],
|
| 939 |
+
"bos_token": "<s>",
|
| 940 |
"clean_up_tokenization_spaces": true,
|
| 941 |
+
"do_basic_tokenize": true,
|
| 942 |
+
"do_lower_case": true,
|
| 943 |
"eos_token": "</s>",
|
| 944 |
"extra_ids": 100,
|
| 945 |
"legacy": true,
|
| 946 |
"model_max_length": 1000000000000000019884624838656,
|
| 947 |
"pad_token": "<pad>",
|
| 948 |
+
"padding": true,
|
| 949 |
"sp_model_kwargs": {},
|
| 950 |
"tokenizer_class": "T5Tokenizer",
|
| 951 |
"unk_token": "<unk>"
|