Upload tokenizer
Browse files
- special_tokens_map.json +21 -3
- tokenizer.json +4 -2
special_tokens_map.json
CHANGED
|
@@ -101,7 +101,25 @@
|
|
| 101 |
"<extra_id_98>",
|
| 102 |
"<extra_id_99>"
|
| 103 |
],
|
| 104 |
-
"eos_token": "</s>",
|
| 105 |
-
"pad_token": "<pad>",
|
| 106 |
-
"unk_token": "<unk>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
}
|
|
|
|
| 101 |
"<extra_id_98>",
|
| 102 |
"<extra_id_99>"
|
| 103 |
],
|
| 104 |
+
"eos_token": {
|
| 105 |
+
"content": "</s>",
|
| 106 |
+
"lstrip": false,
|
| 107 |
+
"normalized": false,
|
| 108 |
+
"rstrip": false,
|
| 109 |
+
"single_word": false
|
| 110 |
+
},
|
| 111 |
+
"pad_token": {
|
| 112 |
+
"content": "<pad>",
|
| 113 |
+
"lstrip": false,
|
| 114 |
+
"normalized": false,
|
| 115 |
+
"rstrip": false,
|
| 116 |
+
"single_word": false
|
| 117 |
+
},
|
| 118 |
+
"unk_token": {
|
| 119 |
+
"content": "<unk>",
|
| 120 |
+
"lstrip": false,
|
| 121 |
+
"normalized": false,
|
| 122 |
+
"rstrip": false,
|
| 123 |
+
"single_word": false
|
| 124 |
+
}
|
| 125 |
}
|
tokenizer.json
CHANGED
|
@@ -964,7 +964,8 @@
|
|
| 964 |
"pre_tokenizer": {
|
| 965 |
"type": "Metaspace",
|
| 966 |
"replacement": "▁",
|
| 967 |
-
"add_prefix_space": true
|
|
|
|
| 968 |
},
|
| 969 |
"post_processor": {
|
| 970 |
"type": "TemplateProcessing",
|
|
@@ -1023,7 +1024,8 @@
|
|
| 1023 |
"decoder": {
|
| 1024 |
"type": "Metaspace",
|
| 1025 |
"replacement": "▁",
|
| 1026 |
-
"add_prefix_space": true
|
|
|
|
| 1027 |
},
|
| 1028 |
"model": {
|
| 1029 |
"type": "Unigram",
|
|
|
|
| 964 |
"pre_tokenizer": {
|
| 965 |
"type": "Metaspace",
|
| 966 |
"replacement": "▁",
|
| 967 |
+
"add_prefix_space": true,
|
| 968 |
+
"prepend_scheme": "always"
|
| 969 |
},
|
| 970 |
"post_processor": {
|
| 971 |
"type": "TemplateProcessing",
|
|
|
|
| 1024 |
"decoder": {
|
| 1025 |
"type": "Metaspace",
|
| 1026 |
"replacement": "▁",
|
| 1027 |
+
"add_prefix_space": true,
|
| 1028 |
+
"prepend_scheme": "always"
|
| 1029 |
},
|
| 1030 |
"model": {
|
| 1031 |
"type": "Unigram",
|