Upload tokenizer

- tokenizer.json +2 -2
- tokenizer_config.json +4 -9
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
+size 17078292
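tokenizer.json is stored as a Git LFS pointer, so the hunk above only swaps the object id and byte size (the old values are truncated in this view). A minimal sanity-check sketch, assuming it is run from the repo root after `git lfs pull`:

import hashlib
from pathlib import Path

data = Path("tokenizer.json").read_bytes()
print(len(data))  # expected: 17078292, the "size" in the new pointer
print(hashlib.sha256(data).hexdigest())
# expected: b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2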
tokenizer_config.json CHANGED
@@ -84,7 +84,7 @@
       "special": true
     },
     "10": {
-      "content": "<
+      "content": "<pad>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -92,7 +92,7 @@
       "special": true
     },
     "11": {
-      "content": "
+      "content": "[PREFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -100,7 +100,7 @@
       "special": true
     },
     "12": {
-      "content": "
+      "content": "[MIDDLE]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -108,7 +108,7 @@
       "special": true
     },
     "13": {
-      "content": "
+      "content": "[SUFFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
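The four hunks above rename special tokens 10 through 13 to <pad>, [PREFIX], [MIDDLE], and [SUFFIX], the usual vocabulary for fill-in-the-middle (FIM) completion. A hedged sketch of one common way such tokens are assembled into a FIM prompt; the model id is a placeholder, and the prefix/suffix/middle ordering depends on how the model was actually trained:

from transformers import AutoTokenizer

# Placeholder id; substitute the repo this commit belongs to.
tok = AutoTokenizer.from_pretrained("org/model")

prefix = "def add(a, b):\n    result = "
suffix = "\n    return result\n"

# Prefix-suffix-middle ordering: the model generates the span that
# belongs between prefix and suffix after the [MIDDLE] sentinel.
prompt = f"[PREFIX]{prefix}[SUFFIX]{suffix}[MIDDLE]"
input_ids = tok(prompt, return_tensors="pt").input_ids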
@@ -8005,14 +8005,9 @@
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{{'<SPECIAL_10>System'}}{% for message in messages %}{% if message['role'] == 'system' %}{{'\n' + message['content'].strip()}}{% endif %}{% endfor %}{{'\n'}}{% for message in messages %}{% if message['role'] == 'user' %}{{ '\n<SPECIAL_11>User\n' + message['content'].strip() + '\n<SPECIAL_11>Assistant\n' }}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_special_tokens": {},
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
   "model_max_length": 1000000000000000019884624838656,
   "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "<unk>"
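With "chat_template" deleted, tokenizer.apply_chat_template(...) no longer has a template to use (current transformers versions raise an error rather than guessing). For reference, a sketch that reproduces by hand what the removed template rendered for a system plus user turn; this is derived from the deleted Jinja string above, not an official prompt spec:

def build_prompt(system: str, user: str) -> str:
    # Mirrors the removed template: <SPECIAL_10> opens the system block,
    # <SPECIAL_11> opens the user and assistant turns.
    return (
        f"<SPECIAL_10>System\n{system.strip()}\n"
        f"\n<SPECIAL_11>User\n{user.strip()}\n"
        f"<SPECIAL_11>Assistant\n"
    )

print(build_prompt("You are a helpful assistant.", "Hello!"))
# <SPECIAL_10>System
# You are a helpful assistant.
#
# <SPECIAL_11>User
# Hello!
# <SPECIAL_11>Assistant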