Upload tokenizer
Browse files
- tokenizer.json +1 -29
- tokenizer_config.json +2 -2
tokenizer.json
CHANGED
|
@@ -6953,12 +6953,6 @@
|
|
| 6953 |
"post_processor": {
|
| 6954 |
"type": "TemplateProcessing",
|
| 6955 |
"single": [
|
| 6956 |
-
{
|
| 6957 |
-
"SpecialToken": {
|
| 6958 |
-
"id": "<s>",
|
| 6959 |
-
"type_id": 0
|
| 6960 |
-
}
|
| 6961 |
-
},
|
| 6962 |
{
|
| 6963 |
"Sequence": {
|
| 6964 |
"id": "A",
|
|
@@ -6967,24 +6961,12 @@
|
|
| 6967 |
}
|
| 6968 |
],
|
| 6969 |
"pair": [
|
| 6970 |
-
{
|
| 6971 |
-
"SpecialToken": {
|
| 6972 |
-
"id": "<s>",
|
| 6973 |
-
"type_id": 0
|
| 6974 |
-
}
|
| 6975 |
-
},
|
| 6976 |
{
|
| 6977 |
"Sequence": {
|
| 6978 |
"id": "A",
|
| 6979 |
"type_id": 0
|
| 6980 |
}
|
| 6981 |
},
|
| 6982 |
-
{
|
| 6983 |
-
"SpecialToken": {
|
| 6984 |
-
"id": "<s>",
|
| 6985 |
-
"type_id": 1
|
| 6986 |
-
}
|
| 6987 |
-
},
|
| 6988 |
{
|
| 6989 |
"Sequence": {
|
| 6990 |
"id": "B",
|
|
@@ -6992,17 +6974,7 @@
|
|
| 6992 |
}
|
| 6993 |
}
|
| 6994 |
],
|
| 6995 |
-
"special_tokens": {
|
| 6996 |
-
"<s>": {
|
| 6997 |
-
"id": "<s>",
|
| 6998 |
-
"ids": [
|
| 6999 |
-
1
|
| 7000 |
-
],
|
| 7001 |
-
"tokens": [
|
| 7002 |
-
"<s>"
|
| 7003 |
-
]
|
| 7004 |
-
}
|
| 7005 |
-
}
|
| 7006 |
},
|
| 7007 |
"decoder": {
|
| 7008 |
"type": "Sequence",
|
|
|
|
| 6953 |
"post_processor": {
|
| 6954 |
"type": "TemplateProcessing",
|
| 6955 |
"single": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6956 |
{
|
| 6957 |
"Sequence": {
|
| 6958 |
"id": "A",
|
|
|
|
| 6961 |
}
|
| 6962 |
],
|
| 6963 |
"pair": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6964 |
{
|
| 6965 |
"Sequence": {
|
| 6966 |
"id": "A",
|
| 6967 |
"type_id": 0
|
| 6968 |
}
|
| 6969 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6970 |
{
|
| 6971 |
"Sequence": {
|
| 6972 |
"id": "B",
|
|
|
|
| 6974 |
}
|
| 6975 |
}
|
| 6976 |
],
|
| 6977 |
+
"special_tokens": {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6978 |
},
|
| 6979 |
"decoder": {
|
| 6980 |
"type": "Sequence",
|
tokenizer_config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"add_bos_token": true,
|
| 3 |
"add_eos_token": false,
|
| 4 |
"add_prefix_space": true,
|
| 5 |
"added_tokens_decoder": {
|
|
@@ -6174,7 +6174,7 @@
|
|
| 6174 |
},
|
| 6175 |
"bos_token": "<s>",
|
| 6176 |
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ bos_token }}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + eos_token }}{% endif %}{% endfor %}",
|
| 6177 |
-
"clean_up_tokenization_spaces": false,
|
| 6178 |
"eos_token": "</s>",
|
| 6179 |
"legacy": false,
|
| 6180 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
"add_eos_token": false,
|
| 4 |
"add_prefix_space": true,
|
| 5 |
"added_tokens_decoder": {
|
|
|
|
| 6174 |
},
|
| 6175 |
"bos_token": "<s>",
|
| 6176 |
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ bos_token }}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + eos_token }}{% endif %}{% endfor %}",
|
| 6177 |
+
"clean_up_tokenization_spaces": true,
|
| 6178 |
"eos_token": "</s>",
|
| 6179 |
"legacy": false,
|
| 6180 |
"model_max_length": 1000000000000000019884624838656,
|