Update tokenizer_config.json
Browse files- tokenizer_config.json +1 -49
tokenizer_config.json
CHANGED
|
@@ -56,60 +56,12 @@
|
|
| 56 |
"rstrip": false,
|
| 57 |
"single_word": false,
|
| 58 |
"special": true
|
| 59 |
-
},
|
| 60 |
-
"7": {
|
| 61 |
-
"content": "<model_identity>",
|
| 62 |
-
"lstrip": false,
|
| 63 |
-
"normalized": false,
|
| 64 |
-
"rstrip": false,
|
| 65 |
-
"single_word": false,
|
| 66 |
-
"special": true
|
| 67 |
-
},
|
| 68 |
-
"8": {
|
| 69 |
-
"content": "</model_identity>",
|
| 70 |
-
"lstrip": false,
|
| 71 |
-
"normalized": false,
|
| 72 |
-
"rstrip": false,
|
| 73 |
-
"single_word": false,
|
| 74 |
-
"special": true
|
| 75 |
-
},
|
| 76 |
-
"9": {
|
| 77 |
-
"content": "<repo_name>",
|
| 78 |
-
"lstrip": false,
|
| 79 |
-
"normalized": false,
|
| 80 |
-
"rstrip": false,
|
| 81 |
-
"single_word": false,
|
| 82 |
-
"special": true
|
| 83 |
-
},
|
| 84 |
-
"10": {
|
| 85 |
-
"content": "<reponame>",
|
| 86 |
-
"lstrip": false,
|
| 87 |
-
"normalized": false,
|
| 88 |
-
"rstrip": false,
|
| 89 |
-
"single_word": false,
|
| 90 |
-
"special": true
|
| 91 |
-
},
|
| 92 |
-
"11": {
|
| 93 |
-
"content": "<file_sep>",
|
| 94 |
-
"lstrip": false,
|
| 95 |
-
"normalized": false,
|
| 96 |
-
"rstrip": false,
|
| 97 |
-
"single_word": false,
|
| 98 |
-
"special": true
|
| 99 |
}
|
| 100 |
},
|
| 101 |
"bos_token": "<|im_start|>",
|
| 102 |
-
"chat_template": "{% if message['role'] == 'system' %}<
|
| 103 |
"clean_up_tokenization_spaces": false,
|
| 104 |
"eos_token": "<|im_end|>",
|
| 105 |
-
"extra_special_tokens": {
|
| 106 |
-
"think_token": "<think>",
|
| 107 |
-
"think_end_token": "</think>",
|
| 108 |
-
"output_token": "<output>",
|
| 109 |
-
"output_end_token": "</output>",
|
| 110 |
-
"model_identity_token": "<model_identity>",
|
| 111 |
-
"model_identity_end_token": "</model_identity>"
|
| 112 |
-
},
|
| 113 |
"model_max_length": 1000000000000000019884624838656,
|
| 114 |
"pad_token": "<|endoftext|>",
|
| 115 |
"tokenizer_class": "GPT2Tokenizer",
|
|
|
|
| 56 |
"rstrip": false,
|
| 57 |
"single_word": false,
|
| 58 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
}
|
| 60 |
},
|
| 61 |
"bos_token": "<|im_start|>",
|
| 62 |
+
"chat_template": "{%- for message in messages -%}\n{%- if message['role'] == 'system' -%}\n<|im_start|>system\nYour name is Superthoughts lite by Pinkstack. You are an open weights AI model released in 2025 with built-in information up to 2024.\n<|im_end|>\n{%- elif message['role'] == 'user' -%}\n<|im_start|>user\n{{ message.content }}<|im_end|>\n{%- elif message['role'] == 'assistant' -%}\n<|im_start|>assistant\n<think>\n{{ message.thinking_content if 'thinking_content' in message else '' }}</think>\n<output>\n{{ message.content }}</output>\n<|im_end|>\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n<|im_start|>assistant\n{%- endif -%}",
|
| 63 |
"clean_up_tokenization_spaces": false,
|
| 64 |
"eos_token": "<|im_end|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
"model_max_length": 1000000000000000019884624838656,
|
| 66 |
"pad_token": "<|endoftext|>",
|
| 67 |
"tokenizer_class": "GPT2Tokenizer",
|