itriedcoding/OkusUncensored-bucket / tokenizer_config.json
itriedcoding's picture
download
raw
1.33 kB
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"128000": {"content": "<|begin_of_text|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
"128001": {"content": "<|end_of_text|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
"128009": {"content": "<|eot_id|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
},
"bos_token": "<|begin_of_text|>",
"chat_template": "{{ bos_token }}{% set loop_messages = messages %}{% for message in loop_messages %}{% if message['role'] == 'system' %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'assistant' %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
"clean_up_tokenization_spaces": true,
"eos_token": "<|eot_id|>",
"model_max_length": 8192,
"pad_token": "<|end_of_text|>",
"tokenizer_class": "PreTrainedTokenizerFast"
}

Xet Storage Details

Size:
1.33 kB
·
Xet hash:
6c99b921f62af16b018c8dfcd031abf564cfb63cea8573db8d16b0bb2d9a644f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.