sft-test2 / tokenizer_config.json
Ba2han's picture
Training in progress, step 1189
4e1c50b verified
Raw
History Blame Contribute Delete
4.28 kB
{
"backend": "tokenizers",
"bos_token": "<|begin_of_text|>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|im_end|>",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>"
],
"from_slow": true,
"is_local": false,
"legacy": false,
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 8192,
"pad_token": "<|finetune_right_pad_id|>",
"padding_side": "right",
"tokenizer_class": "TokenizersBackend",
"unk_token": null,
"added_tokens_decoder": {
"50030": {
"content": "<|begin_of_text|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50031": {
"content": "<|end_of_text|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50032": {
"content": "<|reserved_special_token_0|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50033": {
"content": "<|reserved_special_token_1|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50034": {
"content": "<|finetune_right_pad_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50035": {
"content": "<|reserved_special_token_2|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50036": {
"content": "<|start_header_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50037": {
"content": "<|end_header_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50038": {
"content": "<|eom_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50039": {
"content": "<|eot_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50040": {
"content": "<|python_tag|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50041": {
"content": "<|reserved_special_token_3|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50042": {
"content": "<|reserved_special_token_4|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50043": {
"content": "<|reserved_special_token_5|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50044": {
"content": "<|reserved_special_token_6|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50045": {
"content": "<|reserved_special_token_7|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50046": {
"content": "<|reserved_special_token_8|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50047": {
"content": "<|reserved_special_token_9|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50048": {
"content": "<|im_start|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"50049": {
"content": "<|im_end|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
}
}