SmolLM2-360M-Think-R18 / tokenizer_config.json
DuoNeural's picture
DuoNeural Think Instillation R18 — dead-prompt filtered GRPO, +0.030 over post-SFT
e86cf4d verified
Raw
History Blame Contribute Delete
881 Bytes
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": "<|endoftext|>",
"clean_up_tokenization_spaces": false,
"eos_token": "<|endoftext|>",
"errors": "replace",
"extra_special_tokens": [
"<|endoftext|>",
"<|im_start|>",
"<|im_end|>",
"<repo_name>",
"<reponame>",
"<file_sep>",
"<filename>",
"<gh_stars>",
"<issue_start>",
"<issue_comment>",
"<issue_closed>",
"<jupyter_start>",
"<jupyter_text>",
"<jupyter_code>",
"<jupyter_output>",
"<jupyter_script>",
"<empty_output>"
],
"is_local": true,
"local_files_only": false,
"max_length": 384,
"model_max_length": 8192,
"pad_token": "<|im_start|>",
"stride": 0,
"tokenizer_class": "GPT2Tokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<|endoftext|>",
"vocab_size": 49152
}