blogger_tokenizers / tokenizer_config.json
infi
Upload tokenizer
141867f verified
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"34000": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"34001": {
"content": "<|chap|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34002": {
"content": "<|section|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34003": {
"content": "<|subsection|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34004": {
"content": "<|startbox|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34005": {
"content": "<|endbox|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34006": {
"content": "<|para|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34007": {
"content": "<|answer|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34008": {
"content": "<|title|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34009": {
"content": "<|question|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34010": {
"content": "<|topic|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34011": {
"content": "<|teaser|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34012": {
"content": "<|startblog|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34013": {
"content": "<|endblog|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34014": {
"content": "<|unkbox|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|startbox|>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endbox|>",
"model_max_length": 1024,
"pad_token": "<pad>",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<|unkbox|>"
}