{
  "added_tokens_decoder": {
    "-1": {
      "content": "<|pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "0": {
      "content": "<|unk|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|bos|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|eos|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|bos|>",
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "<|eos|>",
  "extra_special_tokens": {},
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<|pad|>",
  "tokenizer_class": "BltTokenizerHF",
  "unk_token": "<|unk|>",
  "vocab": {
    "<|bos|>": 2,
    "<|eos|>": 3,
    "<|pad|>": 0,
    "<|unk|>": 1,
    "hello": 4,
    "world": 5,
    "ประเทศไทย": 7,
    "สวัสดี": 6
  }
}