{ "added_tokens_decoder": { "0": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "100": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "101": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "102": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "103": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501153": { "content": "[LANG_amh]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501154": { "content": "[LANG_arb]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501155": { "content": "[LANG_ben]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501156": { "content": "[LANG_deu]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501157": { "content": "[LANG_eng]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501158": { "content": "[LANG_fas]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501159": { "content": "[LANG_hau]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501160": { "content": "[LANG_hin]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501161": { "content": "[LANG_ita]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501162": { "content": "[LANG_khm]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501163": { "content": "[LANG_mya]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501164": { "content": "[LANG_nep]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501165": { "content": "[LANG_ori]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501166": { "content": "[LANG_pan]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501167": { "content": "[LANG_pol]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501168": { "content": "[LANG_rus]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501169": { "content": "[LANG_spa]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501170": { "content": "[LANG_swa]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501171": { "content": "[LANG_tel]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501172": { "content": "[LANG_tur]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501173": { "content": "[LANG_urd]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "501174": { "content": "[LANG_zho]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true } }, "additional_special_tokens": [ "[LANG_amh]", "[LANG_arb]", "[LANG_ben]", "[LANG_deu]", "[LANG_eng]", "[LANG_fas]", "[LANG_hau]", "[LANG_hin]", "[LANG_ita]", "[LANG_khm]", "[LANG_mya]", "[LANG_nep]", "[LANG_ori]", "[LANG_pan]", "[LANG_pol]", "[LANG_rus]", "[LANG_spa]", "[LANG_swa]", "[LANG_tel]", "[LANG_tur]", "[LANG_urd]", "[LANG_zho]" ], "clean_up_tokenization_spaces": true, "cls_token": "[CLS]", "do_basic_tokenize": true, "do_lower_case": false, "extra_special_tokens": {}, "mask_token": "[MASK]", "model_max_length": 512, "never_split": null, "pad_token": "[PAD]", "sep_token": "[SEP]", "strip_accents": null, "tokenize_chinese_chars": true, "tokenizer_class": "BertTokenizer", "unk_token": "[UNK]" }