| { | |
| "added_tokens_decoder": { | |
| "0": { | |
| "content": "[PAD]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100": { | |
| "content": "[UNK]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "101": { | |
| "content": "[CLS]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "102": { | |
| "content": "[SEP]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "103": { | |
| "content": "[MASK]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501153": { | |
| "content": "[LANG_amh]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501154": { | |
| "content": "[LANG_arb]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501155": { | |
| "content": "[LANG_ben]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501156": { | |
| "content": "[LANG_deu]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501157": { | |
| "content": "[LANG_eng]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501158": { | |
| "content": "[LANG_fas]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501159": { | |
| "content": "[LANG_hau]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501160": { | |
| "content": "[LANG_hin]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501161": { | |
| "content": "[LANG_ita]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501162": { | |
| "content": "[LANG_khm]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501163": { | |
| "content": "[LANG_mya]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501164": { | |
| "content": "[LANG_nep]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501165": { | |
| "content": "[LANG_ori]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501166": { | |
| "content": "[LANG_pan]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501167": { | |
| "content": "[LANG_pol]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501168": { | |
| "content": "[LANG_rus]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501169": { | |
| "content": "[LANG_spa]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501170": { | |
| "content": "[LANG_swa]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501171": { | |
| "content": "[LANG_tel]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501172": { | |
| "content": "[LANG_tur]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501173": { | |
| "content": "[LANG_urd]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "501174": { | |
| "content": "[LANG_zho]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| } | |
| }, | |
| "additional_special_tokens": [ | |
| "[LANG_amh]", | |
| "[LANG_arb]", | |
| "[LANG_ben]", | |
| "[LANG_deu]", | |
| "[LANG_eng]", | |
| "[LANG_fas]", | |
| "[LANG_hau]", | |
| "[LANG_hin]", | |
| "[LANG_ita]", | |
| "[LANG_khm]", | |
| "[LANG_mya]", | |
| "[LANG_nep]", | |
| "[LANG_ori]", | |
| "[LANG_pan]", | |
| "[LANG_pol]", | |
| "[LANG_rus]", | |
| "[LANG_spa]", | |
| "[LANG_swa]", | |
| "[LANG_tel]", | |
| "[LANG_tur]", | |
| "[LANG_urd]", | |
| "[LANG_zho]" | |
| ], | |
| "clean_up_tokenization_spaces": true, | |
| "cls_token": "[CLS]", | |
| "do_basic_tokenize": true, | |
| "do_lower_case": false, | |
| "extra_special_tokens": {}, | |
| "mask_token": "[MASK]", | |
| "model_max_length": 512, | |
| "never_split": null, | |
| "pad_token": "[PAD]", | |
| "sep_token": "[SEP]", | |
| "strip_accents": null, | |
| "tokenize_chinese_chars": true, | |
| "tokenizer_class": "BertTokenizer", | |
| "unk_token": "[UNK]" | |
| } | |