{ "added_tokens_decoder": { "0": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "100": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "101": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "102": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "103": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "30522": { "content": "[A]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30523": { "content": "[C]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30524": { "content": "[G]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30525": { "content": "[T]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30526": { "content": "[R]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30527": { "content": "[Y]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30528": { "content": "[S]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30529": { "content": "[W]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30530": { "content": "[K]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30531": { "content": "[M]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30532": { "content": "[B]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30533": { "content": "[D]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30534": { "content": "[H]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30535": { "content": "[V]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30536": { "content": "[N]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "30537": { "content": "<|SEQUENCE|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "30538": { "content": "<|ORGANISM|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "30539": { "content": "<|GENE|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "30540": { "content": "<|FLANK_BEFORE|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "30541": { "content": "<|FLANK_AFTER|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true } }, "additional_special_tokens": [ "<|SEQUENCE|>", "<|ORGANISM|>", "<|GENE|>", "<|FLANK_BEFORE|>", "<|FLANK_AFTER|>" ], "clean_up_tokenization_spaces": true, "cls_token": "[CLS]", "do_basic_tokenize": true, "do_lower_case": false, "extra_special_tokens": {}, "mask_token": "[MASK]", "model_max_length": 512, "never_split": null, "pad_token": "[PAD]", "sep_token": "[SEP]", "strip_accents": null, "tokenize_chinese_chars": true, "tokenizer_class": "BertTokenizer", "unk_token": "[UNK]" }