{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30522": {
      "content": "[DNA_A]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30523": {
      "content": "[DNA_C]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30524": {
      "content": "[DNA_G]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30525": {
      "content": "[DNA_T]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30526": {
      "content": "[DNA_R]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30527": {
      "content": "[DNA_Y]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30528": {
      "content": "[DNA_S]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30529": {
      "content": "[DNA_W]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30530": {
      "content": "[DNA_K]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30531": {
      "content": "[DNA_M]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30532": {
      "content": "[DNA_B]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30533": {
      "content": "[DNA_D]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30534": {
      "content": "[DNA_H]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30535": {
      "content": "[DNA_V]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30536": {
      "content": "[DNA_N]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30537": {
      "content": "[INTRON]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30538": {
      "content": "[EXON]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30539": {
      "content": "[DNA_PAD]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30540": {
      "content": "[DNA_UNKNOWN]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30541": {
      "content": "[DNA_INVALID]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "30542": {
      "content": "<|SEQUENCE|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30543": {
      "content": "<|ORGANISM|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30544": {
      "content": "<|GENE|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30545": {
      "content": "<|FLANK_BEFORE|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30546": {
      "content": "<|FLANK_AFTER|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30547": {
      "content": "<|PREDICTED_BEFORE|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30548": {
      "content": "<|TARGET|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|SEQUENCE|>",
    "<|ORGANISM|>",
    "<|GENE|>",
    "<|FLANK_BEFORE|>",
    "<|FLANK_AFTER|>",
    "<|PREDICTED_BEFORE|>",
    "<|TARGET|>"
  ],
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": false,
  "eos_token": "[DNA_PAD]",
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[DNA_PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}