{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "StripAccents" } ] }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": null, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[UNK]": 0, "[PAD]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "A": 5, "C": 6, "G": 7, "T": 8, "##T": 9, "##G": 10, "##C": 11, "##A": 12, "AA": 13, "GA": 14, "TT": 15, "AT": 16, "##CT": 17, "##CA": 18, "CA": 19, "GT": 20, "AG": 21, "GG": 22, "GAA": 23, "##CC": 24, "AAA": 25, "CT": 26, "GAT": 27, "AAT": 28, "TA": 29, "AAG": 30, "ATT": 31, "##CG": 32, "CAA": 33, "TTG": 34, "TTA": 35, "TTT": 36, "AAC": 37, "TCT": 38, "GGT": 39, "TG": 40, "GTT": 41, "AGA": 42, "ATG": 43, "GCT": 44, "GAC": 45, "ACT": 46, "GAG": 47, "TCA": 48, "TAT": 49, "TTC": 50, "ATA": 51, "ACA": 52, "CCA": 53, "ATC": 54, "GCA": 55, "AGT": 56, "TAC": 57, "TCC": 58, "CAT": 59, "CTA": 60, "CCT": 61, "CG": 62, "CAG": 63, "GCC": 64, "ACC": 65, "CTT": 66, "GTA": 67, "GTC": 68, "GGA": 69, "GTG": 70, "CTG": 71, "TGG": 72, "AGC": 73, "GGC": 74, "AGG": 75, "TCG": 76, "ACG": 77, "TGT": 78, "CAC": 79, "CCC": 80, "CGT": 81, "GCG": 82, "GGG": 83, "CCG": 84, "CTC": 85, "TGC": 86, "CGA": 87, "CGC": 88, "CGG": 89, "TAA": 90, "TGA": 91, "TAG": 92 } } }