| { |
| "version": "1.0", |
| "truncation": null, |
| "padding": { |
| "strategy": "BatchLongest", |
| "direction": "Right", |
| "pad_to_multiple_of": null, |
| "pad_id": 0, |
| "pad_type_id": 0, |
| "pad_token": "<pad>" |
| }, |
| "added_tokens": [ |
| { |
| "id": 0, |
| "content": "<pad>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 1, |
| "content": "<gap>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 2, |
| "content": "<s>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 3, |
| "content": "</s>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 4, |
| "content": "<unk>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 5, |
| "content": "<cls>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 6, |
| "content": "<sep>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 7, |
| "content": "<expr_top10>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 8, |
| "content": "<expr_pre75_90>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 9, |
| "content": "<expr_pre50_75>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 10, |
| "content": "<expr_pre25_50>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 11, |
| "content": "<expr_low25>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 12, |
| "content": "<expr_unk>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 13, |
| "content": "<seqpos_begin>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 14, |
| "content": "<seqpos_end>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 15, |
| "content": "<seqpos_mid>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 16, |
| "content": "<sim_high>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 17, |
| "content": "<sim_low>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 18, |
| "content": "<S_cerevisiae>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 19, |
| "content": "<S_pombe>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 20, |
| "content": "<E_coli>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 21, |
| "content": "<B_subtilis>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 22, |
| "content": "<mask_*>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 23, |
| "content": "<mask_S>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 24, |
| "content": "<mask_W>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 25, |
| "content": "<mask_D>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 26, |
| "content": "<mask_N>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 27, |
| "content": "<mask_V>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 28, |
| "content": "<mask_H>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 29, |
| "content": "<mask_A>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 30, |
| "content": "<mask_K>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 31, |
| "content": "<mask_G>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 32, |
| "content": "<mask_Q>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 33, |
| "content": "<mask_F>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 34, |
| "content": "<mask_I>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 35, |
| "content": "<mask_T>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 36, |
| "content": "<mask_C>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 37, |
| "content": "<mask_L>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 38, |
| "content": "<mask_P>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 39, |
| "content": "<mask_E>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 40, |
| "content": "<mask_M>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 41, |
| "content": "<mask_Y>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 42, |
| "content": "<mask_R>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 43, |
| "content": "TTT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 44, |
| "content": "TTC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 45, |
| "content": "TTA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 46, |
| "content": "TTG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 47, |
| "content": "TCT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 48, |
| "content": "TCC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 49, |
| "content": "TCA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 50, |
| "content": "TCG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 51, |
| "content": "TAT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 52, |
| "content": "TAC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 53, |
| "content": "TAA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 54, |
| "content": "TAG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 55, |
| "content": "TGT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 56, |
| "content": "TGC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 57, |
| "content": "TGA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 58, |
| "content": "TGG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 59, |
| "content": "CTT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 60, |
| "content": "CTC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 61, |
| "content": "CTA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 62, |
| "content": "CTG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 63, |
| "content": "CCT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 64, |
| "content": "CCC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 65, |
| "content": "CCA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 66, |
| "content": "CCG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 67, |
| "content": "CAT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 68, |
| "content": "CAC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 69, |
| "content": "CAA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 70, |
| "content": "CAG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 71, |
| "content": "CGT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 72, |
| "content": "CGC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 73, |
| "content": "CGA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 74, |
| "content": "CGG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 75, |
| "content": "ATT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 76, |
| "content": "ATC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 77, |
| "content": "ATA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 78, |
| "content": "ATG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 79, |
| "content": "ACT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 80, |
| "content": "ACC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 81, |
| "content": "ACA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 82, |
| "content": "ACG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 83, |
| "content": "AAT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 84, |
| "content": "AAC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 85, |
| "content": "AAA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 86, |
| "content": "AAG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 87, |
| "content": "AGT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 88, |
| "content": "AGC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 89, |
| "content": "AGA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 90, |
| "content": "AGG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 91, |
| "content": "GTT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 92, |
| "content": "GTC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 93, |
| "content": "GTA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 94, |
| "content": "GTG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 95, |
| "content": "GCT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 96, |
| "content": "GCC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 97, |
| "content": "GCA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 98, |
| "content": "GCG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 99, |
| "content": "GAT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 100, |
| "content": "GAC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 101, |
| "content": "GAA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 102, |
| "content": "GAG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 103, |
| "content": "GGT", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 104, |
| "content": "GGC", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 105, |
| "content": "GGA", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 106, |
| "content": "GGG", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 107, |
| "content": "<msk>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| } |
| ], |
| "normalizer": null, |
| "pre_tokenizer": { |
| "type": "Whitespace" |
| }, |
| "post_processor": { |
| "type": "TemplateProcessing", |
| "single": [ |
| { |
| "SpecialToken": { |
| "id": "<cls>", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "<sep>", |
| "type_id": 0 |
| } |
| } |
| ], |
| "pair": [ |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "B", |
| "type_id": 0 |
| } |
| } |
| ], |
| "special_tokens": { |
| "<cls>": { |
| "id": "<cls>", |
| "ids": [ |
| 5 |
| ], |
| "tokens": [ |
| "<cls>" |
| ] |
| }, |
| "<sep>": { |
| "id": "<sep>", |
| "ids": [ |
| 6 |
| ], |
| "tokens": [ |
| "<sep>" |
| ] |
| } |
| } |
| }, |
| "decoder": null, |
| "model": { |
| "type": "WordLevel", |
| "vocab": {}, |
| "unk_token": "<unk>" |
| } |
| } |