{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": false }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "<|endoftext|>": 0, "[PAD]": 1, "A": 2, "C": 3, "G": 4, "T": 5, "Ċ": 6, "TT": 7, "AA": 8, "TG": 9, "AG": 10, "CC": 11, "TC": 12, "AC": 13, "GG": 14, "ATT": 15, "AT": 16, "ATG": 17, "GC": 18, "TAA": 19, "TCC": 20, "ACC": 21, "AAAA": 22, "AGG": 23, "AGC": 24, "ATC": 25, "TTC": 26, "AAG": 27, "TTTT": 28, "TGC": 29, "TGG": 30, "AAC": 31, "TTG": 32, "TAG": 33, "TAC": 34, "CCC": 35, "TATT": 36, "TGGG": 37, "AGAA": 38, "TAT": 39, "AGGG": 40, "TTTC": 41, "AGGC": 42, "AGCC": 43, "TGTG": 44, "ATAA": 45, "ATTC": 46, "TTGG": 47, "ACAC": 48, "AAGG": 49, "TCTC": 50, "TCCC": 51, "TATG": 52, "TTTG": 53, "TTCC": 54, "AGAG": 55, "AAAC": 56, "ATGG": 57, "AGTG": 58, "ACCC": 59, "AGAC": 60, "TGCC": 61, "ATTG": 62, "ATCC": 63, "ATGC": 64, "ATAC": 65, "TCAC": 66, "TCTG": 67, "TTAA": 68, "TGAA": 69, "TGGC": 70, "TTGC": 71, "TATC": 72, "TAAG": 73, "TAAC": 74, "AAAG": 75, "GGG": 76, "AAGC": 77, "GGC": 78, "TTAC": 79, "ATAT": 80, "TAGC": 81, "TACC": 82, "AACC": 83, "AATG": 84, "TAGG": 85, "ATATT": 86, "GCC": 87, "AGTC": 88, "TTTTC": 89, "TGAC": 90, "AAAAC": 91, "AATC": 92, "TTTAA": 93, "AAAAG": 94, "ATAG": 95, "TGTC": 96, "TTATT": 97, "TTTTG": 98, "TGAG": 99, "AATT": 100, "AAATT": 101, "ACAG": 102, "TTTCC": 103, "AATAA": 104, "TCAG": 105, "AGGCC": 106, "AAATG": 107, "TGGGC": 108, "ACTC": 109, "ACG": 110, "ATTTC": 111, "ACTG": 112, "TTAG": 113, "TGGCC": 114, "ATATG": 115, "ACAA": 116, "ATCTC": 117, "TATTC": 118, "TGTAA": 119, "ACTT": 120, "ATTCC": 121, "AAAT": 122, "ATGCC": 123, "AAAAAAAA": 124, "TTCCC": 125, "TTTGC": 126, "TTTAG": 127, "TCCCC": 128, "TGGGG": 129, "TTCTC": 130, "TAAAA": 131, "ACCCC": 132, "AGAAG": 133, "ACCTC": 134, "AGGGC": 135, "TTTTTTTT": 136, "ACATT": 137, "AGATG": 138, "GGCC": 139, "GTG": 140, "AAGCC": 141, "ATAAAA": 142, "AGGAG": 143, "ATGGC": 144, "ATTAC": 145, "TTTAC": 146, "ATTGC": 147, "TGGAG": 148, "TCAA": 149, "ACTGC": 150, "TATTG": 151, "AAGGC": 152, "TTTGG": 153, "TTGCC": 154, "AAATC": 155, "TTGGC": 156, "GGGC": 157, "ATATC": 158, "AGAAC": 159, "ACTCC": 160, "ATCCC": 161, "ATAAC": 162, "AATTC": 163, "AGGGG": 164, "AAAGC": 165, "AGAGC": 166, "TCG": 167, "ATTTT": 168, "TGAGG": 169, "ATGGG": 170, "AAAGG": 171, "TTGGG": 172, "AGAAAA": 173, "TCTCC": 174, "ATAAG": 175, "ATTTG": 176, "TATCC": 177, "ACCAC": 178, "TCTTC": 179, "ATCAC": 180, "AGAGG": 181, "TTTATT": 182, "TGTAG": 183, "AGTAA": 184, "ATAGC": 185, "AGCCC": 186, "AGATT": 187, "AGTT": 188, "GGGG": 189, "TGTGC": 190, "TGTGG": 191, "TATGC": 192, "TGCCC": 193, "TGAAG": 194, "TGTT": 195, "TCTT": 196, "AGACC": 197, "TCATT": 198, "TCTGC": 199, "AAGGG": 200, "AGTTC": 201, "AAACC": 202, "ACATG": 203, "ACTTC": 204, "ACAGC": 205, "AATAG": 206, "AATAC": 207, "TGATG": 208, "AATGC": 209, "AATGG": 210, "TGATT": 211, "TCAGC": 212, "TGTTC": 213, "TGAGC": 214, "TCTTG": 215, "TCAGG": 216, "TTTTCC": 217, "TTAAAA": 218, "TATGG": 219, "ATACC": 220, "AGTGG": 221, "TCCCAGC": 222, "AGATC": 223, "AACCC": 224, "TACCC": 225, "TGAAC": 226, "AATCC": 227, "TGTTG": 228, "TTAAG": 229, "TAGCC": 230, "TTAAC": 231, "TTATG": 232, "ATAGG": 233, "AATTG": 234, "TTTAT": 235, "AGTGC": 236, "TGTCC": 237, "TAGGC": 238, "TCTAA": 239, "AĊ": 240, "TCATC": 241, "TCAAG": 242, "TCACC": 243, "AATTTT": 244, "TCTTTT": 245, "TTATC": 246, "AGAAGC": 247, "AGTGGC": 248, "ATTTTC": 249, "CCCC": 250, "AAGAC": 251, "AGTAG": 252, "ATTGG": 253, "TCATG": 254, "AAATAA": 255 }, "merges": [ [ "T", "T" ], [ "A", "A" ], [ "T", "G" ], [ "A", "G" ], [ "C", "C" ], [ "T", "C" ], [ "A", "C" ], [ "G", "G" ], [ "A", "TT" ], [ "A", "T" ], [ "A", "TG" ], [ "G", "C" ], [ "T", "AA" ], [ "T", "CC" ], [ "A", "CC" ], [ "AA", "AA" ], [ "AG", "G" ], [ "AG", "C" ], [ "A", "TC" ], [ "TT", "C" ], [ "AA", "G" ], [ "TT", "TT" ], [ "TG", "C" ], [ "TG", "G" ], [ "AA", "C" ], [ "TT", "G" ], [ "T", "AG" ], [ "T", "AC" ], [ "CC", "C" ], [ "T", "ATT" ], [ "TG", "GG" ], [ "AG", "AA" ], [ "T", "AT" ], [ "AG", "GG" ], [ "TT", "TC" ], [ "AG", "GC" ], [ "AG", "CC" ], [ "TG", "TG" ], [ "AT", "AA" ], [ "ATT", "C" ], [ "TT", "GG" ], [ "AC", "AC" ], [ "AA", "GG" ], [ "TC", "TC" ], [ "TCC", "C" ], [ "T", "ATG" ], [ "TT", "TG" ], [ "TT", "CC" ], [ "AG", "AG" ], [ "AA", "AC" ], [ "ATG", "G" ], [ "AG", "TG" ], [ "ACC", "C" ], [ "AG", "AC" ], [ "TG", "CC" ], [ "ATT", "G" ], [ "AT", "CC" ], [ "ATG", "C" ], [ "AT", "AC" ], [ "TC", "AC" ], [ "TC", "TG" ], [ "TT", "AA" ], [ "TG", "AA" ], [ "TG", "GC" ], [ "TT", "GC" ], [ "T", "ATC" ], [ "TAA", "G" ], [ "TAA", "C" ], [ "AA", "AG" ], [ "GG", "G" ], [ "AA", "GC" ], [ "GG", "C" ], [ "TT", "AC" ], [ "AT", "AT" ], [ "T", "AGC" ], [ "T", "ACC" ], [ "AA", "CC" ], [ "AA", "TG" ], [ "T", "AGG" ], [ "AT", "ATT" ], [ "G", "CC" ], [ "AG", "TC" ], [ "TT", "TTC" ], [ "TG", "AC" ], [ "AAAA", "C" ], [ "AA", "TC" ], [ "TT", "TAA" ], [ "AAAA", "G" ], [ "AT", "AG" ], [ "TG", "TC" ], [ "TT", "ATT" ], [ "TTTT", "G" ], [ "TG", "AG" ], [ "AA", "TT" ], [ "AA", "ATT" ], [ "AC", "AG" ], [ "TT", "TCC" ], [ "AA", "TAA" ], [ "TC", "AG" ], [ "AGG", "CC" ], [ "AA", "ATG" ], [ "TGGG", "C" ], [ "AC", "TC" ], [ "AC", "G" ], [ "ATT", "TC" ], [ "AC", "TG" ], [ "TT", "AG" ], [ "TGG", "CC" ], [ "AT", "ATG" ], [ "AC", "AA" ], [ "ATC", "TC" ], [ "TATT", "C" ], [ "TG", "TAA" ], [ "AC", "TT" ], [ "ATT", "CC" ], [ "AA", "AT" ], [ "ATG", "CC" ], [ "AAAA", "AAAA" ], [ "TT", "CCC" ], [ "TT", "TGC" ], [ "TT", "TAG" ], [ "TCC", "CC" ], [ "TGGG", "G" ], [ "TTC", "TC" ], [ "TAA", "AA" ], [ "ACC", "CC" ], [ "AG", "AAG" ], [ "ACC", "TC" ], [ "AGGG", "C" ], [ "TTTT", "TTTT" ], [ "AC", "ATT" ], [ "AG", "ATG" ], [ "GG", "CC" ], [ "G", "TG" ], [ "AAG", "CC" ], [ "AT", "AAAA" ], [ "AGG", "AG" ], [ "ATG", "GC" ], [ "ATT", "AC" ], [ "TT", "TAC" ], [ "ATT", "GC" ], [ "TGG", "AG" ], [ "TC", "AA" ], [ "AC", "TGC" ], [ "TATT", "G" ], [ "AAGG", "C" ], [ "TT", "TGG" ], [ "TTG", "CC" ], [ "AA", "ATC" ], [ "TTGG", "C" ], [ "GG", "GC" ], [ "AT", "ATC" ], [ "AG", "AAC" ], [ "AC", "TCC" ], [ "AT", "CCC" ], [ "AT", "AAC" ], [ "AA", "TTC" ], [ "AGGG", "G" ], [ "AA", "AGC" ], [ "AG", "AGC" ], [ "TC", "G" ], [ "ATT", "TT" ], [ "TG", "AGG" ], [ "ATG", "GG" ], [ "AA", "AGG" ], [ "TTGG", "G" ], [ "AG", "AAAA" ], [ "TC", "TCC" ], [ "AT", "AAG" ], [ "ATT", "TG" ], [ "TAT", "CC" ], [ "ACC", "AC" ], [ "TC", "TTC" ], [ "ATC", "AC" ], [ "AG", "AGG" ], [ "TT", "TATT" ], [ "TG", "TAG" ], [ "AG", "TAA" ], [ "AT", "AGC" ], [ "AG", "CCC" ], [ "AG", "ATT" ], [ "AG", "TT" ], [ "GG", "GG" ], [ "TG", "TGC" ], [ "TG", "TGG" ], [ "TATG", "C" ], [ "TG", "CCC" ], [ "TG", "AAG" ], [ "TG", "TT" ], [ "TC", "TT" ], [ "AG", "ACC" ], [ "TC", "ATT" ], [ "TC", "TGC" ], [ "AAGG", "G" ], [ "AG", "TTC" ], [ "AA", "ACC" ], [ "AC", "ATG" ], [ "AC", "TTC" ], [ "AC", "AGC" ], [ "AA", "TAG" ], [ "AA", "TAC" ], [ "TG", "ATG" ], [ "AA", "TGC" ], [ "AA", "TGG" ], [ "TG", "ATT" ], [ "TC", "AGC" ], [ "TG", "TTC" ], [ "TG", "AGC" ], [ "TC", "TTG" ], [ "TC", "AGG" ], [ "TTTT", "CC" ], [ "TT", "AAAA" ], [ "TATG", "G" ], [ "AT", "ACC" ], [ "AG", "TGG" ], [ "TCCC", "AGC" ], [ "AG", "ATC" ], [ "AA", "CCC" ], [ "T", "ACCC" ], [ "TG", "AAC" ], [ "AA", "TCC" ], [ "TG", "TTG" ], [ "TT", "AAG" ], [ "TAG", "CC" ], [ "TT", "AAC" ], [ "TT", "ATG" ], [ "AT", "AGG" ], [ "AA", "TTG" ], [ "TT", "TAT" ], [ "AG", "TGC" ], [ "TG", "TCC" ], [ "TAG", "GC" ], [ "TC", "TAA" ], [ "A", "Ċ" ], [ "TC", "ATC" ], [ "TC", "AAG" ], [ "TC", "ACC" ], [ "AA", "TTTT" ], [ "TC", "TTTT" ], [ "TT", "ATC" ], [ "AGAA", "GC" ], [ "AGTG", "GC" ], [ "ATT", "TTC" ], [ "CC", "CC" ], [ "AAG", "AC" ], [ "AG", "TAG" ], [ "ATT", "GG" ], [ "TC", "ATG" ], [ "AA", "ATAA" ] ] } }