CodonT5_input / added_tokens.json
ZYMScott's picture
Upload tokenizer
1072e2b verified
raw
history blame
15.9 kB
{
"<0813-124 phase II>": 32352,
"<090008>": 32575,
"<1.2.20>": 32501,
"<15H5D-4a>": 32221,
"<1692>": 32349,
"<174/2>": 32395,
"<17Nkhm-UP2>": 32196,
"<18EpOKYJ>": 32596,
"<200023>": 32254,
"<21A>": 32508,
"<24.1>": 32388,
"<301>": 32638,
"<3347689II>": 32510,
"<3937>": 32289,
"<477>": 32419,
"<49125>": 32341,
"<5D>": 32268,
"<640>": 32512,
"<670-83>": 32261,
"<675>": 32187,
"<6D370>": 32519,
"<757>": 32534,
"<78-1320>": 32399,
"<7A>": 32312,
"<80813>": 32114,
"<>": 32522,
"<A1122>": 32407,
"<A1>": 32239,
"<A2-F21>": 32595,
"<A212-S19-A16>": 32493,
"<A23BA>": 32118,
"<A398-S21-F17>": 32392,
"<ACYC.E9L>": 32411,
"<ANU1>": 32168,
"<AR>": 32321,
"<ARAD>": 32160,
"<AR_0082>": 32544,
"<AS9>": 32104,
"<ATCC 13028>": 32106,
"<ATCC 39140>": 32502,
"<ATCC 43969>": 32339,
"<ATCC 51329>": 32307,
"<ATCC BAA-895>": 32630,
"<AVS0177>": 32225,
"<Annandia>": 32489,
"<ArsBeeUS>": 32401,
"<Arsenophonus apicola>": 32348,
"<Arsenophonus endosymbiont of Aleurodicus dispersus>": 32491,
"<Arsenophonus endosymbiont of Aphis craccivora>": 32266,
"<Arsenophonus nasoniae>": 32557,
"<Arsenophonus>": 32327,
"<Atlantibacter hermannii>": 32238,
"<Atlantibacter subterranea>": 32309,
"<Atlantibacter>": 32331,
"<BDA62-3>": 32621,
"<BHKY>": 32287,
"<BO-1>": 32245,
"<BPEN>": 32505,
"<BVAF>": 32573,
"<BY21311>": 32450,
"<Bacteria>": 32177,
"<Blochmannia endosymbiont of Camponotus (Colobopsis) obliquus>": 32448,
"<Blochmannia endosymbiont of Camponotus modoc>": 32328,
"<Blochmannia endosymbiont of Camponotus nipponensis>": 32387,
"<Blochmannia endosymbiont of Colobopsis nipponica>": 32315,
"<Blochmannia endosymbiont of Polyrhachis (Hedomyrma) turneri>": 32549,
"<Blochmannia>": 32369,
"<Brenneria goodwinii>": 32391,
"<Brenneria izadpanahii>": 32498,
"<Brenneria nigrifluens>": 32188,
"<Brenneria rubrifaciens>": 32606,
"<Brenneria ulupoensis>": 32201,
"<Brenneria>": 32425,
"<Bruguierivoracaceae>": 32364,
"<Buchnera aphidicola>": 32222,
"<Buchnera>": 32365,
"<Budviciaceae>": 32605,
"<Buttiauxella agrestis>": 32220,
"<Buttiauxella ferragutiae>": 32480,
"<Buttiauxella>": 32190,
"<C-002>": 32184,
"<C-005>": 32426,
"<C-006>": 32154,
"<C-050>": 32250,
"<C-7-2>": 32276,
"<CAVP490>": 32477,
"<CB>": 32452,
"<CCA6>": 32358,
"<CCUG 66741>": 32384,
"<CF-458>": 32284,
"<CFBP 3304>": 32371,
"<CFCC10813>": 32585,
"<CFPB1430>": 32470,
"<CFS1934>": 32507,
"<CQ10>": 32538,
"<CS-931>": 32146,
"<Candidatus Arsenophonus lipoptenae>": 32153,
"<Candidatus Blochmannia pennsylvanicus>": 32527,
"<Candidatus Blochmannia vafer>": 32255,
"<Candidatus Doolittlea endobia>": 32482,
"<Candidatus Fukatsuia symbiotica>": 32193,
"<Candidatus Gullanella endobia>": 32579,
"<Candidatus Hoaglandella endobia>": 32366,
"<Candidatus Mikella endobia>": 32342,
"<Candidatus Purcelliella pentastirinorum>": 32161,
"<Candidatus Riesia pediculicola>": 32445,
"<Candidatus Tachikawaea gelatinosa>": 32577,
"<Candidatus Westeberhardia cardiocondylae>": 32376,
"<Candidatus blochmannia chromaiodes>": 32570,
"<Candidatus ishikawaella capsulata>": 32155,
"<Candidatus moranella endobia>": 32159,
"<Candidatus sodalis pierantonius>": 32580,
"<Candidatus>": 32617,
"<Candidatus_antoea carbekii>": 32112,
"<Candidatus_ukatsuia>": 32240,
"<Cedecea lapagei>": 32624,
"<Cedecea neteri>": 32313,
"<Cedecea>": 32140,
"<Cf7303>": 32275,
"<Chania multitudinisentens>": 32337,
"<Chania>": 32282,
"<Citrobacter amalonaticus>": 32454,
"<Citrobacter arsenatis>": 32451,
"<Citrobacter braakii>": 32633,
"<Citrobacter freundii>": 32123,
"<Citrobacter koseri>": 32567,
"<Citrobacter portucalensis>": 32497,
"<Citrobacter rodentium>": 32363,
"<Citrobacter sedlakii>": 32340,
"<Citrobacter tructae>": 32598,
"<Citrobacter werkmanii>": 32403,
"<Citrobacter>": 32235,
"<Cp2>": 32380,
"<Cronobacter condimenti>": 32122,
"<Cronobacter dublinensis>": 32174,
"<Cronobacter malonaticus>": 32456,
"<Cronobacter muytjensii>": 32214,
"<Cronobacter sakazakii>": 32300,
"<Cronobacter universalis>": 32121,
"<Cronobacter>": 32209,
"<DH-S01>": 32243,
"<DSM 101947>": 32195,
"<DSM 102253>": 32421,
"<DSM 107547>": 32229,
"<DSM 15199>": 32476,
"<DSM 16636>": 32552,
"<DSM 16690>": 32145,
"<DSM 22758>": 32613,
"<DSM 32899>": 32548,
"<DSM 4481>": 32413,
"<DSM 4576>": 32111,
"<DSM 9389>": 32286,
"<Dickeya aquatica>": 32463,
"<Dickeya chrysanthemi>": 32137,
"<Dickeya dadantii>": 32223,
"<Dickeya dianthicola>": 32131,
"<Dickeya fangzhongdai>": 32267,
"<Dickeya parazeae>": 32461,
"<Dickeya poaceiphila>": 32152,
"<Dickeya solani>": 32170,
"<Dickeya zeae>": 32372,
"<Dickeya>": 32447,
"<Doolittlea>": 32215,
"<Duffyella gerundensis>": 32306,
"<Duffyella>": 32253,
"<EBP3064>": 32274,
"<EN-119>": 32562,
"<ERMR1:05>": 32490,
"<Eb661>": 32157,
"<Ech1591>": 32485,
"<Ech586>": 32346,
"<Ech703>": 32178,
"<Edwardsiella anguillarum>": 32234,
"<Edwardsiella hoshinae>": 32468,
"<Edwardsiella ictaluri>": 32410,
"<Edwardsiella piscicida>": 32280,
"<Edwardsiella tarda>": 32513,
"<Edwardsiella>": 32574,
"<Enterobacter asburiae>": 32236,
"<Enterobacter bugandensis>": 32607,
"<Enterobacter chengduensis>": 32555,
"<Enterobacter cloacae>": 32107,
"<Enterobacter hormaechei>": 32304,
"<Enterobacter huaxiensis>": 32294,
"<Enterobacter ludwigii>": 32571,
"<Enterobacter mori>": 32281,
"<Enterobacter oligotrophicus>": 32431,
"<Enterobacter pseudoroggenkampii>": 32465,
"<Enterobacter roggenkampii>": 32354,
"<Enterobacter sichuanensis>": 32241,
"<Enterobacter soli>": 32279,
"<Enterobacter>": 32224,
"<Enterobacterales>": 32635,
"<Enterobacteriaceae endosymbiont of Macroplea mutica>": 32486,
"<Enterobacteriaceae endosymbiont of Plateumaris pusilla>": 32433,
"<Enterobacteriaceae endosymbiont of_acroplea mutica>": 32409,
"<Enterobacteriaceae>": 32400,
"<EpK1/15>": 32386,
"<ErCicurvipes>": 32198,
"<Erwinia amylovora>": 32335,
"<Erwinia billingiae>": 32217,
"<Erwinia persicina>": 32568,
"<Erwinia pyrifoliae>": 32414,
"<Erwinia rhapontici>": 32408,
"<Erwinia sorbitola>": 32179,
"<Erwinia tasmaniensis>": 32189,
"<Erwinia tracheiphila>": 32277,
"<Erwinia>": 32546,
"<Erwiniaceae>": 32228,
"<Escherichia albertii>": 32484,
"<Escherichia coli >": 32212,
"<Escherichia fergusonii>": 32244,
"<Escherichia marmotae>": 32515,
"<Escherichia>": 32460,
"<Et1/99>": 32142,
"<FDAARGOS 1447>": 32231,
"<FDAARGOS_1499>": 32626,
"<FDAARGOS_165>": 32175,
"<FDAARGOS_186>": 32439,
"<FDAARGOS_392>": 32139,
"<FDAARGOS_408>": 32590,
"<FDAARGOS_500>": 32298,
"<FDAARGOS_616>": 32165,
"<FDAARGOS_730>": 32130,
"<FDAARGOS_926>": 32103,
"<FDAARGOS_940>": 32459,
"<FIN>": 32471,
"<FN20211>": 32581,
"<FRB141>": 32164,
"<FRB97>": 32351,
"<FRM16>": 32345,
"<FY-07>": 32338,
"<FY158>": 32148,
"<G5>": 32591,
"<G6>": 32499,
"<Gammaproteobacteria>": 32124,
"<Gibbsiella quercinecans>": 32397,
"<Gibbsiella>": 32584,
"<Gullanella>": 32258,
"<H4-C11>": 32604,
"<HI4320>": 32495,
"<HS11286>": 32323,
"<HS1>": 32488,
"<HYN0051>": 32329,
"<Hafnia alvei>": 32305,
"<Hafnia paralvei>": 32405,
"<Hafnia>": 32257,
"<Hafniaceae>": 32462,
"<Hoaglandella>": 32218,
"<IFB5427>": 32569,
"<IP32953>": 32105,
"<Iran 50>": 32582,
"<Ishikawaella>": 32554,
"<J780>": 32260,
"<JH01>": 32428,
"<JK2.1>": 32285,
"<JZ-GX1>": 32347,
"<JZB2120001>": 32389,
"<Jejubacter calystegiae>": 32210,
"<Jejubacter>": 32374,
"<K-12 substr. MG1655>": 32496,
"<K61>": 32110,
"<KACC 18508>": 32457,
"<KC-Pc-HB1>": 32566,
"<KMM821>": 32379,
"<KSNA2>": 32444,
"<KUDC3025>": 32186,
"<Ka37751>": 32472,
"<Kalro>": 32207,
"<Klebsiella aerogenes>": 32602,
"<Klebsiella africana>": 32319,
"<Klebsiella electrica>": 32283,
"<Klebsiella huaxiensis>": 32353,
"<Klebsiella michiganensis>": 32441,
"<Klebsiella oxytoca>": 32128,
"<Klebsiella pasteurii>": 32597,
"<Klebsiella pneumoniae>": 32265,
"<Klebsiella quasipneumoniae>": 32500,
"<Klebsiella variicola>": 32129,
"<Klebsiella>": 32615,
"<Kluyvera ascorbata>": 32362,
"<Kluyvera intermedia>": 32290,
"<Kluyvera>": 32611,
"<Kosakonia arachidis>": 32523,
"<Kosakonia cowanii>": 32325,
"<Kosakonia oryzae>": 32100,
"<Kosakonia oryzendophytica>": 32134,
"<Kosakonia pseudosacchari>": 32514,
"<Kosakonia radicincitans>": 32194,
"<Kosakonia sacchari>": 32125,
"<Kosakonia>": 32326,
"<KqPF26>": 32623,
"<L6>": 32603,
"<LEMB11>": 32518,
"<LF7a>": 32334,
"<LH84-a>": 32115,
"<LJ1>": 32436,
"<LMG 23823>": 32601,
"<LMG 23826>": 32543,
"<LMG 24197>": 32216,
"<LMG 24199>": 32310,
"<LMG 26250>": 32385,
"<LMG24200>": 32503,
"<LST-1>": 32430,
"<LT-1>": 32297,
"<LT2>": 32443,
"<LTYR-11Z>": 32422,
"<LY-1>": 32608,
"<Leclercia adecarboxylata>": 32564,
"<Leclercia pneumoniae>": 32269,
"<Leclercia>": 32204,
"<Lelliottia steviae>": 32539,
"<Lelliottia>": 32442,
"<Leminorella richardii>": 32427,
"<Leminorella>": 32271,
"<Limnobaculum parvum>": 32317,
"<Limnobaculum zhutongyuii>": 32248,
"<Limnobaculum>": 32226,
"<Lonsdalea britannica>": 32102,
"<Lonsdalea populi>": 32588,
"<Lonsdalea>": 32219,
"<Lsch>": 32143,
"<ME23>": 32199,
"<MS2>": 32589,
"<MiY-A>": 32494,
"<Mikella>": 32357,
"<Mixta gaviniae>": 32506,
"<Mixta hanseatica>": 32509,
"<Mixta intestinalis>": 32185,
"<Mixta>": 32394,
"<Moellerella wisconsensis>": 32246,
"<Moellerella>": 32202,
"<Moranella>": 32453,
"<Morganella morganii>": 32133,
"<Morganella>": 32432,
"<Morganellaceae>": 32318,
"<Mpkobe>": 32619,
"<Musicola paradisiaca>": 32542,
"<Musicola>": 32415,
"<N-5-1>": 32547,
"<N2-1>": 32583,
"<N268-08>": 32301,
"<NA>": 32171,
"<NCPPB 569>": 32356,
"<NCTC 14382>": 32303,
"<NCTC 9529>": 32213,
"<NCTC11466>": 32249,
"<NCTC12003>": 32360,
"<NCTC12148>": 32616,
"<NCTC12151>": 32474,
"<NCTC12284>": 32434,
"<NCTC13188>": 32437,
"<NIBIO1392>": 32181,
"<OLIH>": 32278,
"<Ola 51>": 32404,
"<PA13>": 32355,
"<PCVAL>": 32120,
"<PPO 9019>": 32438,
"<PR-310>": 32609,
"<PRI-2C>": 32233,
"<Pantoea agglomerans>": 32367,
"<Pantoea alfalfae>": 32101,
"<Pantoea alhagi>": 32191,
"<Pantoea ananatis>": 32550,
"<Pantoea deleyi>": 32370,
"<Pantoea dispersa>": 32594,
"<Pantoea eucalypti>": 32299,
"<Pantoea eucrina>": 32640,
"<Pantoea soli>": 32136,
"<Pantoea stewartii>": 32343,
"<Pantoea vagans>": 32627,
"<Pantoea>": 32537,
"<Pectobacteriaceae>": 32576,
"<Pectobacterium aquaticum>": 32176,
"<Pectobacterium aroidearum>": 32536,
"<Pectobacterium atrosepticum>": 32383,
"<Pectobacterium brasiliense>": 32556,
"<Pectobacterium cacticida>": 32205,
"<Pectobacterium carotovorum>": 32429,
"<Pectobacterium colocasium>": 32259,
"<Pectobacterium odoriferum>": 32359,
"<Pectobacterium parmentieri>": 32144,
"<Pectobacterium parvum>": 32563,
"<Pectobacterium polaris>": 32203,
"<Pectobacterium punjabense>": 32636,
"<Pectobacterium quasiaquaticum>": 32149,
"<Pectobacterium wasabiae>": 32417,
"<Pectobacterium>": 32593,
"<Photorhabdus akhurstii>": 32237,
"<Photorhabdus asymbiotica>": 32135,
"<Photorhabdus laumondii>": 32492,
"<Photorhabdus thracensis>": 32529,
"<Photorhabdus>": 32200,
"<Phytobacter diazotrophicus>": 32166,
"<Phytobacter>": 32147,
"<Plesiomonas shigelloides>": 32560,
"<Plesiomonas>": 32138,
"<Pluralibacter gergoviae>": 32586,
"<Pluralibacter>": 32578,
"<Pragia fontium>": 32475,
"<Pragia>": 32612,
"<Profftia>": 32406,
"<Proteus hauseri>": 32117,
"<Proteus mirabilis>": 32545,
"<Proteus penneri>": 32332,
"<Proteus terrae>": 32116,
"<Proteus>": 32610,
"<Providencia alcalifaciens>": 32520,
"<Providencia hangzhouensis>": 32629,
"<Providencia heimbachae>": 32467,
"<Providencia huaxiensis>": 32631,
"<Providencia rettgeri>": 32639,
"<Providencia stuartii>": 32251,
"<Providencia>": 32292,
"<Pseudocitrobacter corydidari>": 32504,
"<Pseudocitrobacter>": 32464,
"<Pseudomonadota>": 32420,
"<Purcelliella>": 32558,
"<RB-25>": 32525,
"<Rahnella aceris>": 32232,
"<Rahnella sikkimica>": 32416,
"<Rahnella victoriana>": 32440,
"<Rahnella>": 32390,
"<Raoultella planticola>": 32402,
"<Raoultella terrigena>": 32302,
"<Raoultella>": 32478,
"<Riesia>": 32158,
"<S07-698>": 32628,
"<S178-2>": 32192,
"<S1>": 32625,
"<S2-A69>": 32182,
"<SCPM-O-B-7604>": 32272,
"<SE6-1>": 32382,
"<SGAir0282>": 32458,
"<SII>": 32565,
"<SK>": 32126,
"<SNU WT2>": 32197,
"<SOPE>": 32273,
"<SRCM103226>": 32163,
"<SS95>": 32256,
"<SWHEFF_49>": 32109,
"<Sakai substr. RIMD 0509952>": 32263,
"<Salmonella bongori>": 32173,
"<Salmonella enterica>": 32435,
"<Salmonella>": 32264,
"<Sample 167>": 32291,
"<Sb-24>": 32533,
"<Scandinavium goeteborgense>": 32333,
"<Scandinavium>": 32141,
"<Schneideria>": 32618,
"<Serratia entomophila>": 32424,
"<Serratia ficaria>": 32614,
"<Serratia fonticola>": 32230,
"<Serratia inhibens>": 32368,
"<Serratia liquefaciens>": 32344,
"<Serratia nematodiphila>": 32620,
"<Serratia plymuthica>": 32156,
"<Serratia proteamaculans>": 32336,
"<Serratia quinivorans>": 32270,
"<Serratia rhizosphaerae>": 32418,
"<Serratia rubidaea>": 32211,
"<Serratia surfactantfaciens>": 32592,
"<Serratia symbiotica>": 32167,
"<Serratia ureilytica>": 32293,
"<Serratia>": 32541,
"<Shigella dysenteriae>": 32132,
"<Shigella flexneri>": 32516,
"<Shigella sonnei>": 32599,
"<Shigella>": 32481,
"<Shimwellia blattae>": 32469,
"<Shimwellia>": 32423,
"<Siccibacter colletis>": 32393,
"<Siccibacter>": 32183,
"<Sodalis endosymbiont of Henestaris halophilus>": 32487,
"<Sodalis glossinidius>": 32551,
"<Sodalis praecaptivus>": 32455,
"<Sodalis>": 32151,
"<SyEd1>": 32320,
"<Symbiopectobacterium purcellii>": 32483,
"<Symbiopectobacterium>": 32350,
"<T6>": 32377,
"<TA9759>": 32473,
"<TBY01>": 32361,
"<THO-011>": 32113,
"<TTO1>": 32540,
"<Tachikawaea>": 32528,
"<Tatumella citrea>": 32330,
"<Tatumella>": 32378,
"<Trabulsiella odontotermitis>": 32288,
"<Trabulsiella>": 32375,
"<US>": 32622,
"<USDA-ARS-USMARC-60222>": 32526,
"<USDA>": 32446,
"<UwTKB>": 32396,
"<VKH10>": 32553,
"<W65>": 32206,
"<WCHECl-C4 = WCHECh050004>": 32632,
"<WCHKl090001>": 32150,
"<WCHPr000369>": 32479,
"<WPP14>": 32242,
"<Westeberhardia>": 32308,
"<Wigglesworthia glossinidia>": 32535,
"<Wigglesworthia>": 32227,
"<Winslowiella toletana>": 32314,
"<Winslowiella>": 32252,
"<XL123>": 32637,
"<XL95>": 32247,
"<Xenorhabdus budapestensis>": 32466,
"<Xenorhabdus doucetiae>": 32316,
"<Xenorhabdus griffiniae>": 32532,
"<Xenorhabdus hominickii>": 32559,
"<Xenorhabdus nematophila>": 32531,
"<Xenorhabdus poinarii>": 32449,
"<Xenorhabdus>": 32600,
"<YD25>": 32561,
"<YF8>": 32169,
"<YRA>": 32381,
"<YSD YN2>": 32521,
"<Y_sim_228>": 32524,
"<Yersinia aldovae>": 32208,
"<Yersinia alsatica>": 32162,
"<Yersinia canariae>": 32295,
"<Yersinia hibernica>": 32412,
"<Yersinia intermedia>": 32398,
"<Yersinia mollaretii>": 32511,
"<Yersinia pestis>": 32572,
"<Yersinia pseudotuberculosis>": 32517,
"<Yersinia rohdei>": 32180,
"<Yersinia ruckeri>": 32587,
"<Yersinia similis>": 32530,
"<Yersinia>": 32322,
"<Yersiniaceae>": 32324,
"<ZJ-FGZX1>": 32373,
"<ZN2>": 32127,
"<[Enterobacter] lignolyticus>": 32119,
"<[Pantoea] beijingensis>": 32311,
"<morsitans>": 32634,
"<obscurior>": 32296,
"<secondary endosymbiont of Ctenarytaina eucalypti>": 32262,
"<secondary endosymbiont of Heteropsylla cubana>": 32108,
"<secondary endosymbiont of Trabutina mannipara>": 32172
}