tryn-mini-7m / tokenizer /vocab.json
LNTTushar's picture
Upload folder using huggingface_hub
7fab2c5 verified
raw
history blame
5.79 kB
{
"[PAD]": 0,
"[UNK]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"[BOS]": 5,
"[EOS]": 6,
".</w>": 7,
"is</w>": 8,
"the</w>": 9,
"are</w>": 10,
"weather</w>": 11,
"technology</w>": 12,
"i</w>": 13,
"requires</w>": 14,
"reading</w>": 15,
"for</w>": 16,
"society</w>": 17,
"love</w>": 18,
"it</w>": 19,
"tastes</w>": 20,
"in</w>": 21,
"mind</w>": 22,
"pizza</w>": 23,
"science</w>": 24,
"music</w>": 25,
"programming</w>": 26,
"creates</w>": 27,
"food</w>": 28,
"improves</w>": 29,
"with</w>": 30,
"great</w>": 31,
"enthusiasm</w>": 32,
"enjoys</w>": 33,
"very</w>": 34,
"much</w>": 35,
"transportation</w>": 36,
"using</w>": 37,
"transport</w>": 38,
"today</w>": 39,
"today's</w>": 40,
"delicious</w>": 41,
"benefits</w>": 42,
"from</w>": 43,
"because</w>": 44,
"and</w>": 45,
"tasty</w>": 46,
"a</w>": 47,
"history</w>": 48,
"pasta</w>": 49,
"mathematics</w>": 50,
"expands</w>": 51,
"helps</w>": 52,
"expand</w>": 53,
"your</w>": 54,
"eating</w>": 55,
"learning</w>": 56,
"to</w>": 57,
"learn</w>": 58,
",</w>": 59,
"you</w>": 60,
"need</w>": 61,
"art</w>": 62,
"physics</w>": 63,
"mountain</w>": 64,
"books</w>": 65,
"languages</w>": 66,
"cat</w>": 67,
"travel</w>": 68,
"broadens</w>": 69,
"perspective</w>": 70,
"adventure</w>": 71,
"experiences</w>": 72,
"artistic</w>": 73,
"expression</w>": 74,
"creative</w>": 75,
"financial</w>": 76,
"markets</w>": 77,
"volatile</w>": 78,
"cuisine</w>": 79,
"ancient</w>": 80,
"fascinating</w>": 81,
"modern</w>": 82,
"evolves</w>": 83,
"quickly</w>": 84,
"cats</w>": 85,
"independent</w>": 86,
"animals</w>": 87,
"dogs</w>": 88,
"loyal</w>": 89,
"pets</w>": 90,
"healthy</w>": 91,
"wellness</w>": 92,
"space</w>": 93,
"exploration</w>": 94,
"advances</w>": 95,
"exercise</w>": 96,
"health</w>": 97,
"concerts</w>": 98,
"entertaining</w>": 99,
"sports</w>": 100,
"enhance</w>": 101,
"fitness</w>": 102,
"mathematical</w>": 103,
"equations</w>": 104,
"precise</w>": 105,
"logic</w>": 106,
"enjoy</w>": 107,
"needs</w>": 108,
"reasoning</w>": 109,
"changing</w>": 110,
"rapidly</w>": 111,
"brings</w>": 112,
"joy</w>": 113,
"ocean</w>": 114,
"waves</w>": 115,
"powerful</w>": 116,
"beauty</w>": 117,
"computer</w>": 118,
"networks</w>": 119,
"interconnected</w>": 120,
"diverse</w>": 121,
"climbing</w>": 122,
"equipment</w>": 123,
"explains</w>": 124,
"phenomena</w>": 125,
"research</w>": 126,
"discovers</w>": 127,
"truth</w>": 128,
"provide</w>": 129,
"knowledge</w>": 130,
"education</w>": 131,
"offers</w>": 132,
"wisdom</w>": 133,
"sits</w>": 134,
"on</w>": 135,
"mat</w>": 136,
"quantum</w>": 137,
"complex</w>": 138,
"fast</w>": 139,
"convenient</w>": 140,
"fish</w>": 141,
"bicycle</w>": 142,
"motorcycle</w>": 143,
"slow</w>": 144,
"economical</w>": 145,
"car</w>": 146,
"efficient</w>": 147,
"innovative</w>": 148,
"dangerous</w>": 149,
"essays</w>": 150,
"fiction</w>": 151,
"useful</w>": 152,
"practice</w>": 153,
"stories</w>": 154,
"reliable</w>": 155,
"hard</w>": 156,
"work</w>": 157,
"persistence</w>": 158,
"important</w>": 159,
"focus</w>": 160,
"bus</w>": 161,
"patience</w>": 162,
"boat</w>": 163,
"articles</w>": 164,
"beneficial</w>": 165,
"revolutionary</w>": 166,
"awful</w>": 167,
"exercising</w>": 168,
"poetry</w>": 169,
"airplane</w>": 170,
"novels</w>": 171,
"dancing</w>": 172,
"train</w>": 173,
"painting</w>": 174,
"singing</w>": 175,
"harmful</w>": 176,
"sarah</w>": 177,
"river</w>": 178,
"emma</w>": 179,
"salty</w>": 180,
"flying</w>": 181,
"working</w>": 182,
"bland</w>": 183,
"writing</w>": 184,
"salad</w>": 185,
"concentration</w>": 186,
"sunny</w>": 187,
"resting</w>": 188,
"dedication</w>": 189,
"cold</w>": 190,
"cloudy</w>": 191,
"terrible</w>": 192,
"david</w>": 193,
"lisa</w>": 194,
"walking</w>": 195,
"playing</w>": 196,
"sitting</w>": 197,
"anna</w>": 198,
"michael</w>": 199,
"hot</w>": 200,
"pleasant</w>": 201,
"swimming</w>": 202,
"vegetables</w>": 203,
"beach</w>": 204,
"spicy</w>": 205,
"robert</w>": 206,
"james</w>": 207,
"windy</w>": 208,
"lion</w>": 209,
"rich</w>": 210,
"fresh</w>": 211,
"studying</w>": 212,
"mary</w>": 213,
"bear</w>": 214,
"bitter</w>": 215,
"sleeping</w>": 216,
"sour</w>": 217,
"cooking</w>": 218,
"forest</w>": 219,
"horse</w>": 220,
"john</w>": 221,
"chemistry</w>": 222,
"bread</w>": 223,
"tiger</w>": 224,
"street</w>": 225,
"meat</w>": 226,
"field</w>": 227,
"fruit</w>": 228,
"garden</w>": 229,
"bird</w>": 230,
"elephant</w>": 231,
"house</w>": 232,
"cake</w>": 233,
"beautiful</w>": 234,
"fox</w>": 235,
"dog</w>": 236,
"sweet</w>": 237,
"park</w>": 238,
"rainy</w>": 239,
"city</w>": 240,
"soup</w>": 241,
"village</w>": 242,
"jumping</w>": 243,
"rabbit</w>": 244,
"rice</w>": 245,
"running</w>": 246,
"wolf</w>": 247,
" ": 248,
"'": 249,
",": 250,
".": 251,
"a": 252,
"b": 253,
"c": 254,
"d": 255,
"e": 256,
"f": 257,
"g": 258,
"h": 259,
"i": 260,
"j": 261,
"k": 262,
"l": 263,
"m": 264,
"n": 265,
"o": 266,
"p": 267,
"q": 268,
"r": 269,
"s": 270,
"t": 271,
"u": 272,
"v": 273,
"w": 274,
"x": 275,
"y": 276,
"z": 277
}