Base-mini / vocab.json
QuantaSparkLabs's picture
Push to HF
59f1228 verified
Raw
History Blame Contribute Delete
6.5 kB
{
"[PAD]": 0,
"[UNK]": 1,
"[SEP]": 2,
"[CLS]": 3,
"[MASK]": 4,
"[BOS]": 5,
"[EOS]": 6,
"!": 7,
"\"": 8,
"#": 9,
"$": 10,
"%": 11,
"&": 12,
"'": 13,
"(": 14,
")": 15,
"*": 16,
"+": 17,
",": 18,
"-": 19,
".": 20,
"/": 21,
"0": 22,
"1": 23,
"2": 24,
"3": 25,
"4": 26,
"5": 27,
"6": 28,
"7": 29,
"8": 30,
"9": 31,
":": 32,
";": 33,
"<": 34,
"=": 35,
">": 36,
"?": 37,
"@": 38,
"A": 39,
"B": 40,
"C": 41,
"D": 42,
"E": 43,
"F": 44,
"G": 45,
"H": 46,
"I": 47,
"J": 48,
"K": 49,
"L": 50,
"M": 51,
"N": 52,
"O": 53,
"P": 54,
"Q": 55,
"R": 56,
"S": 57,
"T": 58,
"U": 59,
"V": 60,
"W": 61,
"X": 62,
"Y": 63,
"Z": 64,
"[": 65,
"\\": 66,
"]": 67,
"^": 68,
"_": 69,
"`": 70,
"a": 71,
"b": 72,
"c": 73,
"d": 74,
"e": 75,
"f": 76,
"g": 77,
"h": 78,
"i": 79,
"j": 80,
"k": 81,
"l": 82,
"m": 83,
"n": 84,
"o": 85,
"p": 86,
"q": 87,
"r": 88,
"s": 89,
"t": 90,
"u": 91,
"v": 92,
"w": 93,
"x": 94,
"y": 95,
"z": 96,
"{": 97,
"|": 98,
"}": 99,
"~": 100,
"¡": 101,
"¢": 102,
"£": 103,
"¤": 104,
"¥": 105,
"¦": 106,
"§": 107,
"¨": 108,
"©": 109,
"ª": 110,
"«": 111,
"¬": 112,
"®": 113,
"¯": 114,
"°": 115,
"±": 116,
"²": 117,
"³": 118,
"´": 119,
"µ": 120,
"¶": 121,
"·": 122,
"¸": 123,
"¹": 124,
"º": 125,
"»": 126,
"¼": 127,
"½": 128,
"¾": 129,
"¿": 130,
"À": 131,
"Á": 132,
"Â": 133,
"Ã": 134,
"Ä": 135,
"Å": 136,
"Æ": 137,
"Ç": 138,
"È": 139,
"É": 140,
"Ê": 141,
"Ë": 142,
"Ì": 143,
"Í": 144,
"Î": 145,
"Ï": 146,
"Ð": 147,
"Ñ": 148,
"Ò": 149,
"Ó": 150,
"Ô": 151,
"Õ": 152,
"Ö": 153,
"×": 154,
"Ø": 155,
"Ù": 156,
"Ú": 157,
"Û": 158,
"Ü": 159,
"Ý": 160,
"Þ": 161,
"ß": 162,
"à": 163,
"á": 164,
"â": 165,
"ã": 166,
"ä": 167,
"å": 168,
"æ": 169,
"ç": 170,
"è": 171,
"é": 172,
"ê": 173,
"ë": 174,
"ì": 175,
"í": 176,
"î": 177,
"ï": 178,
"ð": 179,
"ñ": 180,
"ò": 181,
"ó": 182,
"ô": 183,
"õ": 184,
"ö": 185,
"÷": 186,
"ø": 187,
"ù": 188,
"ú": 189,
"û": 190,
"ü": 191,
"ý": 192,
"þ": 193,
"ÿ": 194,
"Ā": 195,
"ā": 196,
"Ă": 197,
"ă": 198,
"Ą": 199,
"ą": 200,
"Ć": 201,
"ć": 202,
"Ĉ": 203,
"ĉ": 204,
"Ċ": 205,
"ċ": 206,
"Č": 207,
"č": 208,
"Ď": 209,
"ď": 210,
"Đ": 211,
"đ": 212,
"Ē": 213,
"ē": 214,
"Ĕ": 215,
"ĕ": 216,
"Ė": 217,
"ė": 218,
"Ę": 219,
"ę": 220,
"Ě": 221,
"ě": 222,
"Ĝ": 223,
"ĝ": 224,
"Ğ": 225,
"ğ": 226,
"Ġ": 227,
"ġ": 228,
"Ģ": 229,
"ģ": 230,
"Ĥ": 231,
"ĥ": 232,
"Ħ": 233,
"ħ": 234,
"Ĩ": 235,
"ĩ": 236,
"Ī": 237,
"ī": 238,
"Ĭ": 239,
"ĭ": 240,
"Į": 241,
"į": 242,
"İ": 243,
"ı": 244,
"Ĳ": 245,
"ĳ": 246,
"Ĵ": 247,
"ĵ": 248,
"Ķ": 249,
"ķ": 250,
"ĸ": 251,
"Ĺ": 252,
"ĺ": 253,
"Ļ": 254,
"ļ": 255,
"Ľ": 256,
"ľ": 257,
"Ŀ": 258,
"ŀ": 259,
"Ł": 260,
"ł": 261,
"Ń": 262,
"es": 263,
"en": 264,
"th": 265,
"the": 266,
"at": 267,
"or": 268,
"de": 269,
"ates": 270,
"ut": 271,
"co": 272,
"ra": 273,
"di": 274,
"Ġp": 275,
"Ġt": 276,
"ns": 277,
"ent": 278,
"st": 279,
"Ġpr": 280,
"Ġa": 281,
"io": 282,
"ts": 283,
"Ġde": 284,
"Ġdeco": 285,
"li": 286,
"wor": 287,
"work": 288,
"Ġf": 289,
"ne": 290,
"Ġpro": 291,
"Ġg": 292,
"des": 293,
"Ġe": 294,
"er": 295,
"put": 296,
"al": 297,
"der": 298,
"ces": 299,
"for": 300,
"rans": 301,
"Ġtrans": 302,
"form": 303,
"Ġtransform": 304,
"Ġs": 305,
"Ġo": 306,
"zes": 307,
"ions": 308,
"Ġen": 309,
"Ġenco": 310,
"re": 311,
"Ġdecodes": 312,
"ab": 313,
"but": 314,
"bab": 315,
"ili": 316,
"ibut": 317,
"ribut": 318,
"ty": 319,
"Ġdi": 320,
"stribut": 321,
"Ġprobab": 322,
"ility": 323,
"Ġdistribut": 324,
"Ġprobability": 325,
"Ġdistributions": 326,
"dd": 327,
"hi": 328,
"Ġst": 329,
"Ġhi": 330,
"dden": 331,
"Ġstates": 332,
"Ġhidden": 333,
"ener": 334,
"Ġgener": 335,
"Ġgenerates": 336,
"ework": 337,
"mework": 338,
"ramework": 339,
"Ġframework": 340,
"as": 341,
"cl": 342,
"fi": 343,
"ifi": 344,
"sifi": 345,
"Ġcl": 346,
"assifi": 347,
"Ġclassifi": 348,
"Ġclassifies": 349,
"be": 350,
"ddi": 351,
"gs": 352,
"mbe": 353,
"ngs": 354,
"Ġembe": 355,
"ddings": 356,
"Ġembeddings": 357,
"twork": 358,
"Ġne": 359,
"Ġnetwork": 360,
"eli": 361,
"ip": 362,
"Ġpip": 363,
"eline": 364,
"Ġpipeline": 365,
"Ġdecoder": 366,
"ct": 367,
"eat": 368,
"ect": 369,
"ure": 370,
"vect": 371,
"Ġvect": 372,
"ors": 373,
"Ġfeat": 374,
"Ġvectors": 375,
"Ġfeature": 376,
"an": 377,
"gent": 378,
"Ġagent": 379,
"mo": 380,
"Ġmo": 381,
"del": 382,
"Ġmodel": 383,
"in": 384,
"ken": 385,
"oken": 386,
"Ġin": 387,
"Ġtoken": 388,
"Ġinput": 389,
"Ġtokens": 390,
"gi": 391,
"lo": 392,
"Ġlo": 393,
"utput": 394,
"Ġoutput": 395,
"gits": 396,
"Ġlogits": 397,
"uates": 398,
"val": 399,
"Ġeval": 400,
"Ġtransforms": 401,
"Ġevaluates": 402,
"ses": 403,
"Ġproces": 404,
"Ġprocesses": 405,
"dates": 406,
"pdates": 407,
"updates": 408,
"Ġupdates": 409,
"radi": 410,
"Ġgradi": 411,
"Ġgradient": 412,
"nal": 413,
"yzes": 414,
"Ġanal": 415,
"Ġanalyzes": 416,
"eq": 417,
"ex": 418,
"uen": 419,
"Ġtex": 420,
"Ġseq": 421,
"uences": 422,
"Ġtext": 423,
"Ġsequences": 424,
"em": 425,
"yst": 426,
"Ġsyst": 427,
"Ġsystem": 428,
"Ġtransformer": 429,
"Ġencodes": 430,
"im": 431,
"izes": 432,
"pt": 433,
"Ġopt": 434,
"imizes": 435,
"Ġoptimizes": 436,
"Ġencoder": 437,
"ei": 438,
"gh": 439,
"tent": 440,
"wei": 441,
"Ġat": 442,
"Ġwei": 443,
"ion": 444,
"ghts": 445,
"tention": 446,
"Ġattention": 447,
"Ġweights": 448,
"gor": 449,
"ith": 450,
"lgor": 451,
"Ġalgor": 452,
"ithm": 453,
"Ġalgorithm": 454,
"cts": 455,
"edi": 456,
"Ġpredi": 457,
"Ġpredicts": 458,
"pr": 459,
"Ġre": 460,
"esent": 461,
"ations": 462,
"present": 463,
"Ġrepresent": 464,
"Ġrepresentations": 465
}