unicosys-hypergraph / tokenizer.json
drzo's picture
feat: initial unicosys hypergraph knowledge model (34.7M params, 203K nodes, 15K edges)
539df93 verified
raw
history blame
2.06 kB
{
"vocab_size": 32000,
"max_length": 128,
"char_to_id": {
"p": 4,
"e": 5,
"t": 6,
"r": 7,
" ": 8,
"a": 9,
"n": 10,
"d": 11,
"w": 12,
"f": 13,
"u": 14,
"c": 15,
"i": 16,
"j": 17,
"q": 18,
"l": 19,
"y": 20,
"o": 21,
"b": 22,
"s": 23,
"k": 24,
"v": 25,
"m": 26,
"h": 27,
"g": 28,
"(": 29,
")": 30,
"\u00e9": 31,
"z": 32,
":": 33,
"4": 34,
"8": 35,
"3": 36,
"1": 37,
"0": 38,
"7": 39,
"6": 40,
"9": 41,
"2": 42,
"5": 43,
"x": 44,
",": 45,
"-": 46,
"/": 47,
"&": 48,
".": 49,
"+": 50,
"%": 51,
"#": 52,
"'": 53,
"@": 54,
"_": 55,
"*": 56,
"|": 57,
"[": 58,
"]": 59,
"!": 60,
"\u2122": 61,
"=": 62,
"\u00f1": 63,
"\u263a": 64,
"\u00a3": 65,
"\u2013": 66,
"\ud83d\udd17": 67,
"\ud83d\udc9c": 68,
"\u00a0": 69,
"\u26f3": 70,
"\u26a1": 71,
"\u23f0": 72,
"?": 73,
"\u2019": 74,
"\u2018": 75,
"\ud83c\udfe0": 76,
"\u2014": 77,
"\ud83c\udfe1": 78,
"\u2728": 79,
"\u00ae": 80,
"\ud83c\udf38": 81,
"$": 82,
"\ud83d\udc8c": 83,
"\ud83d\udcb8": 84,
"\ud83d\udd52": 85,
"\ud83d\udfe2": 86,
"\ud83d\ude97": 87,
"\ud83e\udde0": 88,
"\ud83d\udc64": 89,
"\ud83c\udf89": 90,
"\ud83d\ude80": 91,
"\ud83c\udf0e": 92,
"\ud83d\udc40": 93,
"\ufe0f": 94,
"\u2011": 95,
"\ud83c\udf82": 96,
"\ud83d\ude08": 97,
"\ud83c\udfa4": 98,
";": 99,
"\ud83d\udcbc": 100,
"\ud83e\udd76": 101,
"\ud83d\udea8": 102,
"\ud83c\udf34": 103,
"\ud83d\udd2d": 104,
"\ud83d\ude0e": 105,
"\u2600": 106,
"\ud83d\udcda": 107,
"\ud83c\udf81": 108,
"\ud83e\udd16": 109,
"\ud83d\udc4b": 110,
"\u2022": 111,
"\u2763": 112,
"\ud83d\udd25": 113,
"\ud83c\udf40": 114,
"\u2764": 115,
"\u200d": 116,
"\"": 117,
"\ud83d\udcf1": 118
},
"next_id": 119
}