HiMoE / model /config.json
AGofficial's picture
Upload 114 files
0c7d1ce verified
{
"config": {
"block_size": 128,
"n_layer": 2,
"n_head": 4,
"n_embd": 256,
"dropout": 0.1,
"num_moes": 6,
"num_experts": 8,
"batch_size": 32,
"max_iters": 750,
"eval_interval": 50,
"eval_iters": 20,
"lr": 0.0003,
"data_file": "hamlet.txt",
"model_dir": "model"
},
"vocab_size": 67,
"step": 750,
"stoi": {
"\n": 0,
" ": 1,
"!": 2,
"\"": 3,
"&": 4,
"'": 5,
"(": 6,
")": 7,
",": 8,
"-": 9,
".": 10,
"1": 11,
":": 12,
";": 13,
"?": 14,
"A": 15,
"B": 16,
"C": 17,
"D": 18,
"E": 19,
"F": 20,
"G": 21,
"H": 22,
"I": 23,
"J": 24,
"K": 25,
"L": 26,
"M": 27,
"N": 28,
"O": 29,
"P": 30,
"Q": 31,
"R": 32,
"S": 33,
"T": 34,
"U": 35,
"V": 36,
"W": 37,
"Y": 38,
"[": 39,
"]": 40,
"a": 41,
"b": 42,
"c": 43,
"d": 44,
"e": 45,
"f": 46,
"g": 47,
"h": 48,
"i": 49,
"j": 50,
"k": 51,
"l": 52,
"m": 53,
"n": 54,
"o": 55,
"p": 56,
"q": 57,
"r": 58,
"s": 59,
"t": 60,
"u": 61,
"v": 62,
"w": 63,
"x": 64,
"y": 65,
"z": 66
},
"itos": {
"0": "\n",
"1": " ",
"2": "!",
"3": "\"",
"4": "&",
"5": "'",
"6": "(",
"7": ")",
"8": ",",
"9": "-",
"10": ".",
"11": "1",
"12": ":",
"13": ";",
"14": "?",
"15": "A",
"16": "B",
"17": "C",
"18": "D",
"19": "E",
"20": "F",
"21": "G",
"22": "H",
"23": "I",
"24": "J",
"25": "K",
"26": "L",
"27": "M",
"28": "N",
"29": "O",
"30": "P",
"31": "Q",
"32": "R",
"33": "S",
"34": "T",
"35": "U",
"36": "V",
"37": "W",
"38": "Y",
"39": "[",
"40": "]",
"41": "a",
"42": "b",
"43": "c",
"44": "d",
"45": "e",
"46": "f",
"47": "g",
"48": "h",
"49": "i",
"50": "j",
"51": "k",
"52": "l",
"53": "m",
"54": "n",
"55": "o",
"56": "p",
"57": "q",
"58": "r",
"59": "s",
"60": "t",
"61": "u",
"62": "v",
"63": "w",
"64": "x",
"65": "y",
"66": "z"
}
}