RIEP-25 / config.json
PakNin's picture
Upload folder using huggingface_hub
931b0c7 verified
Raw
History Blame Contribute Delete
7.3 kB
{
"architectures": [
"PhimoeForCausalLM"
],
"attention_bias": true,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_slimmoe.PhiMoEConfig",
"AutoModelForCausalLM": "modeling_slimmoe.PhiMoEForCausalLM"
},
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 32000,
"expert_dropout": 0.0,
"head_dim": 128,
"hidden_act": "silu",
"hidden_dropout": 0.0,
"hidden_size": 4096,
"initializer_range": 0.02,
"input_jitter_noise": 0.01,
"intermediate_size": 960,
"is_pruned": true,
"lm_head_bias": true,
"max_position_embeddings": 4096,
"model_type": "phimoe",
"num_attention_heads": 32,
"num_experts_per_tok": 2,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"num_local_experts": 32,
"old_to_new_mapping": {
"0": {
"1": 0,
"10": 7,
"11": 8,
"12": 9,
"14": 10,
"3": 1,
"4": 2,
"5": 3,
"6": 4,
"7": 5,
"8": 6
},
"1": {
"0": 0,
"1": 1,
"10": 10,
"11": 11,
"12": 12,
"13": 13,
"14": 14,
"15": 15,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"6": 6,
"7": 7,
"8": 8,
"9": 9
},
"10": {
"0": 0,
"1": 1,
"10": 8,
"11": 9,
"13": 10,
"15": 11,
"2": 2,
"3": 3,
"4": 4,
"6": 5,
"7": 6,
"8": 7
},
"11": {
"1": 0,
"11": 5,
"12": 6,
"13": 7,
"14": 8,
"15": 9,
"2": 1,
"5": 2,
"6": 3,
"8": 4
},
"12": {
"0": 0,
"1": 1,
"10": 5,
"11": 6,
"12": 7,
"14": 8,
"15": 9,
"2": 2,
"4": 3,
"5": 4
},
"13": {
"0": 0,
"1": 1,
"11": 7,
"12": 8,
"13": 9,
"14": 10,
"15": 11,
"2": 2,
"3": 3,
"5": 4,
"6": 5,
"9": 6
},
"14": {
"0": 0,
"10": 7,
"11": 8,
"12": 9,
"13": 10,
"14": 11,
"15": 12,
"2": 1,
"3": 2,
"4": 3,
"6": 4,
"8": 5,
"9": 6
},
"15": {
"0": 0,
"10": 7,
"11": 8,
"12": 9,
"13": 10,
"14": 11,
"3": 1,
"4": 2,
"5": 3,
"6": 4,
"8": 5,
"9": 6
},
"16": {
"0": 0,
"1": 1,
"10": 7,
"11": 8,
"12": 9,
"14": 10,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"8": 6
},
"17": {
"1": 0,
"10": 8,
"14": 9,
"15": 10,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"7": 6,
"9": 7
},
"18": {
"0": 0,
"1": 1,
"10": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"7": 6,
"8": 7,
"9": 8
},
"19": {
"0": 0,
"1": 1,
"10": 7,
"11": 8,
"13": 9,
"14": 10,
"4": 2,
"5": 3,
"6": 4,
"7": 5,
"9": 6
},
"2": {
"1": 0,
"10": 9,
"11": 10,
"12": 11,
"13": 12,
"14": 13,
"15": 14,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"7": 6,
"8": 7,
"9": 8
},
"20": {
"1": 0,
"10": 8,
"11": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"8": 6,
"9": 7
},
"21": {
"0": 0,
"1": 1,
"12": 6,
"14": 7,
"15": 8,
"4": 2,
"5": 3,
"6": 4,
"9": 5
},
"22": {
"0": 0,
"12": 6,
"13": 7,
"14": 8,
"15": 9,
"3": 1,
"4": 2,
"6": 3,
"8": 4,
"9": 5
},
"23": {
"0": 0,
"1": 1,
"12": 8,
"13": 9,
"14": 10,
"15": 11,
"2": 2,
"3": 3,
"5": 4,
"6": 5,
"7": 6,
"9": 7
},
"24": {
"0": 0,
"1": 1,
"11": 8,
"13": 9,
"14": 10,
"15": 11,
"3": 2,
"5": 3,
"6": 4,
"7": 5,
"8": 6,
"9": 7
},
"25": {
"0": 0,
"10": 7,
"11": 8,
"12": 9,
"13": 10,
"15": 11,
"4": 1,
"5": 2,
"6": 3,
"7": 4,
"8": 5,
"9": 6
},
"26": {
"0": 0,
"10": 8,
"11": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"8": 6,
"9": 7
},
"27": {
"0": 0,
"10": 7,
"11": 8,
"12": 9,
"15": 10,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"8": 5,
"9": 6
},
"28": {
"1": 0,
"10": 5,
"11": 6,
"12": 7,
"13": 8,
"14": 9,
"3": 1,
"5": 2,
"7": 3,
"9": 4
},
"29": {
"10": 6,
"12": 7,
"14": 8,
"3": 0,
"4": 1,
"6": 2,
"7": 3,
"8": 4,
"9": 5
},
"3": {
"1": 0,
"10": 8,
"11": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"2": 1,
"3": 2,
"5": 3,
"6": 4,
"7": 5,
"8": 6,
"9": 7
},
"30": {
"0": 0,
"1": 1,
"10": 10,
"11": 11,
"12": 12,
"13": 13,
"15": 14,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"6": 6,
"7": 7,
"8": 8,
"9": 9
},
"31": {
"0": 0,
"1": 1,
"10": 8,
"11": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"2": 2,
"4": 3,
"6": 4,
"7": 5,
"8": 6,
"9": 7
},
"4": {
"1": 0,
"10": 6,
"11": 7,
"13": 8,
"14": 9,
"15": 10,
"3": 1,
"6": 2,
"7": 3,
"8": 4,
"9": 5
},
"5": {
"10": 4,
"12": 5,
"13": 6,
"15": 7,
"4": 0,
"5": 1,
"7": 2,
"9": 3
},
"6": {
"0": 0,
"1": 1,
"10": 7,
"12": 8,
"13": 9,
"14": 10,
"15": 11,
"4": 2,
"5": 3,
"6": 4,
"7": 5,
"9": 6
},
"7": {
"0": 0,
"10": 8,
"11": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"3": 1,
"4": 2,
"5": 3,
"6": 4,
"7": 5,
"8": 6,
"9": 7
},
"8": {
"0": 0,
"1": 1,
"10": 9,
"12": 10,
"13": 11,
"14": 12,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"6": 6,
"7": 7,
"8": 8
},
"9": {
"0": 0,
"1": 1,
"10": 9,
"13": 10,
"14": 11,
"15": 12,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"6": 6,
"7": 7,
"9": 8
}
},
"output_router_logits": false,
"pruned": true,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"router_aux_loss_coef": 0.0,
"router_jitter_noise": 0.01,
"sliding_window": 2047,
"tie_word_embeddings": false,
"transformers_version": "4.57.3",
"use_cache": true,
"vocab_size": 32064
}