RIEP-50 / config.json
PakNin's picture
Upload folder using huggingface_hub
b8b877f verified
Raw
History Blame Contribute Delete
5.39 kB
{
"architectures": [
"PhimoeForCausalLM"
],
"attention_bias": true,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_slimmoe.PhiMoEConfig",
"AutoModelForCausalLM": "modeling_slimmoe.PhiMoEForCausalLM"
},
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 32000,
"expert_dropout": 0.0,
"head_dim": 128,
"hidden_act": "silu",
"hidden_dropout": 0.0,
"hidden_size": 4096,
"initializer_range": 0.02,
"input_jitter_noise": 0.01,
"intermediate_size": 960,
"is_pruned": true,
"lm_head_bias": true,
"max_position_embeddings": 4096,
"model_type": "phimoe",
"num_attention_heads": 32,
"num_experts_per_tok": 2,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"num_local_experts": 48,
"old_to_new_mapping": {
"0": {
"12": 3,
"14": 4,
"4": 0,
"5": 1,
"8": 2
},
"1": {
"1": 0,
"10": 6,
"12": 7,
"14": 8,
"3": 1,
"4": 2,
"5": 3,
"7": 4,
"8": 5
},
"10": {
"1": 0,
"13": 6,
"15": 7,
"2": 1,
"4": 2,
"6": 3,
"7": 4,
"8": 5
},
"11": {
"0": 0,
"11": 3,
"12": 4,
"14": 5,
"15": 6,
"6": 1,
"8": 2
},
"12": {
"0": 0,
"10": 3,
"11": 4,
"12": 5,
"14": 6,
"15": 7,
"5": 1,
"6": 2
},
"13": {
"11": 2,
"13": 3,
"14": 4,
"2": 0,
"3": 1
},
"14": {
"11": 2,
"12": 3,
"13": 4,
"14": 5,
"2": 0,
"3": 1
},
"15": {
"0": 0,
"1": 1,
"10": 5,
"12": 6,
"14": 7,
"3": 2,
"4": 3,
"7": 4
},
"16": {
"1": 0,
"11": 5,
"12": 6,
"14": 7,
"2": 1,
"3": 2,
"5": 3,
"7": 4
},
"17": {
"1": 0,
"12": 7,
"14": 8,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"7": 6
},
"18": {
"10": 6,
"12": 7,
"14": 8,
"3": 0,
"4": 1,
"5": 2,
"7": 3,
"8": 4,
"9": 5
},
"19": {
"0": 0,
"10": 6,
"14": 7,
"3": 1,
"4": 2,
"5": 3,
"6": 4,
"7": 5
},
"2": {
"1": 0,
"10": 8,
"11": 9,
"12": 10,
"13": 11,
"14": 12,
"15": 13,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"7": 6,
"8": 7
},
"20": {
"1": 0,
"12": 5,
"13": 6,
"14": 7,
"15": 8,
"2": 1,
"5": 2,
"6": 3,
"7": 4
},
"21": {
"14": 3,
"15": 4,
"4": 0,
"6": 1,
"9": 2
},
"22": {
"0": 0,
"1": 1,
"14": 7,
"15": 8,
"3": 2,
"4": 3,
"6": 4,
"8": 5,
"9": 6
},
"23": {
"0": 0,
"10": 5,
"12": 6,
"14": 7,
"2": 1,
"3": 2,
"5": 3,
"9": 4
},
"24": {
"1": 0,
"11": 5,
"14": 6,
"3": 1,
"5": 2,
"7": 3,
"9": 4
},
"25": {
"0": 0,
"10": 5,
"11": 6,
"12": 7,
"15": 8,
"2": 1,
"4": 2,
"8": 3,
"9": 4
},
"26": {
"11": 4,
"12": 5,
"15": 6,
"2": 0,
"3": 1,
"4": 2,
"8": 3
},
"27": {
"0": 0,
"10": 5,
"11": 6,
"12": 7,
"13": 8,
"15": 9,
"2": 1,
"4": 2,
"5": 3,
"9": 4
},
"28": {
"1": 0,
"10": 5,
"11": 6,
"12": 7,
"13": 8,
"4": 1,
"6": 2,
"7": 3,
"9": 4
},
"29": {
"1": 0,
"10": 6,
"11": 7,
"4": 1,
"6": 2,
"7": 3,
"8": 4,
"9": 5
},
"3": {
"1": 0,
"10": 6,
"11": 7,
"13": 8,
"14": 9,
"15": 10,
"4": 1,
"6": 2,
"7": 3,
"8": 4,
"9": 5
},
"30": {
"1": 0,
"10": 4,
"11": 5,
"12": 6,
"15": 7,
"3": 1,
"7": 2,
"9": 3
},
"31": {
"1": 0,
"10": 6,
"12": 7,
"15": 8,
"2": 1,
"4": 2,
"7": 3,
"8": 4,
"9": 5
},
"4": {
"10": 6,
"13": 7,
"15": 8,
"4": 0,
"5": 1,
"6": 2,
"7": 3,
"8": 4,
"9": 5
},
"5": {
"12": 3,
"13": 4,
"15": 5,
"4": 0,
"7": 1,
"9": 2
},
"6": {
"0": 0,
"1": 1,
"10": 6,
"13": 7,
"15": 8,
"5": 2,
"6": 3,
"7": 4,
"9": 5
},
"7": {
"1": 0,
"10": 3,
"12": 4,
"13": 5,
"3": 1,
"7": 2
},
"8": {
"1": 0,
"10": 7,
"12": 8,
"14": 9,
"3": 1,
"4": 2,
"5": 3,
"6": 4,
"7": 5,
"8": 6
},
"9": {
"0": 0,
"11": 3,
"4": 1,
"6": 2
}
},
"output_router_logits": false,
"pruned": true,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"router_aux_loss_coef": 0.0,
"router_jitter_noise": 0.01,
"sliding_window": 2047,
"tie_word_embeddings": false,
"transformers_version": "4.57.3",
"use_cache": true,
"vocab_size": 32064
}