RIEP-75 / config.json
PakNin's picture
Upload folder using huggingface_hub
473dee9 verified
Raw
History Blame Contribute Delete
3.54 kB
{
"architectures": [
"PhimoeForCausalLM"
],
"attention_bias": true,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_slimmoe.PhiMoEConfig",
"AutoModelForCausalLM": "modeling_slimmoe.PhiMoEForCausalLM"
},
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 32000,
"expert_dropout": 0.0,
"head_dim": 128,
"hidden_act": "silu",
"hidden_dropout": 0.0,
"hidden_size": 4096,
"initializer_range": 0.02,
"input_jitter_noise": 0.01,
"intermediate_size": 960,
"is_pruned": true,
"lm_head_bias": true,
"max_position_embeddings": 4096,
"model_type": "phimoe",
"num_attention_heads": 32,
"num_experts_per_tok": 2,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"num_local_experts": 48,
"old_to_new_mapping": {
"0": {
"4": 0
},
"1": {
"14": 3,
"3": 0,
"4": 1,
"5": 2
},
"10": {
"1": 0,
"2": 1,
"6": 2
},
"11": {
"0": 0,
"14": 2,
"6": 1
},
"12": {
"10": 1,
"12": 2,
"6": 0
},
"13": {
"11": 2,
"13": 3,
"14": 4,
"2": 0,
"3": 1
},
"14": {
"2": 0,
"3": 1
},
"15": {
"1": 0,
"10": 2,
"12": 3,
"14": 4,
"4": 1
},
"16": {
"1": 0,
"12": 3,
"3": 1,
"5": 2
},
"17": {
"4": 0,
"7": 1
},
"18": {
"10": 2,
"14": 3,
"4": 0,
"5": 1
},
"19": {
"10": 3,
"14": 4,
"4": 0,
"5": 1,
"7": 2
},
"2": {
"1": 0,
"12": 5,
"14": 6,
"3": 1,
"5": 2,
"6": 3,
"8": 4
},
"20": {
"1": 0,
"12": 5,
"13": 6,
"14": 7,
"2": 1,
"5": 2,
"6": 3,
"7": 4
},
"21": {
"14": 1,
"15": 2,
"4": 0
},
"22": {
"14": 1,
"15": 2,
"3": 0
},
"23": {
"12": 3,
"14": 4,
"3": 0,
"5": 1,
"9": 2
},
"24": {
"11": 2,
"5": 0,
"9": 1
},
"25": {
"0": 0,
"10": 2,
"11": 3,
"9": 1
},
"26": {
"12": 2,
"2": 0,
"8": 1
},
"27": {
"10": 3,
"12": 4,
"13": 5,
"15": 6,
"4": 0,
"5": 1,
"9": 2
},
"28": {
"10": 2,
"11": 3,
"12": 4,
"13": 5,
"4": 0,
"7": 1
},
"29": {
"1": 0,
"4": 1,
"6": 2
},
"3": {
"15": 1,
"6": 0
},
"30": {
"1": 0,
"10": 3,
"11": 4,
"12": 5,
"15": 6,
"3": 1,
"7": 2
},
"31": {
"10": 1,
"7": 0
},
"4": {
"10": 3,
"13": 4,
"4": 0,
"7": 1,
"9": 2
},
"5": {},
"6": {
"0": 0,
"1": 1,
"10": 5,
"13": 6,
"6": 2,
"7": 3,
"9": 4
},
"7": {
"1": 0,
"10": 2,
"13": 3,
"7": 1
},
"8": {
"10": 3,
"12": 4,
"14": 5,
"3": 0,
"5": 1,
"6": 2
},
"9": {
"0": 0,
"11": 2,
"4": 1
}
},
"output_router_logits": false,
"pruned": true,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"router_aux_loss_coef": 0.0,
"router_jitter_noise": 0.01,
"sliding_window": 2047,
"tie_word_embeddings": false,
"transformers_version": "4.57.3",
"use_cache": true,
"vocab_size": 32064
}