{ "architectures": [ "PhimoeForCausalLM" ], "attention_bias": true, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_slimmoe.PhiMoEConfig", "AutoModelForCausalLM": "modeling_slimmoe.PhiMoEForCausalLM" }, "bos_token_id": 1, "dtype": "bfloat16", "eos_token_id": 32000, "expert_dropout": 0.0, "head_dim": 128, "hidden_act": "silu", "hidden_dropout": 0.0, "hidden_size": 4096, "initializer_range": 0.02, "input_jitter_noise": 0.01, "intermediate_size": 960, "is_pruned": true, "lm_head_bias": true, "max_position_embeddings": 4096, "model_type": "phimoe", "num_attention_heads": 32, "num_experts_per_tok": 2, "num_hidden_layers": 32, "num_key_value_heads": 8, "num_local_experts": 48, "old_to_new_mapping": { "0": { "4": 0 }, "1": { "14": 3, "3": 0, "4": 1, "5": 2 }, "10": { "1": 0, "2": 1, "6": 2 }, "11": { "0": 0, "14": 2, "6": 1 }, "12": { "10": 1, "12": 2, "6": 0 }, "13": { "11": 2, "13": 3, "14": 4, "2": 0, "3": 1 }, "14": { "2": 0, "3": 1 }, "15": { "1": 0, "10": 2, "12": 3, "14": 4, "4": 1 }, "16": { "1": 0, "12": 3, "3": 1, "5": 2 }, "17": { "4": 0, "7": 1 }, "18": { "10": 2, "14": 3, "4": 0, "5": 1 }, "19": { "10": 3, "14": 4, "4": 0, "5": 1, "7": 2 }, "2": { "1": 0, "12": 5, "14": 6, "3": 1, "5": 2, "6": 3, "8": 4 }, "20": { "1": 0, "12": 5, "13": 6, "14": 7, "2": 1, "5": 2, "6": 3, "7": 4 }, "21": { "14": 1, "15": 2, "4": 0 }, "22": { "14": 1, "15": 2, "3": 0 }, "23": { "12": 3, "14": 4, "3": 0, "5": 1, "9": 2 }, "24": { "11": 2, "5": 0, "9": 1 }, "25": { "0": 0, "10": 2, "11": 3, "9": 1 }, "26": { "12": 2, "2": 0, "8": 1 }, "27": { "10": 3, "12": 4, "13": 5, "15": 6, "4": 0, "5": 1, "9": 2 }, "28": { "10": 2, "11": 3, "12": 4, "13": 5, "4": 0, "7": 1 }, "29": { "1": 0, "4": 1, "6": 2 }, "3": { "15": 1, "6": 0 }, "30": { "1": 0, "10": 3, "11": 4, "12": 5, "15": 6, "3": 1, "7": 2 }, "31": { "10": 1, "7": 0 }, "4": { "10": 3, "13": 4, "4": 0, "7": 1, "9": 2 }, "5": {}, "6": { "0": 0, "1": 1, "10": 5, "13": 6, "6": 2, "7": 3, "9": 4 }, "7": { "1": 0, "10": 2, "13": 3, "7": 1 }, "8": { "10": 3, "12": 4, "14": 5, "3": 0, "5": 1, "6": 2 }, "9": { "0": 0, "11": 2, "4": 1 } }, "output_router_logits": false, "pruned": true, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "router_aux_loss_coef": 0.0, "router_jitter_noise": 0.01, "sliding_window": 2047, "tie_word_embeddings": false, "transformers_version": "4.57.3", "use_cache": true, "vocab_size": 32064 }