{ "architectures": [ "PhimoeForCausalLM" ], "attention_bias": true, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_slimmoe.PhiMoEConfig", "AutoModelForCausalLM": "modeling_slimmoe.PhiMoEForCausalLM" }, "bos_token_id": 1, "dtype": "bfloat16", "eos_token_id": 32000, "expert_dropout": 0.0, "head_dim": 128, "hidden_act": "silu", "hidden_dropout": 0.0, "hidden_size": 4096, "initializer_range": 0.02, "input_jitter_noise": 0.01, "intermediate_size": 960, "is_pruned": true, "lm_head_bias": true, "max_position_embeddings": 4096, "model_type": "phimoe", "num_attention_heads": 32, "num_experts_per_tok": 2, "num_hidden_layers": 32, "num_key_value_heads": 8, "num_local_experts": 48, "old_to_new_mapping": { "0": { "12": 3, "14": 4, "4": 0, "5": 1, "8": 2 }, "1": { "1": 0, "10": 6, "12": 7, "14": 8, "3": 1, "4": 2, "5": 3, "7": 4, "8": 5 }, "10": { "1": 0, "13": 6, "15": 7, "2": 1, "4": 2, "6": 3, "7": 4, "8": 5 }, "11": { "0": 0, "11": 3, "12": 4, "14": 5, "15": 6, "6": 1, "8": 2 }, "12": { "0": 0, "10": 3, "11": 4, "12": 5, "14": 6, "15": 7, "5": 1, "6": 2 }, "13": { "11": 2, "13": 3, "14": 4, "2": 0, "3": 1 }, "14": { "11": 2, "12": 3, "13": 4, "14": 5, "2": 0, "3": 1 }, "15": { "0": 0, "1": 1, "10": 5, "12": 6, "14": 7, "3": 2, "4": 3, "7": 4 }, "16": { "1": 0, "11": 5, "12": 6, "14": 7, "2": 1, "3": 2, "5": 3, "7": 4 }, "17": { "1": 0, "12": 7, "14": 8, "2": 1, "3": 2, "4": 3, "5": 4, "6": 5, "7": 6 }, "18": { "10": 6, "12": 7, "14": 8, "3": 0, "4": 1, "5": 2, "7": 3, "8": 4, "9": 5 }, "19": { "0": 0, "10": 6, "14": 7, "3": 1, "4": 2, "5": 3, "6": 4, "7": 5 }, "2": { "1": 0, "10": 8, "11": 9, "12": 10, "13": 11, "14": 12, "15": 13, "2": 1, "3": 2, "4": 3, "5": 4, "6": 5, "7": 6, "8": 7 }, "20": { "1": 0, "12": 5, "13": 6, "14": 7, "15": 8, "2": 1, "5": 2, "6": 3, "7": 4 }, "21": { "14": 3, "15": 4, "4": 0, "6": 1, "9": 2 }, "22": { "0": 0, "1": 1, "14": 7, "15": 8, "3": 2, "4": 3, "6": 4, "8": 5, "9": 6 }, "23": { "0": 0, "10": 5, "12": 6, "14": 7, "2": 1, "3": 2, "5": 3, "9": 4 }, "24": { "1": 0, "11": 5, "14": 6, "3": 1, "5": 2, "7": 3, "9": 4 }, "25": { "0": 0, "10": 5, "11": 6, "12": 7, "15": 8, "2": 1, "4": 2, "8": 3, "9": 4 }, "26": { "11": 4, "12": 5, "15": 6, "2": 0, "3": 1, "4": 2, "8": 3 }, "27": { "0": 0, "10": 5, "11": 6, "12": 7, "13": 8, "15": 9, "2": 1, "4": 2, "5": 3, "9": 4 }, "28": { "1": 0, "10": 5, "11": 6, "12": 7, "13": 8, "4": 1, "6": 2, "7": 3, "9": 4 }, "29": { "1": 0, "10": 6, "11": 7, "4": 1, "6": 2, "7": 3, "8": 4, "9": 5 }, "3": { "1": 0, "10": 6, "11": 7, "13": 8, "14": 9, "15": 10, "4": 1, "6": 2, "7": 3, "8": 4, "9": 5 }, "30": { "1": 0, "10": 4, "11": 5, "12": 6, "15": 7, "3": 1, "7": 2, "9": 3 }, "31": { "1": 0, "10": 6, "12": 7, "15": 8, "2": 1, "4": 2, "7": 3, "8": 4, "9": 5 }, "4": { "10": 6, "13": 7, "15": 8, "4": 0, "5": 1, "6": 2, "7": 3, "8": 4, "9": 5 }, "5": { "12": 3, "13": 4, "15": 5, "4": 0, "7": 1, "9": 2 }, "6": { "0": 0, "1": 1, "10": 6, "13": 7, "15": 8, "5": 2, "6": 3, "7": 4, "9": 5 }, "7": { "1": 0, "10": 3, "12": 4, "13": 5, "3": 1, "7": 2 }, "8": { "1": 0, "10": 7, "12": 8, "14": 9, "3": 1, "4": 2, "5": 3, "6": 4, "7": 5, "8": 6 }, "9": { "0": 0, "11": 3, "4": 1, "6": 2 } }, "output_router_logits": false, "pruned": true, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "router_aux_loss_coef": 0.0, "router_jitter_noise": 0.01, "sliding_window": 2047, "tie_word_embeddings": false, "transformers_version": "4.57.3", "use_cache": true, "vocab_size": 32064 }