np-cr committed on
Commit
3472bd7
·
verified ·
1 Parent(s): 1ec742b

fix: Align num_local_experts=128 to match actual weights

Browse files
Files changed (1) hide show
  1. config.json +3 -3
config.json CHANGED
@@ -20,14 +20,14 @@
20
  "model_type": "glm4_moe",
21
  "moe_intermediate_size": 1408,
22
  "n_group": 1,
23
- "n_routed_experts": 4,
24
  "n_shared_experts": 1,
25
  "norm_topk_prob": true,
26
  "num_attention_heads": 4,
27
- "num_experts_per_tok": 4,
28
  "num_hidden_layers": 2,
29
  "num_key_value_heads": 2,
30
- "num_local_experts": 4,
31
  "num_nextn_predict_layers": 1,
32
  "pad_token_id": 151329,
33
  "partial_rotary_factor": 0.5,
 
20
  "model_type": "glm4_moe",
21
  "moe_intermediate_size": 1408,
22
  "n_group": 1,
23
+ "n_routed_experts": 128,
24
  "n_shared_experts": 1,
25
  "norm_topk_prob": true,
26
  "num_attention_heads": 4,
27
+ "num_experts_per_tok": 8,
28
  "num_hidden_layers": 2,
29
  "num_key_value_heads": 2,
30
+ "num_local_experts": 128,
31
  "num_nextn_predict_layers": 1,
32
  "pad_token_id": 151329,
33
  "partial_rotary_factor": 0.5,