rootxhacker commited on
Commit
ccdf026
·
verified ·
1 Parent(s): de68200

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +39 -0
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 50304,
3
+ "d_model": 768,
4
+ "n_layers": 16,
5
+ "n_dense_layers": 1,
6
+ "n_q_heads": 12,
7
+ "n_kv_heads": 3,
8
+ "head_dim": 128,
9
+ "qk_norm": true,
10
+ "rope_theta": 10000.0,
11
+ "attn_softcap": 0.0,
12
+ "dense_ffn": 2304,
13
+ "expert_ffn": 320,
14
+ "n_experts": 36,
15
+ "top_k": 6,
16
+ "n_shared": 1,
17
+ "gating": "sigmoid",
18
+ "norm_topk_prob": false,
19
+ "balancing": "aux_free",
20
+ "aux_loss_coef": 0.001,
21
+ "z_loss_coef": 0.001,
22
+ "bias_update_rate": 0.001,
23
+ "router_init_std": 0.02,
24
+ "tie_embeddings": true,
25
+ "scale_embeddings": false,
26
+ "final_z_loss_coef": 0.0001,
27
+ "logit_softcap": 0.0,
28
+ "n_mtp": 0,
29
+ "mtp_weight": 0.1,
30
+ "init_std": 0.02,
31
+ "expert_backend": "grouped",
32
+ "fused_ce": true,
33
+ "ce_chunk": 4096,
34
+ "fp8_head": false,
35
+ "fp8_x_scale": 1.0,
36
+ "fp8_w_scale": 1.0,
37
+ "fp8_grad_scale": 1.0,
38
+ "preset": "500M"
39
+ }