File size: 662 Bytes
ec927b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
{
  "vocab_size": 114,
  "hidden_size": 448,
  "num_hidden_layers": 16,
  "num_attention_heads": 7,
  "num_key_value_heads": 7,
  "intermediate_size": 1344,
  "max_position_embeddings": 8192,
  "rms_norm_eps": 1e-6,
  "initializer_range": 0.02,
  "use_cache": true,
  "num_experts": 8,
  "num_experts_per_tok": 2,
  "expert_capacity_factor": 1.5,
  "router_aux_loss_coef": 0.01,
  "moe_implementation": "megablocks",
  "moe_world_size": 4,
  "resid_dropout": 0.0,
  "hidden_dropout": 0.0,
  "dropout_warmup_steps": 0,
  "dropout_ramp_steps": 0,
  "dropout_schedule": "linear",
  "attention_dropout": 0.0,
  "gradient_clip_norm": 0.0,
  "label_smoothing": 0.0
}