File size: 664 Bytes
7b65fce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
{
  "attention_dropout": 0.0,
  "dropout_ramp_steps": 0,
  "dropout_schedule": "linear",
  "dropout_warmup_steps": 0,
  "expert_capacity_factor": 1.5,
  "gradient_clip_norm": 0.0,
  "hidden_dropout": 0.0,
  "hidden_size": 672,
  "initializer_range": 0.02,
  "intermediate_size": 2016,
  "label_smoothing": 0.0,
  "max_position_embeddings": 8192,
  "moe_implementation": "megablocks",
  "moe_world_size": 4,
  "num_attention_heads": 12,
  "num_experts": 8,
  "num_experts_per_tok": 2,
  "num_hidden_layers": 20,
  "num_key_value_heads": 12,
  "resid_dropout": 0.0,
  "rms_norm_eps": 1e-6,
  "router_aux_loss_coef": 0.01,
  "use_cache": true,
  "vocab_size": 114
}