{
  "vocab_size": 32000,
  "max_position_embeddings": 2048,
  "hidden_size": 512,
  "intermediate_size": 1365,
  "num_layers": 8,
  "num_attention_heads": 8,
  "num_key_value_heads": 2,
  "rope_theta": 10000.0,
  "rms_norm_eps": 1e-06,
  "learning_rate": 0.0005,
  "beta1": 0.9,
  "beta2": 0.95,
  "weight_decay": 0.1,
  "gradient_clip_val": 1.0,
  "warmup_steps": 1000,
  "max_steps": 50000,
  "batch_size": 2,
  "gradient_accumulation_steps": 16,
  "max_length": 512,
  "eval_interval": 500,
  "save_interval": 2500,
  "dataloader_workers": 0
}