model:
  name: llama
  architecture: transformer
  vocab_size: 32000
  hidden_size: 4096
  num_attention_heads: 32
  num_hidden_layers: 32
  intermediate_size: 11008
  activation_function: swiglu
  max_position_embeddings: 2048
  initializer_range: 0.02
  layer_norm_eps: 1e-5
  pad_token_id: 0
  bos_token_id: 1
  eos_token_id: 2
  tie_word_embeddings: false
  rotary_embedding_base: 10000
  attention_dropout: 0.0
  hidden_dropout: 0.0
  use_cache: true
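
Below is a minimal sketch of loading and sanity-checking this config in Python with PyYAML. The file name `config.yaml`, the `ModelConfig` dataclass, and the validation checks are illustrative assumptions, not part of this repository.

```python
# Sketch: load the model config above into a typed dataclass.
# Assumes the YAML is saved as "config.yaml"; ModelConfig is hypothetical.
from dataclasses import dataclass

import yaml


@dataclass
class ModelConfig:
    name: str
    architecture: str
    vocab_size: int
    hidden_size: int
    num_attention_heads: int
    num_hidden_layers: int
    intermediate_size: int
    activation_function: str
    max_position_embeddings: int
    initializer_range: float
    layer_norm_eps: float
    pad_token_id: int
    bos_token_id: int
    eos_token_id: int
    tie_word_embeddings: bool
    rotary_embedding_base: int
    attention_dropout: float
    hidden_dropout: float
    use_cache: bool

    @property
    def head_dim(self) -> int:
        # Per-head dimension: 4096 / 32 = 128 for this config.
        return self.hidden_size // self.num_attention_heads


with open("config.yaml") as f:
    raw = yaml.safe_load(f)["model"]

# PyYAML follows YAML 1.1 resolver rules, under which "1e-5" (no decimal
# point before the exponent) is parsed as a string, not a float; coerce it.
raw["layer_norm_eps"] = float(raw["layer_norm_eps"])

cfg = ModelConfig(**raw)

# Sanity checks on invariants the attention layers depend on.
assert cfg.hidden_size % cfg.num_attention_heads == 0
assert cfg.vocab_size > max(cfg.pad_token_id, cfg.bos_token_id, cfg.eos_token_id)
print(cfg.name, cfg.head_dim)  # -> llama 128
```

Note the explicit `float(...)` coercion for `layer_norm_eps`: it guards against the YAML 1.1 string-vs-float gotcha above. The hyperparameters themselves (hidden size 4096, 32 heads of 128 dims, 32 layers, SwiGLU with an 11008-wide intermediate projection, 32000-token vocabulary, 2048-token context, rotary base 10000) match the published LLaMA-7B configuration.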