model:
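  # Decoder-only transformer; these hyperparameters correspond to a ~7B-parameter LLaMA-style model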
  name: llama
  architecture: transformer
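  # Core dimensions; intermediate_size is the hidden width of the SwiGLU feed-forward block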
  vocab_size: 32000
  hidden_size: 4096
  num_attention_heads: 32
  num_hidden_layers: 32
  intermediate_size: 11008
  activation_function: swiglu
  max_position_embeddings: 2048
  initializer_range: 0.02
  layer_norm_eps: 1e-5
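  # Special token ids; tie_word_embeddings: false keeps separate input and output embedding matrices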
  pad_token_id: 0
  bos_token_id: 1
  eos_token_id: 2
  tie_word_embeddings: false
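  # RoPE base frequency (theta) for rotary position embeddings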
  rotary_embedding_base: 10000
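  # Dropout disabled; use_cache enables key/value caching during generation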
  attention_dropout: 0.0
  hidden_dropout: 0.0
  use_cache: true