gpt2-124m-ablation / config.json
bitlabsdb's picture
Upload best model (Val Loss: 4.485)
1a20488 verified
raw
history blame contribute delete
538 Bytes
{
  "vocab_size": 50257,
  "context_length": 256,
  "emb_dim": 768,
  "n_heads": 12,
  "n_layers": 12,
  "drop_rate": 0.1,
  "qkv_bias": false,
  "max_length": 256,
  "output_dimension": 768,
  "batch_size": 8,
  "architectures": [
    "GPTModel"
  ],
  "model_type": "gpt2",
  "n_positions": 256,
  "n_embd": 768,
  "n_layer": 12,
  "n_head": 12,
  "activation_function": "gelu_new",
  "resid_pdrop": 0.1,
  "attn_pdrop": 0.1,
  "layer_norm_epsilon": 1e-05,
  "initializer_range": 0.02,
  "bos_token_id": 50256,
  "eos_token_id": 50256
}