axon-test-model / config.json
apd-jmorinelli's picture
Upload folder using huggingface_hub
f9d6891 verified
{
  "activation_type": "gelu",
  "architectures": ["AxonForCausalLM"],
  "attention_type": "standard",
  "batch_size": 15,
  "bias": true,
  "block_size": 1024,
  "block_type": "standard",
  "dropout": 0.2,
  "dropout_attention": 0.2,
  "dropout_resid": 0.2,
  "eval_interval": 25,
  "eval_iters": 200,
  "flash_attention": false,
  "grad_clip": 1.0,
  "gradient_accumulation_steps": 40,
  "layer_norm_type": "default",
  "log_interval": 10,
  "max_iters": 600000,
  "mlp_ratio": 4,
  "model_type": "gpt2",
  "n_embd": 256,
  "n_head": 4,
  "n_layer": 8,
  "out_path": "models/axon-test/",
  "rotary_embeddings": true,
  "transformers_version": "4.41.2",
  "use_cache": true,
  "vocab_size": 50304
}