{
"model": {
"vocab_size": 32000,
"d_model": 768,
"n_heads": 12,
"n_layers": 12,
"d_ff": 2560,
"max_seq_len": 1024,
"dropout": 0.0,
"activation": "swiglu",
"norm": "rmsnorm",
"norm_first": true,
"bias": false,
"pos_encoding": "learned",
"weight_tying": true
},
"training": {
"batch_size": 128,
"learning_rate": 0.0006,
"max_epochs": 1,
"grad_clip": 1.0,
"pad_id": 0,
"log_every": 100,
"attention_log_every": 2000,
"device": "cuda",
"checkpoint_dir": "checkpoints/llama_124m",
"tensorboard_dir": "runs/llama_124m",
"gradient_accumulation_steps": 4,
"precision": "bfloat16"
},
"tokenizer_path": "data/llama_124m/tok_32k.model",
"total_parameters": 124472064,
"train_tokens": 9651061760,
"warmup_steps": 750,
"seed": 42
}