Ralph-1 / config.json
bitzic's picture
Add Ralph-1 baseline (253,872,128 params, 262M tokens, final val loss 3.8163)
db521cf verified
Raw
History Blame Contribute Delete
519 Bytes
{
"vocab_size": 50257,
"dim": 1024,
"n_layers": 16,
"n_heads": 16,
"head_dim": 64,
"ffn_mult": 2.6875,
"max_seq_len": 1024,
"seq_len": 1024,
"batch_size": 128,
"micro_batch_size": 32,
"total_steps": 2000,
"warmup_steps": 200,
"max_lr": 0.0003,
"min_lr": 3e-05,
"weight_decay": 0.1,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"manifest_path": "data/data_manifest.json",
"data_base_dir": "data",
"data_seed": 1337,
"init_seed": 1337,
"use_bf16": true,
"log_every": 50
}