moe-emergence / dense-baseline / final-model.json
sumitdotml's picture
add dense and moe checkpoints
3ff42e6
raw
history blame contribute delete
756 Bytes
{
"format_version": 1,
"step": 4999,
"preset": "dense",
"mode": "dense",
"config": {
"preset": "dense",
"mode": "dense",
"run_name": "dense-baseline",
"seed": 42,
"max_steps": 5000,
"batch_size": 2,
"grad_accum_steps": 4,
"effective_batch_size": 8,
"block_size": 512,
"learning_rate": 5e-05,
"weight_decay": 0.01,
"warmup_fraction": 0.1,
"max_grad_norm": 1.0,
"lb_coef": 0.0,
"z_coef": 0.0,
"n_experts": 8,
"topk": 1,
"noise_std": 0.0,
"moe_layers": [],
"size_mb": 10.0,
"balance_tokens": true,
"eval_every": 200,
"save_every": 500,
"collapse_early_stop": false
},
"metrics_summary": {
"eval_loss": 2.1567,
"eval_perplexity": 8.6424
}
}