{ "format_version": 1, "step": 4999, "preset": "dense", "mode": "dense", "config": { "preset": "dense", "mode": "dense", "run_name": "dense-baseline", "seed": 42, "max_steps": 5000, "batch_size": 2, "grad_accum_steps": 4, "effective_batch_size": 8, "block_size": 512, "learning_rate": 5e-05, "weight_decay": 0.01, "warmup_fraction": 0.1, "max_grad_norm": 1.0, "lb_coef": 0.0, "z_coef": 0.0, "n_experts": 8, "topk": 1, "noise_std": 0.0, "moe_layers": [], "size_mb": 10.0, "balance_tokens": true, "eval_every": 200, "save_every": 500, "collapse_early_stop": false }, "metrics_summary": { "eval_loss": 2.1567, "eval_perplexity": 8.6424 } }