echoself / training_metadata.json
drzo's picture
Deploy NanEcho CI checkpoint (4L/4H/256E, 200 iters, val_loss=1.9258)
ee242c7 verified
{
"out_dir": "out-nanecho-ci",
"eval_interval": 25,
"log_interval": 5,
"eval_iters": 10,
"eval_only": false,
"always_save_checkpoint": true,
"init_from": "scratch",
"wandb_log": false,
"wandb_project": "nanecho",
"wandb_run_name": "nanecho-1771761179.4450994",
"dataset": "nanecho",
"gradient_accumulation_steps": 2,
"batch_size": 2,
"block_size": 1024,
"n_layer": 4,
"n_head": 4,
"n_embd": 256,
"dropout": 0.1,
"bias": true,
"learning_rate": 0.0002,
"max_iters": 200,
"weight_decay": 0.01,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"decay_lr": true,
"warmup_iters": 20,
"lr_decay_iters": 200,
"min_lr": 2e-05,
"backend": "nccl",
"device": "cpu",
"dtype": "float32",
"compile": false
}