{
  "out_dir": "out-nanecho-ci",
  "eval_interval": 25,
  "log_interval": 5,
  "eval_iters": 10,
  "eval_only": false,
  "always_save_checkpoint": true,
  "init_from": "scratch",
  "wandb_log": false,
  "wandb_project": "nanecho",
  "wandb_run_name": "nanecho-1771761179.4450994",
  "dataset": "nanecho",
  "gradient_accumulation_steps": 2,
  "batch_size": 2,
  "block_size": 1024,
  "n_layer": 4,
  "n_head": 4,
  "n_embd": 256,
  "dropout": 0.1,
  "bias": true,
  "learning_rate": 0.0002,
  "max_iters": 200,
  "weight_decay": 0.01,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "decay_lr": true,
  "warmup_iters": 20,
  "lr_decay_iters": 200,
  "min_lr": 2e-05,
  "backend": "nccl",
  "device": "cpu",
  "dtype": "float32",
  "compile": false
}