{
  "vocab_size": 50264,
  "context_length": 1024,
  "d_model": 512,
  "n_heads": 8,
  "n_layers": 6,
  "d_ff": 2048,
  "dropout": 0.1,
  "bias": true,
  "use_rope": false,
  "rope_base": 10000.0,
  "use_swiglu": false,
  "use_rmsnorm": false,
  "use_flash_attention": false,
  "batch_size": 8,
  "learning_rate": 3e-05,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "grad_accum_steps": 4,
  "warmup_steps": 120,
  "max_steps": 1800,
  "eval_interval": 100,
  "save_interval": 600,
  "data_dir": "data/processed",
  "checkpoint_dir": "checkpoints/phase5_chat_v3",
  "log_dir": "logs/phase5_chat_v3",
  "device": "auto",
  "dtype": "float32",
  "seed": 42,
  "use_wandb": false
}