{ "vocab_size": 50264, "context_length": 1024, "d_model": 512, "n_heads": 8, "n_layers": 6, "d_ff": 2048, "dropout": 0.1, "bias": true, "use_rope": false, "rope_base": 10000.0, "use_swiglu": false, "use_rmsnorm": false, "use_flash_attention": false, "batch_size": 8, "learning_rate": 3e-05, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "grad_accum_steps": 4, "warmup_steps": 120, "max_steps": 1800, "eval_interval": 100, "save_interval": 600, "data_dir": "data/processed", "checkpoint_dir": "checkpoints/phase5_chat_v3", "log_dir": "logs/phase5_chat_v3", "device": "auto", "dtype": "float32", "seed": 42, "use_wandb": false }