{
  "model": {
    "vocab_size": 50257,
    "max_seq_len": 2048,
    "dim": 1024,
    "n_layers": 16,
    "n_heads": 16,
    "hidden_dim": 2736,
    "dropout": 0.0
  },
  "training": {
    "batch_size": 1,
    "gradient_accumulation_steps": 32,
    "max_steps": 50000,
    "warmup_steps": 2000,
    "learning_rate": 0.0003,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "mixed_precision": "bf16",
    "gradient_checkpointing": true
  },
  "data": {
    "seq_length": 1024,
    "data_path": "data/tokens/packed_1024.txt"
  },
  "hardware": {
    "device": "cuda",
    "compile_model": false
  },
  "logging": {
    "log_interval": 10,
    "save_interval": 2000,
    "output_dir": "checkpoints"
  }
}