{ "stage": "CPT", "model_id": "Qwen/Qwen3-14B-Base", "num_epochs": 2, "max_steps": -1, "batch_size": 4, "grad_accum": 8, "effective_batch_size": 32, "learning_rate": 3e-05, "weight_decay": 0.1, "warmup_ratio": 0.0, "max_grad_norm": 1.0, "seed": 42, "cache_key": "20be9f8f8ac0d877_Qwen3-14B-", "domain_counts": {}, "domain_eval_domains": [], "max_seq_length": 3072, "prepared_max_seq_length": 16384, "chunked_loss": false, "chunked_loss_size": 1024 }