{
  "stage": "CPT",
  "model_id": "Qwen/Qwen3-14B-Base",
  "num_epochs": 2,
  "max_steps": -1,
  "batch_size": 4,
  "grad_accum": 8,
  "effective_batch_size": 32,
  "learning_rate": 3e-05,
  "weight_decay": 0.1,
  "warmup_ratio": 0.0,
  "max_grad_norm": 1.0,
  "seed": 42,
  "cache_key": "20be9f8f8ac0d877_Qwen3-14B-",
  "domain_counts": {},
  "domain_eval_domains": [],
  "max_seq_length": 3072,
  "prepared_max_seq_length": 16384,
  "chunked_loss": false,
  "chunked_loss_size": 1024
}