{
  "output_dir": "/scratch/11079/antonxue/cache/ADLMC/ancoder_stage1_0.6B_20260507_063210",
  "base_model": "Qwen/Qwen3-0.6B",
  "num_denoiser_layers": -1,
  "anchor_weight": 0.1,
  "data_dir": "/scratch/11079/antonxue/dreamcoder_data",
  "max_length": 2048,
  "max_steps": 500000,
  "batch_size": 16,
  "gradient_accumulation_steps": 1,
  "learning_rate": 0.0001,
  "warmup_steps": 500,
  "weight_decay": 0.1,
  "logging_steps": 5,
  "save_steps": 1000,
  "seed": 42,
  "gradient_checkpointing": true,
  "resume_from_checkpoint": null,
  "allow_different_output_dir": false
}