AnCoder-1.0B-Base / argparse.json
AntonXue's picture
Initial release: SWA-averaged Stage-1 endpoint (steps 46k-50k, 1k stride)
4a4735e verified
{
"output_dir": "/scratch/11079/antonxue/cache/ADLMC/ancoder_stage1_0.6B_20260507_063210",
"base_model": "Qwen/Qwen3-0.6B",
"num_denoiser_layers": -1,
"anchor_weight": 0.1,
"data_dir": "/scratch/11079/antonxue/dreamcoder_data",
"max_length": 2048,
"max_steps": 500000,
"batch_size": 16,
"gradient_accumulation_steps": 1,
"learning_rate": 0.0001,
"warmup_steps": 500,
"weight_decay": 0.1,
"logging_steps": 5,
"save_steps": 1000,
"seed": 42,
"gradient_checkpointing": true,
"resume_from_checkpoint": null,
"allow_different_output_dir": false
}