math_model / split_info.json
jdecim's picture
Push DPO checkpoint with T=0.3 (optimal for pass@8 on CI gate)
ac3517e verified
raw
history blame
860 Bytes
{
"experiment_name": "exp4b_dpo",
"config_file": "/scratch/jennifer/standard-project-m2-the-transformers/configs/exp4b_dpo.yaml",
"resolved_config": {
"num_train_epochs": 1,
"learning_rate": 5e-07,
"per_device_train_batch_size": 1,
"per_device_eval_batch_size": 1,
"gradient_accumulation_steps": 8,
"warmup_ratio": 0.1,
"seed": 42,
"save_total_limit": 3,
"eval_steps": 50,
"logging_steps": 10,
"beta": 0.1,
"loss_type": "sigmoid",
"max_length": 4096,
"max_completion_length": 3072,
"val_fraction": 0.1,
"wandb_project": "cs552-math-dpo",
"experiment_name": "exp4b_dpo",
"output_dir": "/scratch/checkpoints/exp4b_dpo",
"sft_checkpoint": "/scratch/checkpoints/sft_mixlong_full",
"train_source": "/scratch/data/dpo_pairs_v2/pairs.jsonl"
},
"n_train": 5048,
"n_val": 561
}