{ "experiment_name": "exp4b_dpo", "config_file": "/scratch/jennifer/standard-project-m2-the-transformers/configs/exp4b_dpo.yaml", "resolved_config": { "num_train_epochs": 1, "learning_rate": 5e-07, "per_device_train_batch_size": 1, "per_device_eval_batch_size": 1, "gradient_accumulation_steps": 8, "warmup_ratio": 0.1, "seed": 42, "save_total_limit": 3, "eval_steps": 50, "logging_steps": 10, "beta": 0.1, "loss_type": "sigmoid", "max_length": 4096, "max_completion_length": 3072, "val_fraction": 0.1, "wandb_project": "cs552-math-dpo", "experiment_name": "exp4b_dpo", "output_dir": "/scratch/checkpoints/exp4b_dpo", "sft_checkpoint": "/scratch/checkpoints/sft_mixlong_full", "train_source": "/scratch/data/dpo_pairs_v2/pairs.jsonl" }, "n_train": 5048, "n_val": 561 }