{
  "project": "iterative preference learning",
  "exp_name": "llama-3-8b_iter2",
  "cache_dir": ".cache",
  "result_dir": "results",
  "data": null,
  "prompt_max_length": 1024,
  "max_length": 2048,
  "model_name_or_path": ".cache/llama-3-8b_iter1",
  "ref_model_name_or_path": "RLHFlow/LLaMA3-SFT",
  "beta": 0.1,
  "n_epochs": 1,
  "per_device_batch_size": 1,
  "gradient_accumulation_steps": 32,
  "lr": 5e-07,
  "warmup_ratio": 0.03,
  "max_grad_norm": 1,
  "open_port": 57039
}