{
  "datasets": [
    "aditijb/collabllm-20q"
  ],
  "probs": [
    1
  ],
  "assistant_model_name": "meta-llama/Llama-3.2-1B-Instruct",
  "n_eval_per_dataset": 30,
  "max_prompt_length": 2048,
  "max_new_tokens": 256,
  "num_train_epochs": 1,
  "learning_rate": 5e-06,
  "save_total_limit": 10,
  "minimum_gap": 0.1,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 8,
  "eval_steps": 1,
  "run_name": null,
  "resume_ckpt_dir": null,
  "output_dir": "./train/20q/dpo_train_offline",
  "seed": 42,
  "push_to_hub": true,
  "push_to_blob": false
}