{
  "datasets": ["aditijb/collabllm-20q"],
  "probs": [1],
  "assistant_model_name": "meta-llama/Llama-3.2-1B-Instruct",
  "n_eval_per_dataset": 30,
  "max_prompt_length": 2048,
  "max_new_tokens": 256,
  "num_train_epochs": 1,
  "learning_rate": 5e-06,
  "save_total_limit": 10,
  "minimum_gap": 0.1,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 8,
  "eval_steps": 1,
  "run_name": null,
  "resume_ckpt_dir": null,
  "output_dir": "./train/20q/dpo_train_offline",
  "seed": 42,
  "push_to_hub": true,
  "push_to_blob": false
}