aditijb's picture
Training in progress, epoch 0
1cb6c15 verified
{
"datasets": [
"aditijb/collabllm-20q"
],
"probs": [
1
],
"assistant_model_name": "meta-llama/Llama-3.2-1B-Instruct",
"n_eval_per_dataset": 30,
"max_prompt_length": 2048,
"max_new_tokens": 256,
"num_train_epochs": 1,
"learning_rate": 5e-06,
"save_total_limit": 10,
"minimum_gap": 0.1,
"per_device_train_batch_size": 4,
"gradient_accumulation_steps": 8,
"eval_steps": 1,
"run_name": null,
"resume_ckpt_dir": null,
"output_dir": "./train/20q/dpo_train_offline",
"seed": 42,
"push_to_hub": true,
"push_to_blob": false
}