simpo-train-large-wrong / all_results.json
jz666's picture
End of training
d5735b7 verified
{
"epoch": 0.995910949568378,
"eval_logits/chosen": -6.213160991668701,
"eval_logits/rejected": -6.001035213470459,
"eval_logps/chosen": -0.544950544834137,
"eval_logps/rejected": -0.6353368759155273,
"eval_loss": 4.511509418487549,
"eval_rewards/accuracies": 0.5963114500045776,
"eval_rewards/chosen": -5.44950532913208,
"eval_rewards/margins": 0.9038640260696411,
"eval_rewards/rejected": -6.353369235992432,
"eval_runtime": 86.5154,
"eval_samples": 1941,
"eval_samples_per_second": 22.435,
"eval_steps_per_second": 1.41,
"total_flos": 0.0,
"train_loss": 5.239574314033898,
"train_runtime": 2514.0727,
"train_samples": 17605,
"train_samples_per_second": 7.003,
"train_steps_per_second": 0.054
}