simpo-train-large-wrong / eval_results.json
jz666's picture
End of training
d5735b7 verified
{
"epoch": 0.995910949568378,
"eval_logits/chosen": -6.213160991668701,
"eval_logits/rejected": -6.001035213470459,
"eval_logps/chosen": -0.544950544834137,
"eval_logps/rejected": -0.6353368759155273,
"eval_loss": 4.511509418487549,
"eval_rewards/accuracies": 0.5963114500045776,
"eval_rewards/chosen": -5.44950532913208,
"eval_rewards/margins": 0.9038640260696411,
"eval_rewards/rejected": -6.353369235992432,
"eval_runtime": 86.5154,
"eval_samples": 1941,
"eval_samples_per_second": 22.435,
"eval_steps_per_second": 1.41
}