{ "epoch": 0.995910949568378, "eval_logits/chosen": -6.920379161834717, "eval_logits/rejected": -6.706053733825684, "eval_logps/chosen": -0.6168988347053528, "eval_logps/rejected": -0.7138991951942444, "eval_loss": 4.313390731811523, "eval_rewards/accuracies": 0.6024590134620667, "eval_rewards/chosen": -6.168987274169922, "eval_rewards/margins": 0.9700047373771667, "eval_rewards/rejected": -7.138991832733154, "eval_runtime": 65.9401, "eval_samples": 1941, "eval_samples_per_second": 29.436, "eval_steps_per_second": 1.85 }