{ "epoch": 0.9997271487039563, "eval_logits/chosen": -1.326757788658142, "eval_logits/rejected": -1.3503906726837158, "eval_logps/chosen": -296.1499938964844, "eval_logps/rejected": -340.8999938964844, "eval_loss": 0.8531758785247803, "eval_rewards/accuracies": 0.6082331538200378, "eval_rewards/chosen": -7.192187309265137, "eval_rewards/margins": 1.5004761219024658, "eval_rewards/rejected": -8.696093559265137, "eval_runtime": 61.315, "eval_samples_per_second": 20.778, "eval_steps_per_second": 0.652 }