{ "eval_logits/chosen": -0.5001821517944336, "eval_logits/rejected": -0.5001798868179321, "eval_logps/chosen": -268.5600891113281, "eval_logps/rejected": -269.17041015625, "eval_loss": -0.029902135953307152, "eval_rewards/chosen": -0.8525950312614441, "eval_rewards/rejected": -1.1856228113174438, "eval_runtime": 4761.786, "eval_samples": 20000, "eval_samples_per_second": 4.2, "eval_steps_per_second": 2.1 }