{ "epoch": 9.908256880733944, "eval_logits/chosen": 1.670241117477417, "eval_logits/rejected": 1.8750687837600708, "eval_logps/chosen": -71.820556640625, "eval_logps/rejected": -85.4496078491211, "eval_loss": 0.6761088967323303, "eval_rewards/accuracies": 0.5199999809265137, "eval_rewards/chosen": -0.03448202461004257, "eval_rewards/margins": 0.033580340445041656, "eval_rewards/rejected": -0.06806236505508423, "eval_runtime": 12.5383, "eval_samples_per_second": 15.473, "eval_steps_per_second": 1.994 }