{ "epoch": 0.995910949568378, "eval_logits/chosen": -13.768111228942871, "eval_logits/rejected": -13.749955177307129, "eval_logps/chosen": -1.4371137619018555, "eval_logps/rejected": -1.7012752294540405, "eval_loss": 4.129865646362305, "eval_rewards/accuracies": 0.6721311211585999, "eval_rewards/chosen": -14.371137619018555, "eval_rewards/margins": 2.6416165828704834, "eval_rewards/rejected": -17.012752532958984, "eval_runtime": 65.8969, "eval_samples": 1941, "eval_samples_per_second": 29.455, "eval_steps_per_second": 1.851 }