{ "epoch": 3.0, "eval_logits/chosen": -114258240.0, "eval_logits/rejected": -101566632.0, "eval_logps/chosen": -483.6653747558594, "eval_logps/rejected": -500.6080322265625, "eval_loss": 0.30653947591781616, "eval_rewards/chosen": 1.6094696521759033, "eval_rewards/margins": -4.692190408706665, "eval_rewards/rejected": 6.301660060882568, "eval_runtime": 2.6407, "eval_samples_per_second": 3.787, "eval_steps_per_second": 0.757, "kl": 0.0 }