{ "dataset": 0, "epoch": 4.0, "eval_logits/chosen": -2.9694149494171143, "eval_logits/rejected": -4.259973526000977, "eval_logps/chosen": -733.276611328125, "eval_logps/rejected": -979.574462890625, "eval_loss": 1.8394471501537168e-09, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 8.114361763000488, "eval_rewards/margins": 60.553192138671875, "eval_rewards/rejected": -52.42021179199219, "eval_runtime": 8.4622, "eval_samples_per_second": 177.258, "eval_score": -0.7999978065490723, "eval_steps_per_second": 5.554 }