{ "epoch": 0.8, "eval_logits/chosen": 26.002456665039062, "eval_logits/rejected": 22.07574462890625, "eval_logps/chosen": -45.84428024291992, "eval_logps/rejected": -34.291664123535156, "eval_loss": 0.6927242279052734, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.0004447936953511089, "eval_rewards/margins": 0.0008460998651571572, "eval_rewards/rejected": -0.0004013061698060483, "eval_runtime": 0.9296, "eval_samples_per_second": 1.076, "eval_steps_per_second": 1.076 }