| { |
| "epoch": 2.0, |
| "eval_logits/chosen": 0.6623885631561279, |
| "eval_logits/rejected": 0.7668179869651794, |
| "eval_logps/chosen": -271.2955017089844, |
| "eval_logps/rejected": -249.1475830078125, |
| "eval_loss": 2156.2255859375, |
| "eval_rewards/accuracies": 0.6940000057220459, |
| "eval_rewards/chosen": -0.11051338165998459, |
| "eval_rewards/margins": 0.06660113483667374, |
| "eval_rewards/rejected": -0.17711451649665833, |
| "eval_runtime": 325.4702, |
| "eval_samples": 2000, |
| "eval_samples_per_second": 6.145, |
| "eval_steps_per_second": 0.384 |
| } |