| { |
| "epoch": 9.908256880733944, |
| "eval_logits/chosen": 1.670241117477417, |
| "eval_logits/rejected": 1.8750687837600708, |
| "eval_logps/chosen": -71.820556640625, |
| "eval_logps/rejected": -85.4496078491211, |
| "eval_loss": 0.6761088967323303, |
| "eval_rewards/accuracies": 0.5199999809265137, |
| "eval_rewards/chosen": -0.03448202461004257, |
| "eval_rewards/margins": 0.033580340445041656, |
| "eval_rewards/rejected": -0.06806236505508423, |
| "eval_runtime": 12.5383, |
| "eval_samples_per_second": 15.473, |
| "eval_steps_per_second": 1.994, |
| "total_flos": 9.208786937802916e+17, |
| "train_loss": 0.6812221950954861, |
| "train_runtime": 2021.1055, |
| "train_samples_per_second": 8.629, |
| "train_steps_per_second": 0.134 |
| } |