| { |
| "epoch": 0.9997172745264349, |
| "eval_logits/chosen": -1.1595183610916138, |
| "eval_logits/rejected": -1.1711369752883911, |
| "eval_logps/chosen": -4.307417392730713, |
| "eval_logps/rejected": -5.051736831665039, |
| "eval_loss": 2.3919546604156494, |
| "eval_rewards/accuracies": 0.8038102388381958, |
| "eval_rewards/chosen": -43.07417297363281, |
| "eval_rewards/margins": 7.443192958831787, |
| "eval_rewards/rejected": -50.517372131347656, |
| "eval_runtime": 100.3048, |
| "eval_samples_per_second": 29.699, |
| "eval_steps_per_second": 1.864, |
| "total_flos": 134366991482880.0, |
| "train_loss": 3.293350306571339, |
| "train_runtime": 7617.916, |
| "train_samples_per_second": 7.429, |
| "train_steps_per_second": 0.058 |
| } |