| { | |
| "dataset": 0, | |
| "epoch": 4.0, | |
| "eval_logits/chosen": -2.9694149494171143, | |
| "eval_logits/rejected": -4.259973526000977, | |
| "eval_logps/chosen": -733.276611328125, | |
| "eval_logps/rejected": -979.574462890625, | |
| "eval_loss": 1.8394471501537168e-09, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 8.114361763000488, | |
| "eval_rewards/margins": 60.553192138671875, | |
| "eval_rewards/rejected": -52.42021179199219, | |
| "eval_runtime": 8.4622, | |
| "eval_samples_per_second": 177.258, | |
| "eval_score": -0.7999978065490723, | |
| "eval_steps_per_second": 5.554 | |
| } |