| { |
| "epoch": 0.9880609304240429, |
| "eval_logits/chosen": 9.224187850952148, |
| "eval_logits/rejected": 10.039901733398438, |
| "eval_logps/chosen": -0.3674238324165344, |
| "eval_logps/rejected": -0.3802245259284973, |
| "eval_loss": 1.378309726715088, |
| "eval_rewards/accuracies": 0.5407407283782959, |
| "eval_rewards/chosen": -0.7348476648330688, |
| "eval_rewards/margins": 0.025601468980312347, |
| "eval_rewards/rejected": -0.7604490518569946, |
| "eval_runtime": 153.1014, |
| "eval_samples": 1080, |
| "eval_samples_per_second": 7.054, |
| "eval_steps_per_second": 1.764, |
| "total_flos": 0.0, |
| "train_loss": 1.3904367176691692, |
| "train_runtime": 8038.3927, |
| "train_samples": 9715, |
| "train_samples_per_second": 1.209, |
| "train_steps_per_second": 0.009 |
| } |