| { |
| "epoch": 1.0, |
| "eval_logits/chosen": -923703389.9449542, |
| "eval_logits/rejected": -831308124.5957447, |
| "eval_logps/chosen": -219.38191657110093, |
| "eval_logps/rejected": -231.15425531914894, |
| "eval_loss": 0.37631505727767944, |
| "eval_rewards/chosen": 0.6432788393913059, |
| "eval_rewards/margins": 1.9445905758025428, |
| "eval_rewards/rejected": -1.3013117364112368, |
| "eval_runtime": 1556.3946, |
| "eval_samples_per_second": 0.643, |
| "eval_steps_per_second": 0.161, |
| "kl": 0.0, |
| "total_flos": 2.3597531637009613e+17, |
| "train_loss": 0.4100885201333348, |
| "train_runtime": 53730.3673, |
| "train_samples_per_second": 0.354, |
| "train_steps_per_second": 0.011 |
| } |