| { |
| "epoch": 1.0, |
| "eval_logits/chosen": -2.0462207794189453, |
| "eval_logits/rejected": -1.7758138179779053, |
| "eval_logps/chosen": -90.40424346923828, |
| "eval_logps/rejected": -73.17691802978516, |
| "eval_loss": 0.5703843832015991, |
| "eval_rewards/accuracies": 0.9472222328186035, |
| "eval_rewards/chosen": 0.1581471860408783, |
| "eval_rewards/margins": 0.2657936215400696, |
| "eval_rewards/rejected": -0.10764642059803009, |
| "eval_runtime": 118.208, |
| "eval_samples": 2862, |
| "eval_samples_per_second": 24.212, |
| "eval_steps_per_second": 0.761, |
| "train_loss": 0.6280729855576607, |
| "train_runtime": 9689.6427, |
| "train_samples": 140201, |
| "train_samples_per_second": 14.469, |
| "train_steps_per_second": 0.151 |
| } |