| { |
| "epoch": 1.998667732480682, |
| "eval_logits/chosen": -13.45787239074707, |
| "eval_logits/rejected": -12.986124992370605, |
| "eval_logps/chosen": -209.70858764648438, |
| "eval_logps/rejected": -266.514404296875, |
| "eval_loss": 0.535685658454895, |
| "eval_rewards/accuracies": 0.7565789818763733, |
| "eval_rewards/chosen": -4.100185394287109, |
| "eval_rewards/margins": 1.7237099409103394, |
| "eval_rewards/rejected": -5.823895454406738, |
| "eval_runtime": 15.7482, |
| "eval_samples_per_second": 19.304, |
| "eval_steps_per_second": 2.413, |
| "total_flos": 174218888085504.0, |
| "train_loss": 0.33488652056087054, |
| "train_runtime": 7850.5465, |
| "train_samples_per_second": 7.649, |
| "train_steps_per_second": 0.239 |
| } |