{ "epoch": 0.995910949568378, "eval_logits/chosen": -6.920379161834717, "eval_logits/rejected": -6.706053733825684, "eval_logps/chosen": -0.6168988347053528, "eval_logps/rejected": -0.7138991951942444, "eval_loss": 4.313390731811523, "eval_rewards/accuracies": 0.6024590134620667, "eval_rewards/chosen": -6.168987274169922, "eval_rewards/margins": 0.9700047373771667, "eval_rewards/rejected": -7.138991832733154, "eval_runtime": 65.9401, "eval_samples": 1941, "eval_samples_per_second": 29.436, "eval_steps_per_second": 1.85, "total_flos": 0.0, "train_loss": 4.287031420826041, "train_runtime": 1653.4486, "train_samples": 17605, "train_samples_per_second": 10.647, "train_steps_per_second": 0.083 }