{ "eval_accuracy": 1.0, "eval_loss": 0.9172223210334778, "eval_margin": 0.109375, "eval_max_reward": 2.6875, "eval_mean_reward": 2.6328125, "eval_min_reward": 2.578125, "eval_num_tokens": 1778.0, "eval_runtime": 0.7199, "eval_samples_per_second": 1.389, "eval_steps_per_second": 1.389 }