| global_step,aime24_acc_avg4,aime25_acc_avg4,amc23_acc_avg4,aime24_acc,aime25_acc,amc23_acc,gsm8k_acc,math500_acc,minerva_math_acc,olympiadbench_acc,mmlu_stem_acc,prompt_level_strict_acc_ood,gpqa_pass@1:1_samples_ood | |
| 0,2.50,1.70,21.20,3.30,0.00,15.00,59.90,46.60,14.00,18.70,40.30,21.1, | |
| 10,5.80,1.70,31.20,3.30,3.30,37.50,74.00,54.00,18.80,22.80,43.90,20.3, | |
| 20,7.50,2.50,31.90,6.70,3.30,27.50,78.20,57.80,25.40,24.70,45.50,20.3, | |
| 30,5.80,3.30,36.20,3.30,0.00,37.50,80.10,60.40,22.40,25.00,47.90,20.9, | |
| 40,5.80,1.70,35.00,10.00,6.70,32.50,79.90,65.00,23.50,24.70,50.70,21.3, | |
| 50,6.70,4.20,38.10,6.70,0.00,35.00,81.20,64.20,25.70,24.60,53.70,25.1, | |
| 60,5.80,5.80,37.50,3.30,0.00,35.00,81.30,63.60,28.30,26.70,56.00,24.0, | |
| 70,5.00,1.70,35.60,3.30,0.00,42.50,82.30,61.80,26.50,28.10,55.50,26.2, | |
| 80,6.70,7.50,35.60,6.70,3.30,25.00,82.00,64.40,24.30,26.40,59.40,25.0, | |
| 90,8.30,1.70,42.50,10.00,10.00,42.50,82.60,64.40,26.50,25.50,60.70,24.4, | |
| 100,5.80,7.50,30.60,6.70,6.70,32.50,82.00,65.20,29.40,28.30,60.70,24.2, | |