| global_step,aime24_acc_avg16,aime25_acc_avg16,amc23_acc_avg16,aime24_acc_avg32,aime25_acc_avg32,amc23_acc_avg32,aime24_acc,aime25_acc,amc23_acc,gsm8k_acc,math500_acc,minerva_math_acc,olympiadbench_acc,mmlu_stem_acc,prompt_level_strict_acc_ood,gpqa_pass@1:1_samples_ood | |
| 0,,,,,,,6.70,0.00,20.00,60.10,44.20,10.30,16.00,40.70,17.7,0.27 | |
| 10,,,,,,,10.00,0.00,32.50,73.20,55.60,19.10,22.10,42.40,20.0,0.27 | |
| 20,,,,,,,13.30,0.00,40.00,78.80,60.60,26.50,25.50,46.20,20.0,0.23 | |
| 30,,,,,,,3.30,3.30,32.50,79.60,60.80,26.80,27.00,49.50,22.9,0.27 | |
| 40,,,,,,,13.30,6.70,32.50,80.30,61.80,28.30,26.40,51.80,22.2,0.29 | |
| 50,,,,,,,6.70,0.00,37.50,80.40,65.20,27.60,25.90,54.40,25.5,0.32 | |
| 60,,,,,,,10.00,0.00,37.50,81.70,63.60,29.40,25.50,55.00,24.4,0.28 | |
| 70,,,,,,,13.30,3.30,42.50,80.40,64.60,27.20,29.50,58.40,25.1,0.26 | |
| 80,,,,,,,6.70,3.30,32.50,81.80,63.40,29.40,29.90,57.90,24.0,0.30 | |
| 90,,,,,,,3.30,0.00,37.50,83.30,66.40,29.80,29.60,58.50,25.9,0.31 | |
| 100,,,,,,,3.30,3.30,42.50,82.70,65.20,30.10,30.40,59.10,26.2,0.24 | |