| global_step,aime24_acc_avg4,aime25_acc_avg4,amc23_acc_avg4,aime24_acc,aime25_acc,amc23_acc,gsm8k_acc,math500_acc,minerva_math_acc,olympiadbench_acc,mmlu_stem_acc,prompt_level_strict_acc_ood,gpqa_pass@1:1_samples_ood | |
| 0,3.30,2.50,25.00,3.30,3.30,22.50,59.40,41.00,11.00,19.30,39.10,20.5, | |
| 10,4.20,3.30,32.50,6.70,6.70,37.50,74.10,57.80,21.00,21.00,42.90,22.0, | |
| 20,5.00,2.50,35.60,6.70,0.00,25.00,77.60,59.40,24.60,24.10,46.00,25.9, | |
| 30,5.00,5.80,36.20,6.70,10.00,35.00,78.90,63.20,26.80,24.90,50.60,29.2, | |
| 40,5.80,1.70,35.00,10.00,3.30,40.00,80.90,64.00,26.10,28.40,56.80,27.7, | |
| 50,5.80,0.80,40.00,3.30,3.30,35.00,81.00,63.20,27.90,27.00,57.70,25.7, | |
| 60,4.20,1.70,44.40,6.70,3.30,22.50,80.20,64.20,24.60,25.90,59.20,26.1, | |
| 70,9.20,0.80,35.00,13.30,6.70,32.50,80.70,63.20,27.20,26.10,60.90,25.5, | |
| 80,7.50,2.50,30.60,3.30,0.00,25.00,78.70,57.00,21.70,22.70,44.60,22.4, | |
| 90,4.20,0.00,26.20,3.30,3.30,37.50,80.00,60.40,21.70,25.30,53.10,24.0, | |
| 100,4.20,0.00,30.00,6.70,0.00,30.00,80.90,56.20,21.00,25.30,45.30,22.2, | |