| model,aime24_acc,aime24_pass_acc,aime24_tokens,aime24_keywords,aime24_correct_tokens,aime24_wrong_tokens,aime24_clip_ratio,aime24_stop_tokens,aime24_stop_ratio,aime24_box_ratio,aime24_repeat_ratio,aime25_acc,aime25_pass_acc,aime25_tokens,aime25_keywords,aime25_correct_tokens,aime25_wrong_tokens,aime25_clip_ratio,aime25_stop_tokens,aime25_stop_ratio,aime25_box_ratio,aime25_repeat_ratio,amc23_acc,amc23_pass_acc,amc23_tokens,amc23_keywords,amc23_correct_tokens,amc23_wrong_tokens,amc23_clip_ratio,amc23_stop_tokens,amc23_stop_ratio,amc23_box_ratio,amc23_repeat_ratio,gsm8k_acc,gsm8k_pass_acc,gsm8k_tokens,gsm8k_keywords,gsm8k_correct_tokens,gsm8k_wrong_tokens,gsm8k_clip_ratio,gsm8k_stop_tokens,gsm8k_stop_ratio,gsm8k_box_ratio,gsm8k_repeat_ratio,math500_acc,math500_pass_acc,math500_tokens,math500_keywords,math500_correct_tokens,math500_wrong_tokens,math500_clip_ratio,math500_stop_tokens,math500_stop_ratio,math500_box_ratio,math500_repeat_ratio,minerva_math_acc,minerva_math_pass_acc,minerva_math_tokens,minerva_math_keywords,minerva_math_correct_tokens,minerva_math_wrong_tokens,minerva_math_clip_ratio,minerva_math_stop_tokens,minerva_math_stop_ratio,minerva_math_box_ratio,minerva_math_repeat_ratio,mmlu_stem_acc,mmlu_stem_pass_acc,mmlu_stem_tokens,mmlu_stem_keywords,mmlu_stem_correct_tokens,mmlu_stem_wrong_tokens,mmlu_stem_clip_ratio,mmlu_stem_stop_tokens,mmlu_stem_stop_ratio,mmlu_stem_box_ratio,mmlu_stem_repeat_ratio,olympiadbench_acc,olympiadbench_pass_acc,olympiadbench_tokens,olympiadbench_keywords,olympiadbench_correct_tokens,olympiadbench_wrong_tokens,olympiadbench_clip_ratio,olympiadbench_stop_tokens,olympiadbench_stop_ratio,olympiadbench_box_ratio,olympiadbench_repeat_ratio,avg_acc,avg_pass_acc,avg_tokens,avg_keywords,avg_correct_tokens,avg_wrong_tokens,avg_clip_ratio,avg_stop_tokens,avg_stop_ratio,avg_box_ratio,avg_repeat_ratio | |
| eval_results-global_step_0,3.3,3.3,2037.2,0.6333333333333333,1031.0,2071.896551724138,0.06666666666666667,978.5714285714286,0.9333333333333333,0.8666666666666667,0.7,3.3,3.3,1940.6,0.5,481.0,1990.9310344827586,0.06666666666666667,936.3571428571429,0.9333333333333333,0.9,0.7666666666666667,27.5,27.5,1545.675,0.25,702.0909090909091,1865.655172413793,0.05,784.8684210526316,0.95,0.825,0.75,61.5,61.5,940.4283548142532,0.047763457164518575,435.5992601726264,1746.3661417322835,0.03411675511751327,295.63186813186815,0.9658832448824868,0.7733131159969674,0.27748294162244125,42.4,42.4,1263.42,0.194,586.9009433962265,1761.4131944444443,0.034,684.0041407867495,0.966,0.886,0.536,9.9,9.9,1564.7463235294117,0.26838235294117646,445.1111111111111,1688.134693877551,0.03676470588235294,785.6793893129772,0.9632352941176471,0.7757352941176471,0.5036764705882353,40.5,40.5,720.636845593108,3.686216037110669,540.3112203112203,843.1619365609349,0.02253147779986746,333.8810169491525,0.9774685222001326,0.6189529489728297,0.4847581179589132,16.9,16.9,1594.2103703703704,1.2651851851851852,655.6666666666666,1784.9304812834225,0.04296296296296296,878.859133126935,0.957037037037037,0.8474074074074074,0.682962962962963,25.6625,25.6625,1450.864611788393,0.8556100457168603,609.7100138435951,1719.061150814916,0.04421365438700375,709.7315675986107,0.9557863456129962,0.8116344291451898,0.5876933949749024 | |
| eval_results-global_step_10,0.0,0.0,1816.4333333333334,0.9,0.0,1816.4333333333334,0.06666666666666667,803.5714285714286,0.9333333333333333,0.9333333333333333,0.7333333333333333,3.3,3.3,836.2,0.6,736.0,839.6551724137931,0.0,836.2,1.0,1.0,0.7333333333333333,25.0,25.0,879.725,0.55,608.0,970.3,0.0,879.725,1.0,0.95,0.7,75.4,75.4,383.70507960576197,0.04624715693707354,337.930583501006,523.7046153846154,0.004548900682335102,304.38537699923836,0.9954510993176648,0.9575435936315391,0.22062168309325247,57.0,57.0,878.026,0.608,480.2105263157895,1405.3627906976744,0.012,687.4048582995952,0.988,0.974,0.498,18.8,18.8,853.9852941176471,0.33455882352941174,464.52941176470586,943.8597285067873,0.014705882352941176,630.4440298507462,0.9852941176470589,0.9338235294117647,0.49264705882352944,44.4,44.4,501.7385685884692,0.5016567263088137,364.1498881431767,611.7602862254025,0.009940357852882704,345.87248995983936,0.9900596421471173,0.8184227965540093,0.5271703114645461,22.2,22.2,1433.8785185185186,1.0355555555555556,625.9466666666667,1664.7161904761904,0.034074074074074076,920.8773006134969,0.965925925925926,0.9407407407407408,0.6814814814814815,30.7625,30.7625,947.9614742704663,0.5720022827913568,452.0958845489181,1096.9740146297245,0.017741985203612465,676.0600605367931,0.9822580147963875,0.9384829992089234,0.5733234001911844 | |
| eval_results-global_step_20,3.3,3.3,1742.6666666666667,0.6666666666666666,1353.0,1756.103448275862,0.03333333333333333,1251.1724137931035,0.9666666666666667,0.9333333333333333,0.8666666666666667,3.3,3.3,1435.2,0.5333333333333333,762.0,1458.4137931034484,0.03333333333333333,933.1034482758621,0.9666666666666667,0.9666666666666667,0.6666666666666666,22.5,22.5,1939.95,2.575,632.2222222222222,2319.6129032258063,0.075,830.5135135135135,0.925,0.925,0.65,79.8,79.8,290.42608036391204,0.04094010614101592,274.7654320987654,352.42105263157896,0.0,290.42608036391204,1.0,0.9992418498862775,0.21531463229719486,57.4,57.4,669.05,0.4,446.64808362369337,968.7183098591549,0.004,607.4899598393574,0.996,0.986,0.494,24.6,24.6,787.5477941176471,0.16176470588235295,497.04477611940297,882.4926829268293,0.007352941176470588,674.8888888888889,0.9926470588235294,0.9705882352941176,0.48161764705882354,45.9,45.9,455.8774022531478,0.3518886679920477,344.08020231213874,550.5697674418604,0.004970178926441352,376.16550116550115,0.9950298210735586,0.9449966865473823,0.5738899933730948,24.3,24.3,1408.917037037037,0.34814814814814815,649.1646341463414,1652.7514677103718,0.034074074074074076,894.6457055214723,0.965925925925926,0.9496296296296296,0.6992592592592592,32.6375,32.6375,1091.2043725548012,0.6347177035204458,619.8656688153204,1242.6354281468641,0.024007982605456585,732.3006889202013,0.9759920173945432,0.9594320501696758,0.5809268581652132 | |
| eval_results-global_step_30,6.7,6.7,1395.9666666666667,0.36666666666666664,675.5,1447.4285714285713,0.03333333333333333,892.4137931034483,0.9666666666666667,0.9333333333333333,0.7,6.7,6.7,1161.2666666666667,0.6333333333333333,941.0,1177.0,0.0,1161.2666666666667,1.0,0.9666666666666667,0.6,30.0,30.0,1225.15,0.45,657.25,1468.5357142857142,0.025,846.3333333333334,0.975,0.975,0.7,80.1,80.1,318.9264594389689,0.04852160727824109,283.4683065279092,461.9770992366412,0.000758150113722517,307.0288315629742,0.9992418498862775,0.9977255496588324,0.21834723275208492,59.4,59.4,784.176,0.168,489.4410774410774,1215.3891625615763,0.014,568.131845841785,0.986,0.982,0.5,22.1,22.1,756.0772058823529,0.16911764705882354,504.9,827.1650943396227,0.007352941176470588,646.8222222222222,0.9926470588235294,0.9705882352941176,0.4522058823529412,51.0,51.0,498.35023194168326,0.5559973492379059,384.0688758934373,617.2677484787018,0.006958250497017893,392.05672339005673,0.9930417495029821,0.963220675944334,0.5841616964877402,27.3,27.3,1357.611851851852,0.45037037037037037,681.3369565217391,1611.0427698574338,0.03111111111111111,883.980122324159,0.9688888888888889,0.9555555555555556,0.6459259259259259,35.4125,35.4125,937.190635306024,0.3552508717431676,577.1206520480204,1103.2257700235327,0.014814223278956932,712.2541923055807,0.9851857767210432,0.968011252056605,0.5500800921898366 | |
| eval_results-global_step_40,6.7,6.7,2110.6,0.6333333333333333,605.0,2218.1428571428573,0.06666666666666667,1118.642857142857,0.9333333333333333,0.9333333333333333,0.9,0.0,0.0,1960.4333333333334,0.4666666666666667,0.0,1960.4333333333334,0.06666666666666667,957.6785714285714,0.9333333333333333,0.9333333333333333,0.7666666666666667,27.5,27.5,1727.5,0.425,555.7272727272727,2171.9655172413795,0.025,1361.5641025641025,0.975,0.95,0.625,81.3,81.3,316.6391205458681,0.03639120545868082,294.52935694315005,413.0772357723577,0.000758150113722517,304.8285280728376,0.9992418498862775,0.9977255496588324,0.20849128127369218,64.8,64.8,819.456,0.48,460.03395061728395,1481.1193181818182,0.012,635.1295546558705,0.988,0.982,0.478,27.2,27.2,849.4889705882352,0.13970588235294118,512.3648648648649,975.4848484848485,0.011029411764705883,680.546468401487,0.9889705882352942,0.9852941176470589,0.47058823529411764,54.8,54.8,456.3293571901922,0.38999337309476473,369.08348457350274,561.9831501831502,0.0023194168323392977,420.1956160743939,0.9976805831676607,0.9781312127236581,0.6050364479787939,28.6,28.6,1579.1629629629629,0.92,714.0518134715026,1925.5663900414938,0.04148148148148148,955.1112828438949,0.9585185185185185,0.9437037037037037,0.677037037037037,36.362500000000004,36.362500000000004,1227.4512180775737,0.4363863076132984,438.8488428996971,1463.4715812976551,0.028240224190697813,804.2121226480018,0.9717597758093023,0.9629401562999899,0.5913524585312885 | |
| eval_results-global_step_50,3.3,3.3,1380.7666666666667,0.6333333333333333,597.0,1407.7931034482758,0.03333333333333333,876.6896551724138,0.9666666666666667,0.9666666666666667,0.7,3.3,3.3,1339.8,0.6666666666666666,1210.0,1344.2758620689656,0.03333333333333333,834.2758620689655,0.9666666666666667,0.9666666666666667,0.7333333333333333,37.5,37.5,1306.6,2.8,823.8666666666667,1596.24,0.025,929.9487179487179,0.975,0.975,0.575,81.9,81.9,353.5799848369977,0.07733131159969674,298.2453703703704,603.6276150627615,0.002274450341167551,317.911094224924,0.9977255496588324,0.9969673995451099,0.22896133434420016,65.2,65.2,709.784,0.2,479.4171779141104,1141.3908045977012,0.006,617.4949698189134,0.994,0.99,0.498,26.8,26.8,812.9889705882352,0.1801470588235294,487.4246575342466,932.4170854271357,0.003676470588235294,758.2140221402213,0.9963235294117647,0.9852941176470589,0.5183823529411765,56.7,56.7,511.2889330682571,0.5695825049701789,379.6002337814144,683.6832440703902,0.005632869449966865,423.61912695768075,0.9943671305500331,0.9774685222001326,0.6136514247846255,27.6,27.6,1540.408888888889,0.3125925925925926,674.0967741935484,1869.926380368098,0.04148148148148148,915.0618238021639,0.9585185185185185,0.9496296296296296,0.6681481481481482,37.7875,37.7875,994.4021805061307,0.6799566834982496,618.7063600575447,1197.4192618804161,0.018841492315939734,709.1519090167501,0.9811585076840602,0.9759616252944081,0.5669345741939354 | |
| eval_results-global_step_60,10.0,10.0,2410.233333333333,0.6666666666666666,862.3333333333334,2582.222222222222,0.1,900.3333333333334,0.9,0.9,0.7333333333333333,3.3,3.3,1924.7333333333333,7.8,794.0,1963.7241379310344,0.06666666666666667,919.3928571428571,0.9333333333333333,0.9333333333333333,0.8666666666666667,37.5,37.5,829.925,0.375,685.0666666666667,916.84,0.0,829.925,1.0,1.0,0.575,83.1,83.1,324.6050037907506,0.045489006823351025,302.3905109489051,433.7847533632287,0.0,324.6050037907506,1.0,0.9984836997725549,0.2441243366186505,64.8,64.8,926.742,0.618,528.5895061728395,1659.7045454545455,0.02,619.1510204081633,0.98,0.978,0.504,29.0,29.0,798.8198529411765,0.1875,546.7215189873418,902.0103626943005,0.007352941176470588,686.2185185185185,0.9926470588235294,0.9926470588235294,0.5183823529411765,58.1,58.1,509.7074221338635,0.62259774685222,429.939497716895,620.0971563981043,0.003644797879390325,452.9684070502162,0.9963552021206097,0.9774685222001326,0.6176275679257787,27.6,27.6,1478.5288888888888,0.35703703703703704,677.3118279569892,1783.2862985685072,0.034074074074074076,966.2914110429448,0.965925925925926,0.9540740740740741,0.677037037037037,39.175000000000004,39.175000000000004,1150.4118543026682,1.3340363071724093,603.2941077228713,1357.708684578993,0.02896730997457521,712.360693910848,0.9710326900254248,0.9667508360254531,0.5920214118153303 | |
| eval_results-global_step_70,6.7,6.7,1438.1333333333334,0.6,541.5,1502.1785714285713,0.03333333333333333,936.0,0.9666666666666667,0.9666666666666667,0.9,10.0,10.0,1666.8333333333333,1.8333333333333333,3368.0,1477.8148148148148,0.03333333333333333,1172.7241379310344,0.9666666666666667,0.9333333333333333,0.6666666666666666,35.0,35.0,868.8,0.25,691.5,964.2692307692307,0.0,868.8,1.0,1.0,0.75,82.3,82.3,351.11675511751326,0.0576194086429113,303.32442396313365,572.7179487179487,0.001516300227445034,327.3545937737282,0.9984836997725549,0.9977255496588324,0.2517058377558757,64.4,64.4,851.0,0.224,537.9782608695652,1417.252808988764,0.014,635.8782961460446,0.986,0.982,0.516,28.3,28.3,725.4558823529412,0.19852941176470587,526.3246753246754,804.0871794871795,0.003676470588235294,669.0885608856089,0.9963235294117647,0.9963235294117647,0.5404411764705882,60.0,60.0,513.0513585155733,0.6322067594433399,405.64403973509934,674.4295190713101,0.0026507620941020544,471.968438538206,0.9973492379058979,0.9811133200795229,0.6500994035785288,27.1,27.1,1437.631111111111,0.6666666666666666,724.1584699453551,1703.0081300813008,0.028148148148148148,1014.2149390243902,0.9718518518518519,0.9585185185185185,0.7022222222222222,39.22500000000001,39.22500000000001,981.5027217204756,0.5577944474813696,887.3037337297286,1139.46977541989,0.01458229346557465,762.0036207873766,0.9854177065344254,0.9769601147085798,0.6221419133367352 | |
| eval_results-global_step_80,6.7,6.7,3223.6666666666665,2.966666666666667,713.5,3402.964285714286,0.13333333333333333,1258.3461538461538,0.8666666666666667,0.8333333333333334,0.8333333333333334,0.0,0.0,1018.2,0.9666666666666667,0.0,1018.2,0.0,1018.2,1.0,1.0,0.7333333333333333,40.0,40.0,858.425,0.175,739.4375,937.75,0.0,858.425,1.0,1.0,0.8,81.6,81.6,337.5693707354056,0.1106899166034875,308.72955390334573,465.2716049382716,0.000758150113722517,325.6858877086495,0.9992418498862775,0.9977255496588324,0.27520849128127367,65.8,65.8,902.826,0.296,507.90273556231006,1662.6491228070176,0.016,657.3577235772358,0.984,0.98,0.522,28.3,28.3,756.5808823529412,0.27941176470588236,574.2467532467532,828.5794871794872,0.003676470588235294,700.3468634686346,0.9963235294117647,0.9926470588235294,0.5257352941176471,60.2,60.2,548.6855533465872,0.7852882703777336,458.73733480176213,684.5806988352746,0.005301524188204109,466.4663557628248,0.9946984758117959,0.9774685222001326,0.6444665341285619,29.2,29.2,1362.3837037037038,0.7037037037037037,741.725888324873,1618.1778242677824,0.023703703703703703,1007.0015174506829,0.9762962962962963,0.9614814814814815,0.717037037037037,38.975,38.975,1126.042147100663,0.7854283735905176,505.53497072988057,1327.271627967765,0.02284664774089987,786.4786877267727,0.9771533522591,0.9678319931871636,0.6313892529038982 | |
| eval_results-global_step_90,10.0,10.0,1972.8333333333333,0.8,914.3333333333334,2090.4444444444443,0.03333333333333333,1489.344827586207,0.9666666666666667,0.9666666666666667,0.7666666666666667,6.7,6.7,1147.4666666666667,0.8,1175.0,1145.5,0.0,1147.4666666666667,1.0,0.9666666666666667,0.7,40.0,40.0,2062.0,0.325,754.625,2933.5833333333335,0.075,932.5135135135135,0.925,0.925,0.725,82.9,82.9,352.68915845337375,0.05534495830174375,308.3729433272395,568.1644444444445,0.000758150113722517,340.8171471927162,0.9992418498862775,0.9969673995451099,0.2608036391205459,66.0,66.0,730.612,0.276,510.8484848484849,1157.2117647058824,0.004,669.289156626506,0.996,0.988,0.524,29.0,29.0,882.2316176470588,0.6875,611.7848101265823,992.9326424870467,0.007352941176470588,769.8592592592593,0.9926470588235294,0.9816176470588235,0.5404411764705882,61.1,61.1,501.2876076872101,0.5480450629555997,431.6793271839392,610.4689361702127,0.0019880715705765406,470.41434262948206,0.9980119284294234,0.9817760106030484,0.6517561298873427,29.9,29.9,1476.4874074074073,0.4666666666666667,734.8613861386139,1793.2071881606764,0.03111111111111111,1010.5825688073395,0.9688888888888889,0.9540740740740741,0.6874074074074074,40.7,40.7,1140.7009738993813,0.49481958599050135,680.1881606197743,1411.4390942182551,0.019192950913151763,853.7859352852113,0.9808070490868481,0.9700960580767987,0.6070093774440689 | |
| eval_results-global_step_100,3.3,3.3,3031.266666666667,0.8,1164.0,3095.655172413793,0.1,1590.7037037037037,0.9,0.8666666666666667,0.7333333333333333,6.7,6.7,909.7333333333333,1.1333333333333333,773.0,919.5,0.0,909.7333333333333,1.0,1.0,0.7666666666666667,40.0,40.0,1053.625,0.5,804.0,1220.0416666666667,0.0,1053.625,1.0,0.975,0.725,83.5,83.5,375.40485216072784,0.053828658074298714,306.84650317892823,721.6559633027523,0.003032600454890068,327.67300380228136,0.9969673995451099,0.9954510993176648,0.27824109173616374,65.6,65.6,763.142,0.308,574.3719512195122,1123.1220930232557,0.008,640.2641129032259,0.992,0.99,0.54,29.4,29.4,760.125,0.23529411764705882,547.8625,848.5677083333334,0.003676470588235294,703.90036900369,0.9963235294117647,0.9889705882352942,0.5404411764705882,63.0,63.0,552.1166335321404,0.8330019880715706,444.35526315789474,735.2531305903399,0.004970178926441352,474.7688977688978,0.9950298210735586,0.9791252485089463,0.6597084161696488,30.1,30.1,1489.5822222222223,0.677037037037037,798.3694581280788,1786.8622881355932,0.02666666666666667,1091.867579908676,0.9733333333333334,0.957037037037037,0.7125925925925926,40.2,40.2,1116.8744634893862,0.5675618917704124,676.6007094605518,1306.3322528082167,0.018293239579529174,849.067000052976,0.9817067604204709,0.9690313299707012,0.6194979096211242 | |