| model,gsm8k_acc,gsm8k_pass_acc,gsm8k_tokens,gsm8k_keywords,gsm8k_correct_tokens,gsm8k_wrong_tokens,gsm8k_clip_ratio,gsm8k_stop_tokens,gsm8k_stop_ratio,gsm8k_box_ratio,gsm8k_repeat_ratio,math500_acc,math500_pass_acc,math500_tokens,math500_keywords,math500_correct_tokens,math500_wrong_tokens,math500_clip_ratio,math500_stop_tokens,math500_stop_ratio,math500_box_ratio,math500_repeat_ratio,minerva_math_acc,minerva_math_pass_acc,minerva_math_tokens,minerva_math_keywords,minerva_math_correct_tokens,minerva_math_wrong_tokens,minerva_math_clip_ratio,minerva_math_stop_tokens,minerva_math_stop_ratio,minerva_math_box_ratio,minerva_math_repeat_ratio,mmlu_stem_acc,mmlu_stem_pass_acc,mmlu_stem_tokens,mmlu_stem_keywords,mmlu_stem_correct_tokens,mmlu_stem_wrong_tokens,mmlu_stem_clip_ratio,mmlu_stem_stop_tokens,mmlu_stem_stop_ratio,mmlu_stem_box_ratio,mmlu_stem_repeat_ratio,olympiadbench_acc,olympiadbench_pass_acc,olympiadbench_tokens,olympiadbench_keywords,olympiadbench_correct_tokens,olympiadbench_wrong_tokens,olympiadbench_clip_ratio,olympiadbench_stop_tokens,olympiadbench_stop_ratio,olympiadbench_box_ratio,olympiadbench_repeat_ratio,avg_acc,avg_pass_acc,avg_tokens,avg_keywords,avg_correct_tokens,avg_wrong_tokens,avg_clip_ratio,avg_stop_tokens,avg_stop_ratio,avg_box_ratio,avg_repeat_ratio |
| eval_results-global_step_10,90.0,90.0,297.9658832448825,0.037149355572403335,276.0454928390901,495.0833333333333,0.000758150113722517,284.6039453717754,0.9992418498862775,0.9924184988627748,0.14783927217589082,75.6,75.6,745.9,0.456,506.2275132275132,1488.4918032786886,0.006,652.4164989939637,0.994,0.988,0.468,27.9,27.9,788.7867647058823,0.25,749.171052631579,804.1479591836735,0.003676470588235294,732.6642066420665,0.9963235294117647,0.9558823529411765,0.4742647058823529,60.1,60.1,425.62591119946984,0.34161696487740223,368.8450082735797,511.0564315352697,0.0026507620941020544,384.2305647840532,0.9973492379058979,0.961895294897283,0.5424121935056329,37.2,37.2,1149.3096296296296,0.7333333333333333,705.9681274900398,1411.7594339622642,0.011851851851851851,971.5592203898051,0.9881481481481481,0.9733333333333334,0.6592592592592592,58.160000000000004,58.160000000000004,681.5176377559728,0.3636199307566278,521.2514388923603,942.1077922586459,0.004987446929582343,605.0948872363327,0.9950125530704177,0.9743058960069136,0.45835508616462717 |
| eval_results-global_step_20,90.4,90.4,329.5557240333586,0.13646702047005307,284.3640939597315,753.7165354330708,0.002274450341167551,293.88829787234044,0.9977255496588324,0.9977255496588324,0.12585291887793784,77.2,77.2,731.878,0.418,517.779792746114,1456.8070175438597,0.002,701.2825651302605,0.998,0.99,0.462,34.2,34.2,674.1580882352941,0.1213235294117647,516.2795698924731,756.1843575418994,0.003676470588235294,617.6273062730627,0.9963235294117647,0.9926470588235294,0.4742647058823529,60.2,60.2,426.8793903247184,0.5974155069582505,371.46919691969197,510.8258333333333,0.0016567263088137839,400.9920345170926,0.9983432736911863,0.9827700463883366,0.5546719681908548,39.7,39.7,1227.0533333333333,0.5614814814814815,792.4253731343283,1513.2457002457002,0.017777777777777778,959.710407239819,0.9822222222222222,0.9718518518518519,0.6370370370370371,60.339999999999996,60.339999999999996,677.9049071853409,0.36693750766430994,496.4636053304677,998.1558888195726,0.005477085003198882,594.7001222065151,0.9945229149968011,0.9869989013445102,0.4507653259976365 |
| eval_results-global_step_30,92.1,92.1,314.0310841546626,0.0887035633055345,294.7753086419753,538.9903846153846,0.000758150113722517,302.13050075872536,0.9992418498862775,0.9984836997725549,0.12282031842304776,76.8,76.8,740.888,1.118,542.9765625,1396.0431034482758,0.002,710.314629258517,0.998,0.994,0.502,40.8,40.8,701.7389705882352,0.19117647058823528,529.7387387387388,820.3229813664597,0.003676470588235294,645.350553505535,0.9963235294117647,0.9852941176470589,0.49264705882352944,63.4,63.4,412.60205434062294,0.403247183565275,373.4628661087866,480.2640144665461,0.0006626905235255136,402.28149867374003,0.9993373094764745,0.9837640821736249,0.5583167660702452,39.7,39.7,1431.7837037037036,1.32,749.7723880597015,1880.872235872236,0.023703703703703703,1078.019726858877,0.9762962962962963,0.957037037037037,0.6681481481481482,62.55999999999999,62.55999999999999,720.208762557445,0.624225443491809,498.14517280984046,1023.2985439537804,0.006160202985837406,627.6193818110789,0.9938397970141626,0.9837157873260551,0.46878645829299403 |
| eval_results-global_step_40,92.4,92.4,322.20318423047763,0.1865049279757392,291.233798195242,699.72,0.0,322.20318423047763,1.0,0.9962092494313874,0.12585291887793784,78.0,78.0,896.814,0.604,585.8435897435897,1999.3454545454545,0.01,744.3333333333334,0.99,0.986,0.474,37.9,37.9,669.7720588235294,0.34191176470588236,541.8834951456311,747.7159763313609,0.0,669.7720588235294,1.0,0.9926470588235294,0.45588235294117646,65.0,65.0,435.149436713055,0.4178263750828363,407.7121752419766,486.20094786729857,0.0016567263088137839,409.459010952539,0.9983432736911863,0.9834327369118622,0.5453943008614976,40.0,40.0,1546.2162962962964,1.4637037037037037,828.9444444444445,2024.3975308641975,0.02962962962962963,1105.7053435114503,0.9703703703703703,0.9511111111111111,0.6622222222222223,62.660000000000004,62.660000000000004,774.0309952126715,0.6027893542936323,531.1235005541768,1191.4759819216622,0.008257271187688684,650.2945861702659,0.9917427288123115,0.981880031255578,0.45267035898056684 |
| eval_results-global_step_50,92.2,92.2,318.1652767247915,0.08794541319181198,291.24342105263156,636.0,0.000758150113722517,306.3285280728376,0.9992418498862775,0.9984836997725549,0.13646702047005307,79.0,79.0,914.92,0.864,556.1873417721519,2264.4380952380952,0.012,732.4271255060729,0.988,0.986,0.474,39.3,39.3,815.1397058823529,0.2536764705882353,534.7570093457944,996.9636363636364,0.007352941176470588,702.8444444444444,0.9926470588235294,0.9852941176470589,0.5073529411764706,66.8,66.8,455.0473823724321,0.6815772034459907,376.1318790282598,614.0609390609391,0.0033134526176275677,403.4644281914894,0.9966865473823724,0.9837640821736249,0.547713717693837,41.6,41.6,1718.1777777777777,2.5407407407407407,830.814946619217,2351.043147208122,0.03851851851851852,1145.6101694915253,0.9614814814814815,0.957037037037037,0.6533333333333333,63.78000000000001,63.78000000000001,844.2900285514709,0.8855879655933556,517.826919563611,1372.5011635741585,0.012388612485267838,658.1349391412739,0.9876113875147322,0.9821157873260551,0.4637734025347388 |
| eval_results-global_step_60,92.2,92.2,318.92418498862776,0.06823351023502654,305.0493421052632,482.7281553398058,0.0,318.92418498862776,1.0,0.9992418498862775,0.13646702047005307,81.2,81.2,905.92,1.192,591.2142857142857,2265.18085106383,0.006,814.82092555332,0.994,0.988,0.496,39.7,39.7,838.8235294117648,0.5845588235294118,609.7592592592592,989.670731707317,0.007352941176470588,726.5259259259259,0.9926470588235294,0.9852941176470589,0.48161764705882354,71.3,71.3,457.058648111332,0.5006626905235255,403.3559479553903,590.5092378752887,0.0023194168323392977,420.88077050813683,0.9976805831676607,0.9831013916500994,0.5357852882703777,41.9,41.9,1403.7407407407406,1.5866666666666667,856.0212014134275,1799.1607142857142,0.017777777777777778,1138.5686274509803,0.9822222222222222,0.9718518518518519,0.64,65.26,65.26,784.893420650493,0.7864243381909262,553.0800072895252,1225.4499380543912,0.0066900271573175326,683.9440868853982,0.9933099728426825,0.9854978422070575,0.4579739911598509 |
| eval_results-global_step_70,93.0,93.0,339.1319181197877,0.266868840030326,294.92257538712306,928.75,0.002274450341167551,303.3328267477204,0.9977255496588324,0.9977255496588324,0.12054586808188021,81.0,81.0,837.746,0.636,612.3802469135802,1798.5157894736842,0.0,837.746,1.0,0.998,0.464,41.5,41.5,713.9889705882352,0.24632352941176472,527.9026548672566,846.2389937106918,0.003676470588235294,657.6125461254612,0.9963235294117647,0.9963235294117647,0.4338235294117647,74.1,74.1,467.97282968853546,0.5089463220675944,392.6317673378076,683.0268199233717,0.002982107355864811,421.52210036556994,0.9970178926441352,0.9821073558648111,0.5351225977468522,43.7,43.7,1608.6725925925925,1.288888888888889,887.0881355932204,2168.85,0.02666666666666667,1215.0121765601218,0.9733333333333334,0.9644444444444444,0.6874074074074074,66.66,66.66,793.5024621978303,0.5894055160797148,542.9850760197976,1285.0763206215495,0.007119938990386865,687.0451299597746,0.9928800610096131,0.9877201758759705,0.4481798805295809 |
| eval_results-global_step_80,92.3,92.3,340.7338893100834,0.14404852160727824,308.898931799507,720.5686274509804,0.002274450341167551,305.0387537993921,0.9977255496588324,0.9977255496588324,0.1326762699014405,80.0,80.0,886.88,0.882,601.3975,2028.81,0.004,826.2389558232932,0.996,0.992,0.474,38.2,38.2,790.1286764705883,1.1580882352941178,607.0480769230769,903.4642857142857,0.007352941176470588,677.4740740740741,0.9926470588235294,0.9926470588235294,0.4742647058823529,76.5,76.5,457.18124585818424,0.69350563286945,406.67071057192373,621.3760563380282,0.0019880715705765406,426.04913678618857,0.9980119284294234,0.9844267726971504,0.5473823724320742,42.4,42.4,1764.7985185185184,2.1214814814814815,888.6643356643357,2408.948586118252,0.03259259259259259,1285.0336906584992,0.9674074074074074,0.9614814814814815,0.6785185185185185,65.88,65.88,847.9444660314748,0.9998247742504656,562.5359109917687,1336.6335111243093,0.009641611136161455,703.9669222282895,0.9903583888638385,0.9856561725321988,0.4613683733468772 |
| eval_results-global_step_90,93.1,93.1,333.76648976497347,0.155420773313116,299.12296416938113,801.2637362637363,0.000758150113722517,321.88088012139605,0.9992418498862775,0.9984836997725549,0.12206216830932524,82.4,82.4,973.79,1.226,643.2111650485436,2521.5,0.008,853.1612903225806,0.992,0.986,0.494,39.3,39.3,807.9669117647059,1.0661764705882353,756.392523364486,841.4121212121212,0.003676470588235294,751.9114391143911,0.9963235294117647,0.9889705882352942,0.45955882352941174,77.9,77.9,436.93671305500334,0.42909211398277003,411.5027647809443,526.5847076461769,0.0003313452617627568,431.94862446138546,0.9996686547382373,0.9867461895294898,0.5506958250497018,44.7,44.7,1721.3392592592593,2.745185185185185,947.8145695364238,2347.6246648793567,0.02666666666666667,1330.1887366818873,0.9733333333333334,0.9585185185185185,0.6948148148148148,67.48,67.48,854.7598747687883,1.1243749086138615,611.6087973799557,1407.677046000278,0.007886526526077447,737.818194140328,0.9921134734739224,0.9837437992111715,0.4642263263406507 |
| eval_results-global_step_100,93.3,93.3,327.6360879454132,0.18271417740712662,302.2113821138211,679.0112359550562,0.000758150113722517,315.6631259484067,0.9992418498862775,0.9984836997725549,0.1326762699014405,80.4,80.4,934.582,1.188,589.9253731343283,2348.377551020408,0.004,874.4819277108434,0.996,0.996,0.524,41.2,41.2,797.0330882352941,0.7463235294117647,556.7589285714286,965.225,0.003676470588235294,740.9335793357934,0.9963235294117647,0.9889705882352942,0.4852941176470588,79.9,79.9,441.13021868787274,0.5324718356527501,413.7375621890547,550.1584158415842,0.0006626905235255136,430.8113395225464,0.9993373094764745,0.9857521537442014,0.5646123260437376,44.6,44.6,1886.8133333333333,3.5007407407407407,1097.4451827242524,2522.1069518716577,0.02666666666666667,1500.1217656012177,0.9733333333333334,0.965925925925926,0.7096296296296296,67.88,67.88,877.4389456403827,1.2300500566424764,592.015685746577,1412.9758309377412,0.007152795578429999,772.4023476237614,0.99284720442157,0.9870264735355953,0.48324246864437337 |
|
|