tmp-qwinvmnwp1 / eval_results /eval_results.csv
bensondccnqwc's picture
Add files using upload-large-folder tool
2ef6674 verified
raw
history blame
15.6 kB
model,aime24_acc,aime24_pass_acc,aime24_tokens,aime24_keywords,aime24_correct_tokens,aime24_wrong_tokens,aime24_clip_ratio,aime24_stop_tokens,aime24_stop_ratio,aime24_box_ratio,aime24_repeat_ratio,aime25_acc,aime25_pass_acc,aime25_tokens,aime25_keywords,aime25_correct_tokens,aime25_wrong_tokens,aime25_clip_ratio,aime25_stop_tokens,aime25_stop_ratio,aime25_box_ratio,aime25_repeat_ratio,amc23_acc,amc23_pass_acc,amc23_tokens,amc23_keywords,amc23_correct_tokens,amc23_wrong_tokens,amc23_clip_ratio,amc23_stop_tokens,amc23_stop_ratio,amc23_box_ratio,amc23_repeat_ratio,gsm8k_acc,gsm8k_pass_acc,gsm8k_tokens,gsm8k_keywords,gsm8k_correct_tokens,gsm8k_wrong_tokens,gsm8k_clip_ratio,gsm8k_stop_tokens,gsm8k_stop_ratio,gsm8k_box_ratio,gsm8k_repeat_ratio,math500_acc,math500_pass_acc,math500_tokens,math500_keywords,math500_correct_tokens,math500_wrong_tokens,math500_clip_ratio,math500_stop_tokens,math500_stop_ratio,math500_box_ratio,math500_repeat_ratio,minerva_math_acc,minerva_math_pass_acc,minerva_math_tokens,minerva_math_keywords,minerva_math_correct_tokens,minerva_math_wrong_tokens,minerva_math_clip_ratio,minerva_math_stop_tokens,minerva_math_stop_ratio,minerva_math_box_ratio,minerva_math_repeat_ratio,mmlu_stem_acc,mmlu_stem_pass_acc,mmlu_stem_tokens,mmlu_stem_keywords,mmlu_stem_correct_tokens,mmlu_stem_wrong_tokens,mmlu_stem_clip_ratio,mmlu_stem_stop_tokens,mmlu_stem_stop_ratio,mmlu_stem_box_ratio,mmlu_stem_repeat_ratio,olympiadbench_acc,olympiadbench_pass_acc,olympiadbench_tokens,olympiadbench_keywords,olympiadbench_correct_tokens,olympiadbench_wrong_tokens,olympiadbench_clip_ratio,olympiadbench_stop_tokens,olympiadbench_stop_ratio,olympiadbench_box_ratio,olympiadbench_repeat_ratio,avg_acc,avg_pass_acc,avg_tokens,avg_keywords,avg_correct_tokens,avg_wrong_tokens,avg_clip_ratio,avg_stop_tokens,avg_stop_ratio,avg_box_ratio,avg_repeat_ratio
eval_results-global_step_0,3.3,3.3,5497.1,6.2,731.0,5661.448275862069,0.16666666666666666,1482.16,0.8333333333333334,0.7333333333333333,0.6666666666666666,3.3,3.3,3035.5666666666666,2.566666666666667,497.0,3123.103448275862,0.13333333333333333,736.1923076923077,0.8666666666666667,0.7333333333333333,0.7333333333333333,22.5,22.5,2372.3,0.275,612.1111111111111,2883.3225806451615,0.1,806.1944444444445,0.9,0.875,0.75,59.4,59.4,936.2373009855952,3.067475360121304,470.01785714285717,1619.4448598130841,0.037149355572403335,293.4007874015748,0.9628506444275967,0.7482941622441244,0.2676269901440485,41.0,41.0,1458.172,0.678,925.5268292682927,1828.3152542372882,0.048,606.0609243697479,0.952,0.822,0.526,11.0,11.0,1264.4926470588234,0.17279411764705882,2071.266666666667,1164.4793388429753,0.029411764705882353,792.5378787878788,0.9705882352941176,0.8308823529411765,0.48161764705882354,39.1,39.1,756.6398277004639,0.35321404903909875,510.91178965224765,914.1783578031539,0.024519549370444003,312.8760190217391,0.975480450629556,0.6219350563286945,0.5003313452617627,19.3,19.3,1829.8725925925926,1.1037037037037036,677.0461538461539,2104.85871559633,0.06074074074074074,895.8123028391167,0.9392592592592592,0.8266666666666667,0.6725925925925926,24.8625,24.8625,2143.7976293755178,1.8021067371472292,811.8600509609162,2412.3938538844905,0.0749776762986838,740.6543330696012,0.9250223237013162,0.773930613105916,0.5747710718821535
eval_results-global_step_10,6.7,6.7,3538.733333333333,0.8,1469.0,3686.5714285714284,0.16666666666666666,1046.84,0.8333333333333334,0.8333333333333334,0.8,6.7,6.7,971.5666666666667,0.7666666666666667,1083.5,963.5714285714286,0.0,971.5666666666667,1.0,0.9666666666666667,0.8,37.5,37.5,748.925,0.3,610.5333333333333,831.96,0.0,748.925,1.0,1.0,0.55,74.1,74.1,379.94996209249433,0.18574677786201668,319.7389969293756,551.9561403508771,0.004548900682335102,289.4699162223915,0.9954510993176648,0.9673995451099318,0.21834723275208492,57.8,57.8,709.708,0.148,452.07612456747404,1062.5781990521327,0.008,586.4233870967741,0.992,0.984,0.474,21.0,21.0,755.0514705882352,0.6433823529411765,491.7719298245614,824.8511627906977,0.007352941176470588,642.1444444444444,0.9926470588235294,0.9375,0.5036764705882353,42.9,42.9,500.7100728959576,0.5165672630881378,369.4367283950617,599.5081300813008,0.007952286282306162,367.0661322645291,0.9920477137176938,0.8204108681245859,0.5291583830351226,21.0,21.0,1474.6355555555556,0.32296296296296295,662.7676056338029,1690.9305816135084,0.04,869.0401234567901,0.96,0.9451851851851852,0.6370370370370371,33.462500000000006,33.462500000000006,1134.9100076415302,0.46041575294012016,682.3530898354511,1276.4908838789215,0.029315099350972314,690.1844587689495,0.9706849006490278,0.9318119498024628,0.56402739042656
eval_results-global_step_20,6.7,6.7,722.8333333333334,0.3,540.5,735.8571428571429,0.0,722.8333333333334,1.0,1.0,0.8,0.0,0.0,1402.3666666666666,0.43333333333333335,0.0,1402.3666666666666,0.03333333333333333,899.1724137931035,0.9666666666666667,0.9333333333333333,0.8333333333333334,25.0,25.0,1519.4,0.175,691.0,1795.5333333333333,0.025,1148.051282051282,0.975,0.95,0.75,77.6,77.6,310.27445034116755,0.07202426080363912,276.0390625,429.11186440677966,0.000758150113722517,298.43247344461304,0.9992418498862775,0.9984836997725549,0.21152388172858225,59.4,59.4,810.632,0.188,507.28619528619527,1254.4433497536945,0.014,594.9756592292089,0.986,0.978,0.492,24.6,24.6,737.3639705882352,0.20588235294117646,472.13432835820896,824.0487804878048,0.003676470588235294,681.4686346863468,0.9963235294117647,0.9705882352941176,0.4632352941176471,46.0,46.0,412.50231941683234,0.5076209410205434,334.77737752161386,478.68773006134967,0.0019880715705765406,381.4166666666667,0.9980119284294234,0.9555997349237906,0.587143803843605,24.1,24.1,1291.0474074074075,0.8429629629629629,598.1533742331288,1511.63671875,0.025185185185185185,910.1489361702128,0.9748148148148148,0.9585185185185185,0.6725925925925926,32.925,32.925,900.8025184692053,0.34060298138270695,427.4862922373934,1053.9606982895964,0.012992651348881609,704.5624249218458,0.9870073486511184,0.9680654402302894,0.6012286132019701
eval_results-global_step_30,6.7,6.7,1064.7666666666667,0.6,624.5,1096.2142857142858,0.0,1064.7666666666667,1.0,0.9666666666666667,0.7333333333333333,10.0,10.0,809.7666666666667,0.4,959.0,793.1851851851852,0.0,809.7666666666667,1.0,1.0,0.6333333333333333,35.0,35.0,1524.9,0.2,668.2142857142857,1986.1923076923076,0.05,763.2631578947369,0.95,0.95,0.7,78.9,78.9,303.4776345716452,0.039423805913570885,275.05571565802114,409.9064748201439,0.000758150113722517,291.56752655538696,0.9992418498862775,0.9984836997725549,0.20545868081880211,63.2,63.2,588.564,0.424,457.9145569620253,812.9402173913044,0.002,557.9098196392786,0.998,0.998,0.47,26.8,26.8,678.9007352941177,0.14705882352941177,444.90410958904107,764.7386934673367,0.003676470588235294,623.5608856088561,0.9963235294117647,0.9816176470588235,0.47058823529411764,50.6,50.6,402.526507620941,0.43339960238568587,334.87688277668633,471.8095238095238,0.0009940357852882703,387.0066334991708,0.9990059642147118,0.9821073558648111,0.6159708416169649,24.9,24.9,1185.242962962963,0.3496296296296296,662.7142857142857,1358.3885601577908,0.01925925925925926,894.1812688821752,0.9807407407407407,0.9703703703703703,0.6503703703703704,37.0125,37.0125,819.768146722875,0.32418898268228724,553.3974795517931,961.6719060297348,0.009585989468313169,674.0028281766173,0.9904140105316869,0.9809057174666533,0.5598818493458652
eval_results-global_step_40,10.0,10.0,1547.9666666666667,0.4666666666666667,821.0,1628.7407407407406,0.03333333333333333,1049.6896551724137,0.9666666666666667,0.9666666666666667,0.6666666666666666,3.3,3.3,785.2,0.7,1334.0,766.2758620689655,0.0,785.2,1.0,1.0,0.7333333333333333,40.0,40.0,744.775,0.35,697.9375,776.0,0.0,744.775,1.0,1.0,0.75,80.9,80.9,290.2562547384382,0.03866565579984837,277.622305529522,343.75,0.0,290.2562547384382,1.0,0.9992418498862775,0.1728582259287339,64.0,64.0,553.62,0.242,474.0125,695.1444444444444,0.0,553.62,1.0,1.0,0.466,26.1,26.1,599.7610294117648,0.15808823529411764,512.056338028169,630.7412935323383,0.0,599.7610294117648,1.0,1.0,0.47058823529411764,56.8,56.8,372.73426110006625,0.3992710404241219,347.2728862973761,406.24635456638526,0.0,372.73426110006625,1.0,0.9847581179589132,0.617296222664016,28.4,28.4,770.1407407407407,0.397037037037037,617.1354166666666,830.9627329192547,0.0014814814814814814,747.5489614243323,0.9985185185185185,0.997037037037037,0.6251851851851852,38.68749999999999,38.68749999999999,708.0567440822095,0.34396607940272395,635.1296183152168,759.732678534016,0.0043518518518518515,642.9481452308769,0.9956481481481482,0.9934629589436118,0.5627409836340066
eval_results-global_step_50,3.3,3.3,839.3333333333334,1.1333333333333333,524.0,850.2068965517242,0.0,839.3333333333334,1.0,1.0,0.6666666666666666,3.3,3.3,812.7333333333333,1.0333333333333334,1007.0,806.0344827586207,0.0,812.7333333333333,1.0,1.0,0.6,35.0,35.0,795.875,0.35,640.1428571428571,879.7307692307693,0.0,795.875,1.0,1.0,0.575,81.0,81.0,294.0652009097801,0.05686125852918878,280.12827715355803,353.36653386454185,0.0,294.0652009097801,1.0,0.9992418498862775,0.155420773313116,63.2,63.2,549.488,0.288,457.2278481012658,707.9347826086956,0.0,549.488,1.0,0.996,0.452,27.9,27.9,594.4742647058823,0.21691176470588236,491.17105263157896,634.530612244898,0.0,594.4742647058823,1.0,0.9963235294117647,0.4852941176470588,57.7,57.7,381.3194168323393,0.45957587806494365,358.6295232624928,412.25371965544247,0.0,381.3194168323393,1.0,0.9847581179589132,0.5904572564612326,27.0,27.0,736.5155555555556,0.4888888888888889,617.2142857142857,780.5578093306289,0.0,736.5155555555556,1.0,0.9881481481481481,0.6325925925925926,37.300000000000004,37.300000000000004,625.4755130837781,0.5033630571069463,546.9392305007548,678.076950780665,0.0,625.4755130837781,1.0,0.9955589556756378,0.5196789258350833
eval_results-global_step_60,6.7,6.7,831.7,1.1,564.5,850.7857142857143,0.0,831.7,1.0,0.9666666666666667,0.7333333333333333,3.3,3.3,772.5333333333333,0.8333333333333334,510.0,781.5862068965517,0.0,772.5333333333333,1.0,1.0,0.6666666666666666,22.5,22.5,755.75,0.55,617.1111111111111,796.0,0.0,755.75,1.0,1.0,0.625,80.2,80.2,299.41243366186507,0.06595905989385899,286.5037807183365,351.7394636015326,0.0,299.41243366186507,1.0,0.9992418498862775,0.16148597422289612,64.2,64.2,537.652,0.28,472.93146417445485,653.7150837988827,0.0,537.652,1.0,0.996,0.45,24.6,24.6,584.2426470588235,0.2757352941176471,464.95522388059703,623.229268292683,0.0,584.2426470588235,1.0,0.9889705882352942,0.40441176470588236,59.2,59.2,387.0238568588469,0.48840291583830353,363.62136465324386,421.0430894308943,0.0,387.0238568588469,1.0,0.9837640821736249,0.6030483764082174,25.9,25.9,719.2281481481482,0.5703703703703704,618.4457142857143,754.502,0.0,719.2281481481482,1.0,0.9940740740740741,0.5896296296296296,35.824999999999996,35.824999999999996,610.9428023826272,0.5204751216941892,487.2585823529322,654.0751032882824,0.0,610.9428023826272,1.0,0.9910896576294922,0.5291969681208282
eval_results-global_step_70,13.3,13.3,762.2666666666667,1.1,664.25,777.3461538461538,0.0,762.2666666666667,1.0,0.9666666666666667,0.6666666666666666,6.7,6.7,778.9666666666667,1.0,1018.5,761.8571428571429,0.0,778.9666666666667,1.0,1.0,0.5666666666666667,32.5,32.5,741.1,0.375,652.0769230769231,783.9629629629629,0.0,741.1,1.0,0.925,0.55,80.7,80.7,309.48673237300983,0.0932524639878696,295.12488262910796,369.70472440944883,0.0,309.48673237300983,1.0,0.9901440485216073,0.1516300227445034,63.2,63.2,558.01,0.412,471.31962025316454,706.8913043478261,0.0,558.01,1.0,0.978,0.472,27.2,27.2,598.8235294117648,0.3235294117647059,505.6216216216216,633.6565656565657,0.0,598.8235294117648,1.0,0.9742647058823529,0.4338235294117647,60.9,60.9,406.6583830351226,0.546388336646786,383.4507348938487,442.7569856054191,0.0,406.6583830351226,1.0,0.9615639496355202,0.5960901259111995,26.1,26.1,733.8488888888888,0.642962962962963,623.6875,772.7034068136272,0.0,733.8488888888888,1.0,0.9762962962962963,0.6088888888888889,38.824999999999996,38.824999999999996,611.145108380265,0.5616416469202905,576.7539103093332,656.1099058123933,0.0,611.145108380265,1.0,0.9714919583753054,0.5057207375362112
eval_results-global_step_80,3.3,3.3,641.4666666666667,0.9333333333333333,443.0,648.3103448275862,0.0,641.4666666666667,1.0,0.5333333333333333,0.5333333333333333,0.0,0.0,723.8,1.0666666666666667,0.0,723.8,0.0,723.8,1.0,0.6,0.7333333333333333,25.0,25.0,677.8,0.6,595.8,705.1333333333333,0.0,677.8,1.0,0.7,0.575,78.7,78.7,317.65731614859743,0.10007581501137225,303.45375722543355,370.12455516014234,0.0,317.65731614859743,1.0,0.9257012888551933,0.15921152388172857,57.0,57.0,546.294,0.37,459.4877192982456,661.3627906976744,0.0,546.294,1.0,0.772,0.44,21.7,21.7,593.0330882352941,0.34558823529411764,512.728813559322,615.2769953051643,0.0,593.0330882352941,1.0,0.8455882352941176,0.43014705882352944,44.6,44.6,389.63916500994037,0.49734923790589797,388.177431328879,390.81747456612806,0.0,389.63916500994037,1.0,0.6156394963552021,0.5728959575878065,22.7,22.7,652.5511111111111,0.5703703703703704,606.3202614379085,666.1015325670498,0.0,652.5511111111111,1.0,0.6755555555555556,0.5466666666666666,31.624999999999996,31.624999999999996,567.7801683964512,0.5604229573227197,413.62099785622354,597.6158783071347,0.0,567.7801683964512,1.0,0.7084772386741752,0.4988234842032997
eval_results-global_step_90,3.3,3.3,735.5666666666667,0.9333333333333333,665.0,738.0,0.0,735.5666666666667,1.0,0.9,0.6666666666666666,3.3,3.3,723.1333333333333,0.9333333333333333,704.0,723.7931034482758,0.0,723.1333333333333,1.0,0.9333333333333333,0.6666666666666666,37.5,37.5,694.375,0.375,596.2,753.28,0.0,694.375,1.0,0.975,0.675,80.0,80.0,324.56633813495074,0.0887035633055345,308.15829383886256,390.1363636363636,0.0,324.56633813495074,1.0,0.9825625473843821,0.1463229719484458,60.4,60.4,543.8,0.304,460.19867549668874,671.3131313131313,0.0,543.8,1.0,0.952,0.444,21.7,21.7,589.5955882352941,0.31985294117647056,485.271186440678,618.4929577464789,0.0,589.5955882352941,1.0,0.9154411764705882,0.39705882352941174,53.1,53.1,394.06726308813785,0.49105367793240556,395.34956304619226,392.61652542372883,0.0,394.06726308813785,1.0,0.7975480450629556,0.5838303512259775,25.3,25.3,689.4666666666667,0.5866666666666667,604.4093567251462,718.3253968253969,0.0,689.4666666666667,1.0,0.9155555555555556,0.5925925925925926,35.575,35.575,586.8213570156311,0.503992939468468,527.323384443446,625.7446847991719,0.0,586.8213570156311,1.0,0.9214300822258519,0.52151725907872
eval_results-global_step_100,6.7,6.7,729.7666666666667,1.1666666666666667,561.5,741.7857142857143,0.0,729.7666666666667,1.0,0.9666666666666667,0.6333333333333333,0.0,0.0,737.4333333333333,1.0333333333333334,0.0,737.4333333333333,0.0,737.4333333333333,1.0,0.9333333333333333,0.6,30.0,30.0,716.975,0.5,628.5,754.8928571428571,0.0,716.975,1.0,0.9,0.6,80.9,80.9,322.8817285822593,0.07429871114480667,311.7357075913777,370.0753968253968,0.0,322.8817285822593,1.0,0.9772554965883244,0.15238817285822592,56.2,56.2,525.704,0.358,454.0711743772242,617.6164383561644,0.0,525.704,1.0,0.922,0.428,21.0,21.0,554.9963235294117,0.2867647058823529,493.4035087719298,571.3255813953489,0.0,554.9963235294117,1.0,0.8088235294117647,0.4117647058823529,45.3,45.3,374.2266401590457,0.44897282968853547,400.4035087719298,352.52363636363634,0.0,374.2266401590457,1.0,0.6504307488402916,0.5417495029821073,25.3,25.3,667.0251851851851,0.5392592592592592,605.9590643274854,687.7440476190476,0.0,667.0251851851851,1.0,0.92,0.5674074074074074,33.175000000000004,33.175000000000004,578.6261096819877,0.5509119382468693,431.9466204799934,604.1746256651874,0.0,578.6261096819877,1.0,0.8848137218550476,0.4918303903079284