run,step,metric,value,stderr Baseline,1000,ai2d_exact_match,0.2548575129533679,0.007843322436924496 Baseline,1000,average,0.27120689295763617, Baseline,1000,average_rank,1.7, Baseline,1000,chartqa_relaxed_overall,0.3308,0.009411906161401973 Baseline,1000,docvqa_val_anls,0.3528553494243383,0.005852289239342309 Baseline,1000,infovqa_val_anls,0.17320578642581314,0.006297063452679795 Baseline,1000,mme_total_score,977.4280712284914, Baseline,1000,mmmu_val_mmmu_acc,0.25222, Baseline,1000,mmstar_average,0.23215874078908072, Baseline,1000,ocrbench_ocrbench_accuracy,0.286, Baseline,1000,seedbench_seed_all,0.2563646470261256, Baseline,1000,textvqa_val_exact_match,0.3024,0.00628900296642181 Baseline,2000,ai2d_exact_match,0.26295336787564766,0.007923526907377255 Baseline,2000,average,0.3202068275596269, Baseline,2000,average_rank,1.5, Baseline,2000,chartqa_relaxed_overall,0.4688,0.009982508912777261 Baseline,2000,docvqa_val_anls,0.4452261510942785,0.00614755494712251 Baseline,2000,infovqa_val_anls,0.1820547866557169,0.006217861455795791 Baseline,2000,mme_total_score,1049.3036214485794, Baseline,2000,mmmu_val_mmmu_acc,0.24556, Baseline,2000,mmstar_average,0.21305462434540698, Baseline,2000,ocrbench_ocrbench_accuracy,0.395, Baseline,2000,seedbench_seed_all,0.258532518065592, Baseline,2000,textvqa_val_exact_match,0.41068000000000005,0.006697862330024289 Baseline,3000,ai2d_exact_match,0.25226683937823835,0.007816909588794397 Baseline,3000,average,0.3507423834414229, Baseline,3000,average_rank,1.6, Baseline,3000,chartqa_relaxed_overall,0.5028,0.010001843767601082 Baseline,3000,docvqa_val_anls,0.502653993831009,0.006267072346683124 Baseline,3000,infovqa_val_anls,0.21728617578189535,0.006796941784959762 Baseline,3000,mme_total_score,1170.2383953581434, Baseline,3000,mmmu_val_mmmu_acc,0.27556, Baseline,3000,mmstar_average,0.25432376938577683, Baseline,3000,ocrbench_ocrbench_accuracy,0.436, Baseline,3000,seedbench_seed_all,0.2792106725958866, Baseline,3000,textvqa_val_exact_match,0.43658,0.006766885462882726 Baseline,4000,ai2d_exact_match,0.2645725388601036,0.007939149662089447 Baseline,4000,average,0.36961781722974835, Baseline,4000,average_rank,1.6, Baseline,4000,chartqa_relaxed_overall,0.5312,0.009982508912777261 Baseline,4000,docvqa_val_anls,0.5374434618615119,0.0062905728113059655 Baseline,4000,infovqa_val_anls,0.2287924838861707,0.006994568698639919 Baseline,4000,mme_total_score,1155.203781512605, Baseline,4000,mmmu_val_mmmu_acc,0.25556, Baseline,4000,mmstar_average,0.2575590188757354, Baseline,4000,ocrbench_ocrbench_accuracy,0.453, Baseline,4000,seedbench_seed_all,0.33913285158421347, Baseline,4000,textvqa_val_exact_match,0.4593,0.006791695475025738 Baseline,5000,ai2d_exact_match,0.3125,0.008342439145556371 Baseline,5000,average,0.3974627910380972, Baseline,5000,average_rank,1.6, Baseline,5000,chartqa_relaxed_overall,0.5488,0.00995424828018316 Baseline,5000,docvqa_val_anls,0.552360266782429,0.006300308519952055 Baseline,5000,infovqa_val_anls,0.23425555286643698,0.007002254622066442 Baseline,5000,mme_total_score,1181.4653861544618, Baseline,5000,mmmu_val_mmmu_acc,0.26667, Baseline,5000,mmstar_average,0.29596648146165705, Baseline,5000,ocrbench_ocrbench_accuracy,0.462, Baseline,5000,seedbench_seed_all,0.43107281823235133, Baseline,5000,textvqa_val_exact_match,0.47354000000000007,0.0068172185364497985 Baseline,6000,ai2d_exact_match,0.358160621761658,0.008629463221867162 Baseline,6000,average,0.4161227404571003, Baseline,6000,average_rank,1.7, Baseline,6000,chartqa_relaxed_overall,0.5628,0.00992279440175477 Baseline,6000,docvqa_val_anls,0.5747451497228876,0.00625495440870239 Baseline,6000,infovqa_val_anls,0.22152017368968838,0.006604546680525351 Baseline,6000,mme_total_score,1284.1648659463785, Baseline,6000,mmmu_val_mmmu_acc,0.27111, Baseline,6000,mmstar_average,0.2978489412854164, Baseline,6000,ocrbench_ocrbench_accuracy,0.495, Baseline,6000,seedbench_seed_all,0.4795997776542524, Baseline,6000,textvqa_val_exact_match,0.48432,0.006800535050670284 Baseline,7000,ai2d_exact_match,0.3707901554404145,0.00869347755587734 Baseline,7000,average,0.4291083177345374, Baseline,7000,average_rank,1.6, Baseline,7000,chartqa_relaxed_overall,0.5656,0.009915542506251351 Baseline,7000,docvqa_val_anls,0.5940907049431567,0.006224236305767187 Baseline,7000,infovqa_val_anls,0.2515675215816963,0.007105097396092786 Baseline,7000,mme_total_score,1185.875650260104, Baseline,7000,mmmu_val_mmmu_acc,0.26556, Baseline,7000,mmstar_average,0.31372400960777047, Baseline,7000,ocrbench_ocrbench_accuracy,0.504, Baseline,7000,seedbench_seed_all,0.4964424680377988, Baseline,7000,textvqa_val_exact_match,0.5002,0.006794794025220267 Baseline,8000,ai2d_exact_match,0.37759067357512954,0.008725299846043883 Baseline,8000,average,0.43846759477995995, Baseline,8000,average_rank,1.8, Baseline,8000,chartqa_relaxed_overall,0.5832,0.009862556058385773 Baseline,8000,docvqa_val_anls,0.6017336419437208,0.006231612198089698 Baseline,8000,infovqa_val_anls,0.2449256624147254,0.006992518502948913 Baseline,8000,mme_total_score,1199.2409963985594, Baseline,8000,mmmu_val_mmmu_acc,0.28111, Baseline,8000,mmstar_average,0.33512257186205047, Baseline,8000,ocrbench_ocrbench_accuracy,0.51, Baseline,8000,seedbench_seed_all,0.5024458032240133, Baseline,8000,textvqa_val_exact_match,0.51008,0.006796301690135059 Baseline,9000,ai2d_exact_match,0.4067357512953368,0.008841214921078996 Baseline,9000,average,0.4422510732201056, Baseline,9000,average_rank,1.8, Baseline,9000,chartqa_relaxed_overall,0.5912,0.009834211136815875 Baseline,9000,docvqa_val_anls,0.6170968481662739,0.00617235763542544 Baseline,9000,infovqa_val_anls,0.23537031288570615,0.00670318154156447 Baseline,9000,mme_total_score,1231.5195078031213, Baseline,9000,mmmu_val_mmmu_acc,0.25889, Baseline,9000,mmstar_average,0.3216444898242951, Baseline,9000,ocrbench_ocrbench_accuracy,0.515, Baseline,9000,seedbench_seed_all,0.5120622568093385, Baseline,9000,textvqa_val_exact_match,0.52226,0.006792711289708482 Baseline,10000,ai2d_exact_match,0.39993523316062174,0.008817096257082848 Baseline,10000,average,0.4523875703250908, Baseline,10000,average_rank,1.7, Baseline,10000,chartqa_relaxed_overall,0.5996,0.00980154906867574 Baseline,10000,docvqa_val_anls,0.6262613496433054,0.006147756371688175 Baseline,10000,infovqa_val_anls,0.263290074230132,0.007186788766942786 Baseline,10000,mme_total_score,1240.8218287314926, Baseline,10000,mmmu_val_mmmu_acc,0.28778, Baseline,10000,mmstar_average,0.32972717906018517, Baseline,10000,ocrbench_ocrbench_accuracy,0.517, Baseline,10000,seedbench_seed_all,0.5217342968315731, Baseline,10000,textvqa_val_exact_match,0.5261600000000001,0.006785774843600811 Baseline,11000,ai2d_exact_match,0.422279792746114,0.008889771831066474 Baseline,11000,average,0.4561398159525099, Baseline,11000,average_rank,1.7, Baseline,11000,chartqa_relaxed_overall,0.6104,0.009755142291143075 Baseline,11000,docvqa_val_anls,0.6373130149166712,0.006128022584995044 Baseline,11000,infovqa_val_anls,0.24419378339723755,0.006897644885887063 Baseline,11000,mme_total_score,1322.9488795518205, Baseline,11000,mmmu_val_mmmu_acc,0.27778, Baseline,11000,mmstar_average,0.3298563439522548, Baseline,11000,ocrbench_ocrbench_accuracy,0.521, Baseline,11000,seedbench_seed_all,0.5237354085603113, Baseline,11000,textvqa_val_exact_match,0.5387,0.006770851562852138 Baseline,12000,ai2d_exact_match,0.42001295336787564,0.008883255931688034 Baseline,12000,average,0.4582751140055433, Baseline,12000,average_rank,1.7, Baseline,12000,chartqa_relaxed_overall,0.618,0.009719474639861454 Baseline,12000,docvqa_val_anls,0.6393961983751871,0.0061228747388476674 Baseline,12000,infovqa_val_anls,0.24798874058574302,0.006855374548993139 Baseline,12000,mme_total_score,1225.6453581432572, Baseline,12000,mmmu_val_mmmu_acc,0.27889, Baseline,12000,mmstar_average,0.34010867846816534, Baseline,12000,ocrbench_ocrbench_accuracy,0.512, Baseline,12000,seedbench_seed_all,0.5350194552529183, Baseline,12000,textvqa_val_exact_match,0.5330600000000001,0.006777713092109446 Baseline,13000,ai2d_exact_match,0.4375,0.008928571428571428 Baseline,13000,average,0.4692868662590049, Baseline,13000,average_rank,1.4, Baseline,13000,chartqa_relaxed_overall,0.6148,0.00973479791861169 Baseline,13000,docvqa_val_anls,0.6511374872549951,0.006086953065248391 Baseline,13000,infovqa_val_anls,0.24465055100441893,0.006808432538374664 Baseline,13000,mme_total_score,1281.7122849139657, Baseline,13000,mmmu_val_mmmu_acc,0.28222, Baseline,13000,mmstar_average,0.3453069542917521, Baseline,13000,ocrbench_ocrbench_accuracy,0.549, Baseline,13000,seedbench_seed_all,0.5442468037798777, Baseline,13000,textvqa_val_exact_match,0.55472,0.0067416788982325 Baseline,14000,ai2d_exact_match,0.4572538860103627,0.00896620675297095 Baseline,14000,average,0.47352486841689195, Baseline,14000,average_rank,1.3, Baseline,14000,chartqa_relaxed_overall,0.6172,0.009723347231923635 Baseline,14000,docvqa_val_anls,0.6502269393708169,0.006057950730638126 Baseline,14000,infovqa_val_anls,0.25805460837190913,0.007037735231659539 Baseline,14000,mme_total_score,1309.1444577831132, Baseline,14000,mmmu_val_mmmu_acc,0.28111, Baseline,14000,mmstar_average,0.34575818188776586, Baseline,14000,ocrbench_ocrbench_accuracy,0.551, Baseline,14000,seedbench_seed_all,0.5483602001111729, Baseline,14000,textvqa_val_exact_match,0.55276,0.006751206724612103 Baseline,15000,ai2d_exact_match,0.45045336787564766,0.008954861634252399 Baseline,15000,average,0.47878665012878824, Baseline,15000,average_rank,1.2, Baseline,15000,chartqa_relaxed_overall,0.612,0.009747841205275417 Baseline,15000,docvqa_val_anls,0.6621413031955148,0.006056838050222495 Baseline,15000,infovqa_val_anls,0.2706898598157733,0.007200315730154543 Baseline,15000,mme_total_score,1384.2171868747498, Baseline,15000,mmmu_val_mmmu_acc,0.30222, Baseline,15000,mmstar_average,0.35408135695920684, Baseline,15000,ocrbench_ocrbench_accuracy,0.558, Baseline,15000,seedbench_seed_all,0.5411339633129516, Baseline,15000,textvqa_val_exact_match,0.5583600000000001,0.0067279027203879065 Baseline,16000,ai2d_exact_match,0.45077720207253885,0.008955440137395838 Baseline,16000,average,0.47665128022935843, Baseline,16000,average_rank,1.5, Baseline,16000,chartqa_relaxed_overall,0.632,0.00964715642305132 Baseline,16000,docvqa_val_anls,0.6709415729142987,0.005999818105621502 Baseline,16000,infovqa_val_anls,0.26050032542402035,0.006997451875879188 Baseline,16000,mme_total_score,1317.8491396558625, Baseline,16000,mmmu_val_mmmu_acc,0.27556, Baseline,16000,mmstar_average,0.33214333327093315, Baseline,16000,ocrbench_ocrbench_accuracy,0.56, Baseline,16000,seedbench_seed_all,0.5463590883824346, Baseline,16000,textvqa_val_exact_match,0.56158,0.006723854754867398 Baseline,17000,ai2d_exact_match,0.45919689119170987,0.008969138793675545 Baseline,17000,average,0.4777141780162423, Baseline,17000,average_rank,1.2, Baseline,17000,chartqa_relaxed_overall,0.632,0.00964715642305132 Baseline,17000,docvqa_val_anls,0.6796338519136422,0.005948761388267941 Baseline,17000,infovqa_val_anls,0.28070956072505215,0.007298333094144192 Baseline,17000,mme_total_score,1381.9161664665867, Baseline,17000,mmmu_val_mmmu_acc,0.27667, Baseline,17000,mmstar_average,0.3370289492329521, Baseline,17000,ocrbench_ocrbench_accuracy,0.519, Baseline,17000,seedbench_seed_all,0.5510283490828238, Baseline,17000,textvqa_val_exact_match,0.56416,0.006724830373229479 Baseline,18000,ai2d_exact_match,0.46567357512953367,0.008977921602780726 Baseline,18000,average,0.4819834595278701, Baseline,18000,average_rank,1.1, Baseline,18000,chartqa_relaxed_overall,0.6376,0.009615793331418735 Baseline,18000,docvqa_val_anls,0.6775884603912571,0.005972234236435759 Baseline,18000,infovqa_val_anls,0.27154318420389256,0.007164903131667027 Baseline,18000,mme_total_score,1336.922769107643, Baseline,18000,mmmu_val_mmmu_acc,0.28667, Baseline,18000,mmstar_average,0.34482796716566916, Baseline,18000,ocrbench_ocrbench_accuracy,0.533, Baseline,18000,seedbench_seed_all,0.5543079488604781, Baseline,18000,textvqa_val_exact_match,0.5666399999999999,0.006713392287599574 Baseline,19000,ai2d_exact_match,0.4682642487046632,0.008981008686994101 Baseline,19000,average,0.4899006713916878, Baseline,19000,chartqa_relaxed_overall,0.6444,0.009575809858898698 Baseline,19000,docvqa_val_anls,0.678226526479947,0.005970619221588814 Baseline,19000,infovqa_val_anls,0.26993847247278,0.0071348470764911525 Baseline,19000,mme_total_score,1406.6628651460583, Baseline,19000,mmmu_val_mmmu_acc,0.28333, Baseline,19000,mmstar_average,0.356220913822775, Baseline,19000,ocrbench_ocrbench_accuracy,0.577, Baseline,19000,seedbench_seed_all,0.554585881045025, Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905 Baseline,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662 Baseline,20000,average,0.4873169067639118, Baseline,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618 Baseline,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767 Baseline,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538 Baseline,20000,mme_total_score,1324.6738695478193, Baseline,20000,mmmu_val_mmmu_acc,0.30111, Baseline,20000,mmstar_average,0.33806766134497995, Baseline,20000,ocrbench_ocrbench_accuracy,0.555, Baseline,20000,seedbench_seed_all,0.5587548638132296, Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659 Remove Multilingual Data,1000,ai2d_exact_match,0.2619818652849741,0.007914086941902855 Remove Multilingual Data,1000,average,0.29340443385847137, Remove Multilingual Data,1000,average_rank,1.3, Remove Multilingual Data,1000,chartqa_relaxed_overall,0.3736,0.009677121197436144 Remove Multilingual Data,1000,docvqa_val_anls,0.403140100303888,0.006111323163666132 Remove Multilingual Data,1000,infovqa_val_anls,0.1764617576183696,0.006251319736392345 Remove Multilingual Data,1000,mme_total_score,979.3045218087235, Remove Multilingual Data,1000,mmmu_val_mmmu_acc,0.25222, Remove Multilingual Data,1000,mmstar_average,0.2073057646207335, Remove Multilingual Data,1000,ocrbench_ocrbench_accuracy,0.333, Remove Multilingual Data,1000,seedbench_seed_all,0.2507504168982768, Remove Multilingual Data,1000,textvqa_val_exact_match,0.38218,0.006631325992355026 Remove Multilingual Data,2000,ai2d_exact_match,0.25291450777202074,0.007823547213659585 Remove Multilingual Data,2000,average,0.32254499165624334, Remove Multilingual Data,2000,average_rank,1.5, Remove Multilingual Data,2000,chartqa_relaxed_overall,0.4692,0.009983005968307607 Remove Multilingual Data,2000,docvqa_val_anls,0.472590835723597,0.006255090657185791 Remove Multilingual Data,2000,infovqa_val_anls,0.19402428600531574,0.006415305613638088 Remove Multilingual Data,2000,mme_total_score,1067.5286114445778, Remove Multilingual Data,2000,mmmu_val_mmmu_acc,0.24444, Remove Multilingual Data,2000,mmstar_average,0.20544885849586278, Remove Multilingual Data,2000,ocrbench_ocrbench_accuracy,0.409, Remove Multilingual Data,2000,seedbench_seed_all,0.2555864369093941, Remove Multilingual Data,2000,textvqa_val_exact_match,0.3997,0.006677042652231296 Remove Multilingual Data,3000,ai2d_exact_match,0.2658678756476684,0.00795154886571598 Remove Multilingual Data,3000,average,0.35383248024337044, Remove Multilingual Data,3000,average_rank,1.4, Remove Multilingual Data,3000,chartqa_relaxed_overall,0.536,0.009976041728231964 Remove Multilingual Data,3000,docvqa_val_anls,0.5115050780592246,0.006297134520533815 Remove Multilingual Data,3000,infovqa_val_anls,0.1959317380528948,0.006353999153527862 Remove Multilingual Data,3000,mme_total_score,1055.7074829931971, Remove Multilingual Data,3000,mmmu_val_mmmu_acc,0.26, Remove Multilingual Data,3000,mmstar_average,0.2325690534433309, Remove Multilingual Data,3000,ocrbench_ocrbench_accuracy,0.449, Remove Multilingual Data,3000,seedbench_seed_all,0.28943857698721515, Remove Multilingual Data,3000,textvqa_val_exact_match,0.44418,0.0067730052591185854 Remove Multilingual Data,4000,ai2d_exact_match,0.2856217616580311,0.008130016747303466 Remove Multilingual Data,4000,average,0.3775873253769421, Remove Multilingual Data,4000,average_rank,1.4, Remove Multilingual Data,4000,chartqa_relaxed_overall,0.55,0.009951864943131942 Remove Multilingual Data,4000,docvqa_val_anls,0.5339851175847934,0.0062957385772197255 Remove Multilingual Data,4000,infovqa_val_anls,0.20750676546327357,0.006369425500899887 Remove Multilingual Data,4000,mme_total_score,1228.202280912365, Remove Multilingual Data,4000,mmmu_val_mmmu_acc,0.27111, Remove Multilingual Data,4000,mmstar_average,0.24655460164079995, Remove Multilingual Data,4000,ocrbench_ocrbench_accuracy,0.456, Remove Multilingual Data,4000,seedbench_seed_all,0.3898276820455809, Remove Multilingual Data,4000,textvqa_val_exact_match,0.45768000000000003,0.006781666588703993 Remove Multilingual Data,5000,ai2d_exact_match,0.3121761658031088,0.008340079044408505 Remove Multilingual Data,5000,average,0.3976192139479395, Remove Multilingual Data,5000,average_rank,1.4, Remove Multilingual Data,5000,chartqa_relaxed_overall,0.5684,0.009907968668564455 Remove Multilingual Data,5000,docvqa_val_anls,0.5611339219828478,0.006260862186673622 Remove Multilingual Data,5000,infovqa_val_anls,0.21913407408993218,0.006638320670102091 Remove Multilingual Data,5000,mme_total_score,1219.2377951180472, Remove Multilingual Data,5000,mmmu_val_mmmu_acc,0.29444, Remove Multilingual Data,5000,mmstar_average,0.23556637343877926, Remove Multilingual Data,5000,ocrbench_ocrbench_accuracy,0.472, Remove Multilingual Data,5000,seedbench_seed_all,0.4443023902167871, Remove Multilingual Data,5000,textvqa_val_exact_match,0.47142,0.006807048104779351 Remove Multilingual Data,6000,ai2d_exact_match,0.35200777202072536,0.008595926828224822 Remove Multilingual Data,6000,average,0.42451996443270734, Remove Multilingual Data,6000,average_rank,1.3, Remove Multilingual Data,6000,chartqa_relaxed_overall,0.5744,0.009890651444389179 Remove Multilingual Data,6000,docvqa_val_anls,0.5825552977560686,0.006257174245982806 Remove Multilingual Data,6000,infovqa_val_anls,0.252828230577843,0.007149939162213116 Remove Multilingual Data,6000,mme_total_score,1216.607643057223, Remove Multilingual Data,6000,mmmu_val_mmmu_acc,0.30222, Remove Multilingual Data,6000,mmstar_average,0.2807390632529032, Remove Multilingual Data,6000,ocrbench_ocrbench_accuracy,0.497, Remove Multilingual Data,6000,seedbench_seed_all,0.484769316286826, Remove Multilingual Data,6000,textvqa_val_exact_match,0.49416000000000004,0.006798707477504303 Remove Multilingual Data,7000,ai2d_exact_match,0.3801813471502591,0.008736941116932581 Remove Multilingual Data,7000,average,0.428085510128325, Remove Multilingual Data,7000,average_rank,1.4, Remove Multilingual Data,7000,chartqa_relaxed_overall,0.5796,0.009874438607593145 Remove Multilingual Data,7000,docvqa_val_anls,0.5966369586509165,0.006224801729990067 Remove Multilingual Data,7000,infovqa_val_anls,0.23354910759447625,0.006817906701297544 Remove Multilingual Data,7000,mme_total_score,1188.1020408163265, Remove Multilingual Data,7000,mmmu_val_mmmu_acc,0.27556, Remove Multilingual Data,7000,mmstar_average,0.292518909276783, Remove Multilingual Data,7000,ocrbench_ocrbench_accuracy,0.503, Remove Multilingual Data,7000,seedbench_seed_all,0.48988326848249025, Remove Multilingual Data,7000,textvqa_val_exact_match,0.5018400000000001,0.006795274684043781 Remove Multilingual Data,8000,ai2d_exact_match,0.3863341968911917,0.008763532923326706 Remove Multilingual Data,8000,average,0.4413787447198958, Remove Multilingual Data,8000,average_rank,1.2, Remove Multilingual Data,8000,chartqa_relaxed_overall,0.5964,0.009814343815957088 Remove Multilingual Data,8000,docvqa_val_anls,0.603351366738696,0.006235087701254087 Remove Multilingual Data,8000,infovqa_val_anls,0.25307646024963104,0.007198626238671866 Remove Multilingual Data,8000,mme_total_score,1261.5517206882753, Remove Multilingual Data,8000,mmmu_val_mmmu_acc,0.29556, Remove Multilingual Data,8000,mmstar_average,0.30595531673183934, Remove Multilingual Data,8000,ocrbench_ocrbench_accuracy,0.505, Remove Multilingual Data,8000,seedbench_seed_all,0.5124513618677042, Remove Multilingual Data,8000,textvqa_val_exact_match,0.51428,0.006792322389925977 Remove Multilingual Data,9000,ai2d_exact_match,0.3908678756476684,0.008782181865213609 Remove Multilingual Data,9000,average,0.4483393474436153, Remove Multilingual Data,9000,average_rank,1.2, Remove Multilingual Data,9000,chartqa_relaxed_overall,0.6008,0.00979663889573671 Remove Multilingual Data,9000,docvqa_val_anls,0.6206417157518567,0.006160046717594884 Remove Multilingual Data,9000,infovqa_val_anls,0.2517144366407357,0.007092352700671051 Remove Multilingual Data,9000,mme_total_score,1270.4974989995999, Remove Multilingual Data,9000,mmmu_val_mmmu_acc,0.29333, Remove Multilingual Data,9000,mmstar_average,0.32657768650091523, Remove Multilingual Data,9000,ocrbench_ocrbench_accuracy,0.52, Remove Multilingual Data,9000,seedbench_seed_all,0.5163424124513619, Remove Multilingual Data,9000,textvqa_val_exact_match,0.51478,0.006772730933446224 Remove Multilingual Data,10000,ai2d_exact_match,0.41450777202072536,0.008866630113019596 Remove Multilingual Data,10000,average,0.45448389614950035, Remove Multilingual Data,10000,average_rank,1.3, Remove Multilingual Data,10000,chartqa_relaxed_overall,0.6068,0.009771166474772143 Remove Multilingual Data,10000,docvqa_val_anls,0.6232449599819007,0.006177718712473361 Remove Multilingual Data,10000,infovqa_val_anls,0.23737546748097776,0.006778926597473845 Remove Multilingual Data,10000,mme_total_score,1276.3549419767905, Remove Multilingual Data,10000,mmmu_val_mmmu_acc,0.29889, Remove Multilingual Data,10000,mmstar_average,0.3130758097195978, Remove Multilingual Data,10000,ocrbench_ocrbench_accuracy,0.539, Remove Multilingual Data,10000,seedbench_seed_all,0.5219010561423013, Remove Multilingual Data,10000,textvqa_val_exact_match,0.53556,0.00676001751827386 Remove Multilingual Data,11000,ai2d_exact_match,0.41904145077720206,0.008880404559123601 Remove Multilingual Data,11000,average,0.4609227111862355, Remove Multilingual Data,11000,average_rank,1.3, Remove Multilingual Data,11000,chartqa_relaxed_overall,0.6108,0.00975332737879659 Remove Multilingual Data,11000,docvqa_val_anls,0.6387481065492241,0.006094036395159673 Remove Multilingual Data,11000,infovqa_val_anls,0.25052436731474453,0.006993658213921465 Remove Multilingual Data,11000,mme_total_score,1258.2553021208482, Remove Multilingual Data,11000,mmmu_val_mmmu_acc,0.28, Remove Multilingual Data,11000,mmstar_average,0.3213557456291676, Remove Multilingual Data,11000,ocrbench_ocrbench_accuracy,0.561, Remove Multilingual Data,11000,seedbench_seed_all,0.526514730405781, Remove Multilingual Data,11000,textvqa_val_exact_match,0.54032,0.0067608876222200335 Remove Multilingual Data,12000,ai2d_exact_match,0.41353626943005184,0.00886357792887845 Remove Multilingual Data,12000,average,0.46149948562642984, Remove Multilingual Data,12000,average_rank,1.3, Remove Multilingual Data,12000,chartqa_relaxed_overall,0.622,0.009699692449425671 Remove Multilingual Data,12000,docvqa_val_anls,0.6481870346272672,0.0060803752132680255 Remove Multilingual Data,12000,infovqa_val_anls,0.25116762340113796,0.006993814336062128 Remove Multilingual Data,12000,mme_total_score,1256.7357943177271, Remove Multilingual Data,12000,mmmu_val_mmmu_acc,0.28222, Remove Multilingual Data,12000,mmstar_average,0.311104865636332, Remove Multilingual Data,12000,ocrbench_ocrbench_accuracy,0.547, Remove Multilingual Data,12000,seedbench_seed_all,0.5312395775430795, Remove Multilingual Data,12000,textvqa_val_exact_match,0.54704,0.006750774938661079 Remove Multilingual Data,13000,ai2d_exact_match,0.42810880829015546,0.008905646879422012 Remove Multilingual Data,13000,average,0.4658949593838579, Remove Multilingual Data,13000,average_rank,1.6, Remove Multilingual Data,13000,chartqa_relaxed_overall,0.622,0.009699692449425671 Remove Multilingual Data,13000,docvqa_val_anls,0.6461697403304425,0.006072036108570188 Remove Multilingual Data,13000,infovqa_val_anls,0.2635164421127001,0.007102540516236264 Remove Multilingual Data,13000,mme_total_score,1295.0039015606244, Remove Multilingual Data,13000,mmmu_val_mmmu_acc,0.29, Remove Multilingual Data,13000,mmstar_average,0.3296444797414335, Remove Multilingual Data,13000,ocrbench_ocrbench_accuracy,0.54, Remove Multilingual Data,13000,seedbench_seed_all,0.5312951639799889, Remove Multilingual Data,13000,textvqa_val_exact_match,0.54232,0.006771571040376891 Remove Multilingual Data,14000,ai2d_exact_match,0.42487046632124353,0.008896983637113786 Remove Multilingual Data,14000,average,0.46755416993970794, Remove Multilingual Data,14000,average_rank,1.7, Remove Multilingual Data,14000,chartqa_relaxed_overall,0.6256,0.009681288495793083 Remove Multilingual Data,14000,docvqa_val_anls,0.6470833619171145,0.006119244473927763 Remove Multilingual Data,14000,infovqa_val_anls,0.2541720455309047,0.007006172199083197 Remove Multilingual Data,14000,mme_total_score,1262.1793717486994, Remove Multilingual Data,14000,mmmu_val_mmmu_acc,0.28556, Remove Multilingual Data,14000,mmstar_average,0.327544946405174, Remove Multilingual Data,14000,ocrbench_ocrbench_accuracy,0.559, Remove Multilingual Data,14000,seedbench_seed_all,0.5380767092829349, Remove Multilingual Data,14000,textvqa_val_exact_match,0.5460799999999999,0.006754587449305995 Remove Multilingual Data,15000,ai2d_exact_match,0.42908031088082904,0.00890816984689523 Remove Multilingual Data,15000,average,0.4720258172705174, Remove Multilingual Data,15000,average_rank,1.8, Remove Multilingual Data,15000,chartqa_relaxed_overall,0.626,0.009679208378267924 Remove Multilingual Data,15000,docvqa_val_anls,0.655881547989144,0.006058079036611966 Remove Multilingual Data,15000,infovqa_val_anls,0.2538472956751567,0.006929926842577286 Remove Multilingual Data,15000,mme_total_score,1283.2800120048018, Remove Multilingual Data,15000,mmmu_val_mmmu_acc,0.29, Remove Multilingual Data,15000,mmstar_average,0.3309383426349411, Remove Multilingual Data,15000,ocrbench_ocrbench_accuracy,0.572, Remove Multilingual Data,15000,seedbench_seed_all,0.5407448582545858, Remove Multilingual Data,15000,textvqa_val_exact_match,0.54974,0.006738090742441116 Remove Multilingual Data,16000,ai2d_exact_match,0.42940414507772023,0.008909003051055714 Remove Multilingual Data,16000,average,0.476926180401357, Remove Multilingual Data,16000,average_rank,1.5, Remove Multilingual Data,16000,chartqa_relaxed_overall,0.626,0.009679208378267924 Remove Multilingual Data,16000,docvqa_val_anls,0.6622394005833824,0.006046858134280091 Remove Multilingual Data,16000,infovqa_val_anls,0.2633356312454137,0.007137388413784386 Remove Multilingual Data,16000,mme_total_score,1328.4599839935972, Remove Multilingual Data,16000,mmmu_val_mmmu_acc,0.29556, Remove Multilingual Data,16000,mmstar_average,0.33932578522709744, Remove Multilingual Data,16000,ocrbench_ocrbench_accuracy,0.578, Remove Multilingual Data,16000,seedbench_seed_all,0.5431906614785992, Remove Multilingual Data,16000,textvqa_val_exact_match,0.55528,0.006733817132847886 Remove Multilingual Data,17000,ai2d_exact_match,0.42940414507772023,0.008909003051055712 Remove Multilingual Data,17000,average,0.4732087844936434, Remove Multilingual Data,17000,average_rank,1.8, Remove Multilingual Data,17000,chartqa_relaxed_overall,0.6264,0.009677121197436144 Remove Multilingual Data,17000,docvqa_val_anls,0.661817176575324,0.0060368801840957114 Remove Multilingual Data,17000,infovqa_val_anls,0.25584519300448166,0.007033162778192734 Remove Multilingual Data,17000,mme_total_score,1270.766606642657, Remove Multilingual Data,17000,mmmu_val_mmmu_acc,0.28, Remove Multilingual Data,17000,mmstar_average,0.3233592606268431, Remove Multilingual Data,17000,ocrbench_ocrbench_accuracy,0.58, Remove Multilingual Data,17000,seedbench_seed_all,0.5439132851584213, Remove Multilingual Data,17000,textvqa_val_exact_match,0.5581400000000001,0.006731048171116916 Remove Multilingual Data,18000,ai2d_exact_match,0.4368523316062176,0.008927095061184944 Remove Multilingual Data,18000,average,0.4769341122300441, Remove Multilingual Data,18000,average_rank,1.9, Remove Multilingual Data,18000,chartqa_relaxed_overall,0.636,0.009624897685803465 Remove Multilingual Data,18000,docvqa_val_anls,0.671397164123935,0.006004837667492473 Remove Multilingual Data,18000,infovqa_val_anls,0.2570865428675732,0.007022334730795061 Remove Multilingual Data,18000,mme_total_score,1330.2323929571828, Remove Multilingual Data,18000,mmmu_val_mmmu_acc,0.28444, Remove Multilingual Data,18000,mmstar_average,0.3272633338962395, Remove Multilingual Data,18000,ocrbench_ocrbench_accuracy,0.579, Remove Multilingual Data,18000,seedbench_seed_all,0.5457476375764313, Remove Multilingual Data,18000,textvqa_val_exact_match,0.55462,0.0067429981999808505