lusxvr's picture
new plots
a024e38
raw
history blame
28.8 kB
run,step,metric,value,stderr
Baseline,1000,ai2d_exact_match,0.2548575129533679,0.007843322436924496
Baseline,1000,average,0.27120689295763617,
Baseline,1000,average_rank,1.7,
Baseline,1000,chartqa_relaxed_overall,0.3308,0.009411906161401973
Baseline,1000,docvqa_val_anls,0.3528553494243383,0.005852289239342309
Baseline,1000,infovqa_val_anls,0.17320578642581314,0.006297063452679795
Baseline,1000,mme_total_score,977.4280712284914,
Baseline,1000,mmmu_val_mmmu_acc,0.25222,
Baseline,1000,mmstar_average,0.23215874078908072,
Baseline,1000,ocrbench_ocrbench_accuracy,0.286,
Baseline,1000,seedbench_seed_all,0.2563646470261256,
Baseline,1000,textvqa_val_exact_match,0.3024,0.00628900296642181
Baseline,2000,ai2d_exact_match,0.26295336787564766,0.007923526907377255
Baseline,2000,average,0.3202068275596269,
Baseline,2000,average_rank,1.5,
Baseline,2000,chartqa_relaxed_overall,0.4688,0.009982508912777261
Baseline,2000,docvqa_val_anls,0.4452261510942785,0.00614755494712251
Baseline,2000,infovqa_val_anls,0.1820547866557169,0.006217861455795791
Baseline,2000,mme_total_score,1049.3036214485794,
Baseline,2000,mmmu_val_mmmu_acc,0.24556,
Baseline,2000,mmstar_average,0.21305462434540698,
Baseline,2000,ocrbench_ocrbench_accuracy,0.395,
Baseline,2000,seedbench_seed_all,0.258532518065592,
Baseline,2000,textvqa_val_exact_match,0.41068000000000005,0.006697862330024289
Baseline,3000,ai2d_exact_match,0.25226683937823835,0.007816909588794397
Baseline,3000,average,0.3507423834414229,
Baseline,3000,average_rank,1.6,
Baseline,3000,chartqa_relaxed_overall,0.5028,0.010001843767601082
Baseline,3000,docvqa_val_anls,0.502653993831009,0.006267072346683124
Baseline,3000,infovqa_val_anls,0.21728617578189535,0.006796941784959762
Baseline,3000,mme_total_score,1170.2383953581434,
Baseline,3000,mmmu_val_mmmu_acc,0.27556,
Baseline,3000,mmstar_average,0.25432376938577683,
Baseline,3000,ocrbench_ocrbench_accuracy,0.436,
Baseline,3000,seedbench_seed_all,0.2792106725958866,
Baseline,3000,textvqa_val_exact_match,0.43658,0.006766885462882726
Baseline,4000,ai2d_exact_match,0.2645725388601036,0.007939149662089447
Baseline,4000,average,0.36961781722974835,
Baseline,4000,average_rank,1.6,
Baseline,4000,chartqa_relaxed_overall,0.5312,0.009982508912777261
Baseline,4000,docvqa_val_anls,0.5374434618615119,0.0062905728113059655
Baseline,4000,infovqa_val_anls,0.2287924838861707,0.006994568698639919
Baseline,4000,mme_total_score,1155.203781512605,
Baseline,4000,mmmu_val_mmmu_acc,0.25556,
Baseline,4000,mmstar_average,0.2575590188757354,
Baseline,4000,ocrbench_ocrbench_accuracy,0.453,
Baseline,4000,seedbench_seed_all,0.33913285158421347,
Baseline,4000,textvqa_val_exact_match,0.4593,0.006791695475025738
Baseline,5000,ai2d_exact_match,0.3125,0.008342439145556371
Baseline,5000,average,0.3974627910380972,
Baseline,5000,average_rank,1.6,
Baseline,5000,chartqa_relaxed_overall,0.5488,0.00995424828018316
Baseline,5000,docvqa_val_anls,0.552360266782429,0.006300308519952055
Baseline,5000,infovqa_val_anls,0.23425555286643698,0.007002254622066442
Baseline,5000,mme_total_score,1181.4653861544618,
Baseline,5000,mmmu_val_mmmu_acc,0.26667,
Baseline,5000,mmstar_average,0.29596648146165705,
Baseline,5000,ocrbench_ocrbench_accuracy,0.462,
Baseline,5000,seedbench_seed_all,0.43107281823235133,
Baseline,5000,textvqa_val_exact_match,0.47354000000000007,0.0068172185364497985
Baseline,6000,ai2d_exact_match,0.358160621761658,0.008629463221867162
Baseline,6000,average,0.4161227404571003,
Baseline,6000,average_rank,1.7,
Baseline,6000,chartqa_relaxed_overall,0.5628,0.00992279440175477
Baseline,6000,docvqa_val_anls,0.5747451497228876,0.00625495440870239
Baseline,6000,infovqa_val_anls,0.22152017368968838,0.006604546680525351
Baseline,6000,mme_total_score,1284.1648659463785,
Baseline,6000,mmmu_val_mmmu_acc,0.27111,
Baseline,6000,mmstar_average,0.2978489412854164,
Baseline,6000,ocrbench_ocrbench_accuracy,0.495,
Baseline,6000,seedbench_seed_all,0.4795997776542524,
Baseline,6000,textvqa_val_exact_match,0.48432,0.006800535050670284
Baseline,7000,ai2d_exact_match,0.3707901554404145,0.00869347755587734
Baseline,7000,average,0.4291083177345374,
Baseline,7000,average_rank,1.6,
Baseline,7000,chartqa_relaxed_overall,0.5656,0.009915542506251351
Baseline,7000,docvqa_val_anls,0.5940907049431567,0.006224236305767187
Baseline,7000,infovqa_val_anls,0.2515675215816963,0.007105097396092786
Baseline,7000,mme_total_score,1185.875650260104,
Baseline,7000,mmmu_val_mmmu_acc,0.26556,
Baseline,7000,mmstar_average,0.31372400960777047,
Baseline,7000,ocrbench_ocrbench_accuracy,0.504,
Baseline,7000,seedbench_seed_all,0.4964424680377988,
Baseline,7000,textvqa_val_exact_match,0.5002,0.006794794025220267
Baseline,8000,ai2d_exact_match,0.37759067357512954,0.008725299846043883
Baseline,8000,average,0.43846759477995995,
Baseline,8000,average_rank,1.8,
Baseline,8000,chartqa_relaxed_overall,0.5832,0.009862556058385773
Baseline,8000,docvqa_val_anls,0.6017336419437208,0.006231612198089698
Baseline,8000,infovqa_val_anls,0.2449256624147254,0.006992518502948913
Baseline,8000,mme_total_score,1199.2409963985594,
Baseline,8000,mmmu_val_mmmu_acc,0.28111,
Baseline,8000,mmstar_average,0.33512257186205047,
Baseline,8000,ocrbench_ocrbench_accuracy,0.51,
Baseline,8000,seedbench_seed_all,0.5024458032240133,
Baseline,8000,textvqa_val_exact_match,0.51008,0.006796301690135059
Baseline,9000,ai2d_exact_match,0.4067357512953368,0.008841214921078996
Baseline,9000,average,0.4422510732201056,
Baseline,9000,average_rank,1.8,
Baseline,9000,chartqa_relaxed_overall,0.5912,0.009834211136815875
Baseline,9000,docvqa_val_anls,0.6170968481662739,0.00617235763542544
Baseline,9000,infovqa_val_anls,0.23537031288570615,0.00670318154156447
Baseline,9000,mme_total_score,1231.5195078031213,
Baseline,9000,mmmu_val_mmmu_acc,0.25889,
Baseline,9000,mmstar_average,0.3216444898242951,
Baseline,9000,ocrbench_ocrbench_accuracy,0.515,
Baseline,9000,seedbench_seed_all,0.5120622568093385,
Baseline,9000,textvqa_val_exact_match,0.52226,0.006792711289708482
Baseline,10000,ai2d_exact_match,0.39993523316062174,0.008817096257082848
Baseline,10000,average,0.4523875703250908,
Baseline,10000,average_rank,1.7,
Baseline,10000,chartqa_relaxed_overall,0.5996,0.00980154906867574
Baseline,10000,docvqa_val_anls,0.6262613496433054,0.006147756371688175
Baseline,10000,infovqa_val_anls,0.263290074230132,0.007186788766942786
Baseline,10000,mme_total_score,1240.8218287314926,
Baseline,10000,mmmu_val_mmmu_acc,0.28778,
Baseline,10000,mmstar_average,0.32972717906018517,
Baseline,10000,ocrbench_ocrbench_accuracy,0.517,
Baseline,10000,seedbench_seed_all,0.5217342968315731,
Baseline,10000,textvqa_val_exact_match,0.5261600000000001,0.006785774843600811
Baseline,11000,ai2d_exact_match,0.422279792746114,0.008889771831066474
Baseline,11000,average,0.4561398159525099,
Baseline,11000,average_rank,1.7,
Baseline,11000,chartqa_relaxed_overall,0.6104,0.009755142291143075
Baseline,11000,docvqa_val_anls,0.6373130149166712,0.006128022584995044
Baseline,11000,infovqa_val_anls,0.24419378339723755,0.006897644885887063
Baseline,11000,mme_total_score,1322.9488795518205,
Baseline,11000,mmmu_val_mmmu_acc,0.27778,
Baseline,11000,mmstar_average,0.3298563439522548,
Baseline,11000,ocrbench_ocrbench_accuracy,0.521,
Baseline,11000,seedbench_seed_all,0.5237354085603113,
Baseline,11000,textvqa_val_exact_match,0.5387,0.006770851562852138
Baseline,12000,ai2d_exact_match,0.42001295336787564,0.008883255931688034
Baseline,12000,average,0.4582751140055433,
Baseline,12000,average_rank,1.7,
Baseline,12000,chartqa_relaxed_overall,0.618,0.009719474639861454
Baseline,12000,docvqa_val_anls,0.6393961983751871,0.0061228747388476674
Baseline,12000,infovqa_val_anls,0.24798874058574302,0.006855374548993139
Baseline,12000,mme_total_score,1225.6453581432572,
Baseline,12000,mmmu_val_mmmu_acc,0.27889,
Baseline,12000,mmstar_average,0.34010867846816534,
Baseline,12000,ocrbench_ocrbench_accuracy,0.512,
Baseline,12000,seedbench_seed_all,0.5350194552529183,
Baseline,12000,textvqa_val_exact_match,0.5330600000000001,0.006777713092109446
Baseline,13000,ai2d_exact_match,0.4375,0.008928571428571428
Baseline,13000,average,0.4692868662590049,
Baseline,13000,average_rank,1.4,
Baseline,13000,chartqa_relaxed_overall,0.6148,0.00973479791861169
Baseline,13000,docvqa_val_anls,0.6511374872549951,0.006086953065248391
Baseline,13000,infovqa_val_anls,0.24465055100441893,0.006808432538374664
Baseline,13000,mme_total_score,1281.7122849139657,
Baseline,13000,mmmu_val_mmmu_acc,0.28222,
Baseline,13000,mmstar_average,0.3453069542917521,
Baseline,13000,ocrbench_ocrbench_accuracy,0.549,
Baseline,13000,seedbench_seed_all,0.5442468037798777,
Baseline,13000,textvqa_val_exact_match,0.55472,0.0067416788982325
Baseline,14000,ai2d_exact_match,0.4572538860103627,0.00896620675297095
Baseline,14000,average,0.47352486841689195,
Baseline,14000,average_rank,1.3,
Baseline,14000,chartqa_relaxed_overall,0.6172,0.009723347231923635
Baseline,14000,docvqa_val_anls,0.6502269393708169,0.006057950730638126
Baseline,14000,infovqa_val_anls,0.25805460837190913,0.007037735231659539
Baseline,14000,mme_total_score,1309.1444577831132,
Baseline,14000,mmmu_val_mmmu_acc,0.28111,
Baseline,14000,mmstar_average,0.34575818188776586,
Baseline,14000,ocrbench_ocrbench_accuracy,0.551,
Baseline,14000,seedbench_seed_all,0.5483602001111729,
Baseline,14000,textvqa_val_exact_match,0.55276,0.006751206724612103
Baseline,15000,ai2d_exact_match,0.45045336787564766,0.008954861634252399
Baseline,15000,average,0.47878665012878824,
Baseline,15000,average_rank,1.2,
Baseline,15000,chartqa_relaxed_overall,0.612,0.009747841205275417
Baseline,15000,docvqa_val_anls,0.6621413031955148,0.006056838050222495
Baseline,15000,infovqa_val_anls,0.2706898598157733,0.007200315730154543
Baseline,15000,mme_total_score,1384.2171868747498,
Baseline,15000,mmmu_val_mmmu_acc,0.30222,
Baseline,15000,mmstar_average,0.35408135695920684,
Baseline,15000,ocrbench_ocrbench_accuracy,0.558,
Baseline,15000,seedbench_seed_all,0.5411339633129516,
Baseline,15000,textvqa_val_exact_match,0.5583600000000001,0.0067279027203879065
Baseline,16000,ai2d_exact_match,0.45077720207253885,0.008955440137395838
Baseline,16000,average,0.47665128022935843,
Baseline,16000,average_rank,1.5,
Baseline,16000,chartqa_relaxed_overall,0.632,0.00964715642305132
Baseline,16000,docvqa_val_anls,0.6709415729142987,0.005999818105621502
Baseline,16000,infovqa_val_anls,0.26050032542402035,0.006997451875879188
Baseline,16000,mme_total_score,1317.8491396558625,
Baseline,16000,mmmu_val_mmmu_acc,0.27556,
Baseline,16000,mmstar_average,0.33214333327093315,
Baseline,16000,ocrbench_ocrbench_accuracy,0.56,
Baseline,16000,seedbench_seed_all,0.5463590883824346,
Baseline,16000,textvqa_val_exact_match,0.56158,0.006723854754867398
Baseline,17000,ai2d_exact_match,0.45919689119170987,0.008969138793675545
Baseline,17000,average,0.4777141780162423,
Baseline,17000,average_rank,1.2,
Baseline,17000,chartqa_relaxed_overall,0.632,0.00964715642305132
Baseline,17000,docvqa_val_anls,0.6796338519136422,0.005948761388267941
Baseline,17000,infovqa_val_anls,0.28070956072505215,0.007298333094144192
Baseline,17000,mme_total_score,1381.9161664665867,
Baseline,17000,mmmu_val_mmmu_acc,0.27667,
Baseline,17000,mmstar_average,0.3370289492329521,
Baseline,17000,ocrbench_ocrbench_accuracy,0.519,
Baseline,17000,seedbench_seed_all,0.5510283490828238,
Baseline,17000,textvqa_val_exact_match,0.56416,0.006724830373229479
Baseline,18000,ai2d_exact_match,0.46567357512953367,0.008977921602780726
Baseline,18000,average,0.4819834595278701,
Baseline,18000,average_rank,1.1,
Baseline,18000,chartqa_relaxed_overall,0.6376,0.009615793331418735
Baseline,18000,docvqa_val_anls,0.6775884603912571,0.005972234236435759
Baseline,18000,infovqa_val_anls,0.27154318420389256,0.007164903131667027
Baseline,18000,mme_total_score,1336.922769107643,
Baseline,18000,mmmu_val_mmmu_acc,0.28667,
Baseline,18000,mmstar_average,0.34482796716566916,
Baseline,18000,ocrbench_ocrbench_accuracy,0.533,
Baseline,18000,seedbench_seed_all,0.5543079488604781,
Baseline,18000,textvqa_val_exact_match,0.5666399999999999,0.006713392287599574
Baseline,19000,ai2d_exact_match,0.4682642487046632,0.008981008686994101
Baseline,19000,average,0.4899006713916878,
Baseline,19000,chartqa_relaxed_overall,0.6444,0.009575809858898698
Baseline,19000,docvqa_val_anls,0.678226526479947,0.005970619221588814
Baseline,19000,infovqa_val_anls,0.26993847247278,0.0071348470764911525
Baseline,19000,mme_total_score,1406.6628651460583,
Baseline,19000,mmmu_val_mmmu_acc,0.28333,
Baseline,19000,mmstar_average,0.356220913822775,
Baseline,19000,ocrbench_ocrbench_accuracy,0.577,
Baseline,19000,seedbench_seed_all,0.554585881045025,
Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
Baseline,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662
Baseline,20000,average,0.4873169067639118,
Baseline,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618
Baseline,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767
Baseline,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538
Baseline,20000,mme_total_score,1324.6738695478193,
Baseline,20000,mmmu_val_mmmu_acc,0.30111,
Baseline,20000,mmstar_average,0.33806766134497995,
Baseline,20000,ocrbench_ocrbench_accuracy,0.555,
Baseline,20000,seedbench_seed_all,0.5587548638132296,
Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
Remove Multilingual Data,1000,ai2d_exact_match,0.2619818652849741,0.007914086941902855
Remove Multilingual Data,1000,average,0.29340443385847137,
Remove Multilingual Data,1000,average_rank,1.3,
Remove Multilingual Data,1000,chartqa_relaxed_overall,0.3736,0.009677121197436144
Remove Multilingual Data,1000,docvqa_val_anls,0.403140100303888,0.006111323163666132
Remove Multilingual Data,1000,infovqa_val_anls,0.1764617576183696,0.006251319736392345
Remove Multilingual Data,1000,mme_total_score,979.3045218087235,
Remove Multilingual Data,1000,mmmu_val_mmmu_acc,0.25222,
Remove Multilingual Data,1000,mmstar_average,0.2073057646207335,
Remove Multilingual Data,1000,ocrbench_ocrbench_accuracy,0.333,
Remove Multilingual Data,1000,seedbench_seed_all,0.2507504168982768,
Remove Multilingual Data,1000,textvqa_val_exact_match,0.38218,0.006631325992355026
Remove Multilingual Data,2000,ai2d_exact_match,0.25291450777202074,0.007823547213659585
Remove Multilingual Data,2000,average,0.32254499165624334,
Remove Multilingual Data,2000,average_rank,1.5,
Remove Multilingual Data,2000,chartqa_relaxed_overall,0.4692,0.009983005968307607
Remove Multilingual Data,2000,docvqa_val_anls,0.472590835723597,0.006255090657185791
Remove Multilingual Data,2000,infovqa_val_anls,0.19402428600531574,0.006415305613638088
Remove Multilingual Data,2000,mme_total_score,1067.5286114445778,
Remove Multilingual Data,2000,mmmu_val_mmmu_acc,0.24444,
Remove Multilingual Data,2000,mmstar_average,0.20544885849586278,
Remove Multilingual Data,2000,ocrbench_ocrbench_accuracy,0.409,
Remove Multilingual Data,2000,seedbench_seed_all,0.2555864369093941,
Remove Multilingual Data,2000,textvqa_val_exact_match,0.3997,0.006677042652231296
Remove Multilingual Data,3000,ai2d_exact_match,0.2658678756476684,0.00795154886571598
Remove Multilingual Data,3000,average,0.35383248024337044,
Remove Multilingual Data,3000,average_rank,1.4,
Remove Multilingual Data,3000,chartqa_relaxed_overall,0.536,0.009976041728231964
Remove Multilingual Data,3000,docvqa_val_anls,0.5115050780592246,0.006297134520533815
Remove Multilingual Data,3000,infovqa_val_anls,0.1959317380528948,0.006353999153527862
Remove Multilingual Data,3000,mme_total_score,1055.7074829931971,
Remove Multilingual Data,3000,mmmu_val_mmmu_acc,0.26,
Remove Multilingual Data,3000,mmstar_average,0.2325690534433309,
Remove Multilingual Data,3000,ocrbench_ocrbench_accuracy,0.449,
Remove Multilingual Data,3000,seedbench_seed_all,0.28943857698721515,
Remove Multilingual Data,3000,textvqa_val_exact_match,0.44418,0.0067730052591185854
Remove Multilingual Data,4000,ai2d_exact_match,0.2856217616580311,0.008130016747303466
Remove Multilingual Data,4000,average,0.3775873253769421,
Remove Multilingual Data,4000,average_rank,1.4,
Remove Multilingual Data,4000,chartqa_relaxed_overall,0.55,0.009951864943131942
Remove Multilingual Data,4000,docvqa_val_anls,0.5339851175847934,0.0062957385772197255
Remove Multilingual Data,4000,infovqa_val_anls,0.20750676546327357,0.006369425500899887
Remove Multilingual Data,4000,mme_total_score,1228.202280912365,
Remove Multilingual Data,4000,mmmu_val_mmmu_acc,0.27111,
Remove Multilingual Data,4000,mmstar_average,0.24655460164079995,
Remove Multilingual Data,4000,ocrbench_ocrbench_accuracy,0.456,
Remove Multilingual Data,4000,seedbench_seed_all,0.3898276820455809,
Remove Multilingual Data,4000,textvqa_val_exact_match,0.45768000000000003,0.006781666588703993
Remove Multilingual Data,5000,ai2d_exact_match,0.3121761658031088,0.008340079044408505
Remove Multilingual Data,5000,average,0.3976192139479395,
Remove Multilingual Data,5000,average_rank,1.4,
Remove Multilingual Data,5000,chartqa_relaxed_overall,0.5684,0.009907968668564455
Remove Multilingual Data,5000,docvqa_val_anls,0.5611339219828478,0.006260862186673622
Remove Multilingual Data,5000,infovqa_val_anls,0.21913407408993218,0.006638320670102091
Remove Multilingual Data,5000,mme_total_score,1219.2377951180472,
Remove Multilingual Data,5000,mmmu_val_mmmu_acc,0.29444,
Remove Multilingual Data,5000,mmstar_average,0.23556637343877926,
Remove Multilingual Data,5000,ocrbench_ocrbench_accuracy,0.472,
Remove Multilingual Data,5000,seedbench_seed_all,0.4443023902167871,
Remove Multilingual Data,5000,textvqa_val_exact_match,0.47142,0.006807048104779351
Remove Multilingual Data,6000,ai2d_exact_match,0.35200777202072536,0.008595926828224822
Remove Multilingual Data,6000,average,0.42451996443270734,
Remove Multilingual Data,6000,average_rank,1.3,
Remove Multilingual Data,6000,chartqa_relaxed_overall,0.5744,0.009890651444389179
Remove Multilingual Data,6000,docvqa_val_anls,0.5825552977560686,0.006257174245982806
Remove Multilingual Data,6000,infovqa_val_anls,0.252828230577843,0.007149939162213116
Remove Multilingual Data,6000,mme_total_score,1216.607643057223,
Remove Multilingual Data,6000,mmmu_val_mmmu_acc,0.30222,
Remove Multilingual Data,6000,mmstar_average,0.2807390632529032,
Remove Multilingual Data,6000,ocrbench_ocrbench_accuracy,0.497,
Remove Multilingual Data,6000,seedbench_seed_all,0.484769316286826,
Remove Multilingual Data,6000,textvqa_val_exact_match,0.49416000000000004,0.006798707477504303
Remove Multilingual Data,7000,ai2d_exact_match,0.3801813471502591,0.008736941116932581
Remove Multilingual Data,7000,average,0.428085510128325,
Remove Multilingual Data,7000,average_rank,1.4,
Remove Multilingual Data,7000,chartqa_relaxed_overall,0.5796,0.009874438607593145
Remove Multilingual Data,7000,docvqa_val_anls,0.5966369586509165,0.006224801729990067
Remove Multilingual Data,7000,infovqa_val_anls,0.23354910759447625,0.006817906701297544
Remove Multilingual Data,7000,mme_total_score,1188.1020408163265,
Remove Multilingual Data,7000,mmmu_val_mmmu_acc,0.27556,
Remove Multilingual Data,7000,mmstar_average,0.292518909276783,
Remove Multilingual Data,7000,ocrbench_ocrbench_accuracy,0.503,
Remove Multilingual Data,7000,seedbench_seed_all,0.48988326848249025,
Remove Multilingual Data,7000,textvqa_val_exact_match,0.5018400000000001,0.006795274684043781
Remove Multilingual Data,8000,ai2d_exact_match,0.3863341968911917,0.008763532923326706
Remove Multilingual Data,8000,average,0.4413787447198958,
Remove Multilingual Data,8000,average_rank,1.2,
Remove Multilingual Data,8000,chartqa_relaxed_overall,0.5964,0.009814343815957088
Remove Multilingual Data,8000,docvqa_val_anls,0.603351366738696,0.006235087701254087
Remove Multilingual Data,8000,infovqa_val_anls,0.25307646024963104,0.007198626238671866
Remove Multilingual Data,8000,mme_total_score,1261.5517206882753,
Remove Multilingual Data,8000,mmmu_val_mmmu_acc,0.29556,
Remove Multilingual Data,8000,mmstar_average,0.30595531673183934,
Remove Multilingual Data,8000,ocrbench_ocrbench_accuracy,0.505,
Remove Multilingual Data,8000,seedbench_seed_all,0.5124513618677042,
Remove Multilingual Data,8000,textvqa_val_exact_match,0.51428,0.006792322389925977
Remove Multilingual Data,9000,ai2d_exact_match,0.3908678756476684,0.008782181865213609
Remove Multilingual Data,9000,average,0.4483393474436153,
Remove Multilingual Data,9000,average_rank,1.2,
Remove Multilingual Data,9000,chartqa_relaxed_overall,0.6008,0.00979663889573671
Remove Multilingual Data,9000,docvqa_val_anls,0.6206417157518567,0.006160046717594884
Remove Multilingual Data,9000,infovqa_val_anls,0.2517144366407357,0.007092352700671051
Remove Multilingual Data,9000,mme_total_score,1270.4974989995999,
Remove Multilingual Data,9000,mmmu_val_mmmu_acc,0.29333,
Remove Multilingual Data,9000,mmstar_average,0.32657768650091523,
Remove Multilingual Data,9000,ocrbench_ocrbench_accuracy,0.52,
Remove Multilingual Data,9000,seedbench_seed_all,0.5163424124513619,
Remove Multilingual Data,9000,textvqa_val_exact_match,0.51478,0.006772730933446224
Remove Multilingual Data,10000,ai2d_exact_match,0.41450777202072536,0.008866630113019596
Remove Multilingual Data,10000,average,0.45448389614950035,
Remove Multilingual Data,10000,average_rank,1.3,
Remove Multilingual Data,10000,chartqa_relaxed_overall,0.6068,0.009771166474772143
Remove Multilingual Data,10000,docvqa_val_anls,0.6232449599819007,0.006177718712473361
Remove Multilingual Data,10000,infovqa_val_anls,0.23737546748097776,0.006778926597473845
Remove Multilingual Data,10000,mme_total_score,1276.3549419767905,
Remove Multilingual Data,10000,mmmu_val_mmmu_acc,0.29889,
Remove Multilingual Data,10000,mmstar_average,0.3130758097195978,
Remove Multilingual Data,10000,ocrbench_ocrbench_accuracy,0.539,
Remove Multilingual Data,10000,seedbench_seed_all,0.5219010561423013,
Remove Multilingual Data,10000,textvqa_val_exact_match,0.53556,0.00676001751827386
Remove Multilingual Data,11000,ai2d_exact_match,0.41904145077720206,0.008880404559123601
Remove Multilingual Data,11000,average,0.4609227111862355,
Remove Multilingual Data,11000,average_rank,1.3,
Remove Multilingual Data,11000,chartqa_relaxed_overall,0.6108,0.00975332737879659
Remove Multilingual Data,11000,docvqa_val_anls,0.6387481065492241,0.006094036395159673
Remove Multilingual Data,11000,infovqa_val_anls,0.25052436731474453,0.006993658213921465
Remove Multilingual Data,11000,mme_total_score,1258.2553021208482,
Remove Multilingual Data,11000,mmmu_val_mmmu_acc,0.28,
Remove Multilingual Data,11000,mmstar_average,0.3213557456291676,
Remove Multilingual Data,11000,ocrbench_ocrbench_accuracy,0.561,
Remove Multilingual Data,11000,seedbench_seed_all,0.526514730405781,
Remove Multilingual Data,11000,textvqa_val_exact_match,0.54032,0.0067608876222200335
Remove Multilingual Data,12000,ai2d_exact_match,0.41353626943005184,0.00886357792887845
Remove Multilingual Data,12000,average,0.46149948562642984,
Remove Multilingual Data,12000,average_rank,1.3,
Remove Multilingual Data,12000,chartqa_relaxed_overall,0.622,0.009699692449425671
Remove Multilingual Data,12000,docvqa_val_anls,0.6481870346272672,0.0060803752132680255
Remove Multilingual Data,12000,infovqa_val_anls,0.25116762340113796,0.006993814336062128
Remove Multilingual Data,12000,mme_total_score,1256.7357943177271,
Remove Multilingual Data,12000,mmmu_val_mmmu_acc,0.28222,
Remove Multilingual Data,12000,mmstar_average,0.311104865636332,
Remove Multilingual Data,12000,ocrbench_ocrbench_accuracy,0.547,
Remove Multilingual Data,12000,seedbench_seed_all,0.5312395775430795,
Remove Multilingual Data,12000,textvqa_val_exact_match,0.54704,0.006750774938661079
Remove Multilingual Data,13000,ai2d_exact_match,0.42810880829015546,0.008905646879422012
Remove Multilingual Data,13000,average,0.4658949593838579,
Remove Multilingual Data,13000,average_rank,1.6,
Remove Multilingual Data,13000,chartqa_relaxed_overall,0.622,0.009699692449425671
Remove Multilingual Data,13000,docvqa_val_anls,0.6461697403304425,0.006072036108570188
Remove Multilingual Data,13000,infovqa_val_anls,0.2635164421127001,0.007102540516236264
Remove Multilingual Data,13000,mme_total_score,1295.0039015606244,
Remove Multilingual Data,13000,mmmu_val_mmmu_acc,0.29,
Remove Multilingual Data,13000,mmstar_average,0.3296444797414335,
Remove Multilingual Data,13000,ocrbench_ocrbench_accuracy,0.54,
Remove Multilingual Data,13000,seedbench_seed_all,0.5312951639799889,
Remove Multilingual Data,13000,textvqa_val_exact_match,0.54232,0.006771571040376891
Remove Multilingual Data,14000,ai2d_exact_match,0.42487046632124353,0.008896983637113786
Remove Multilingual Data,14000,average,0.46755416993970794,
Remove Multilingual Data,14000,average_rank,1.7,
Remove Multilingual Data,14000,chartqa_relaxed_overall,0.6256,0.009681288495793083
Remove Multilingual Data,14000,docvqa_val_anls,0.6470833619171145,0.006119244473927763
Remove Multilingual Data,14000,infovqa_val_anls,0.2541720455309047,0.007006172199083197
Remove Multilingual Data,14000,mme_total_score,1262.1793717486994,
Remove Multilingual Data,14000,mmmu_val_mmmu_acc,0.28556,
Remove Multilingual Data,14000,mmstar_average,0.327544946405174,
Remove Multilingual Data,14000,ocrbench_ocrbench_accuracy,0.559,
Remove Multilingual Data,14000,seedbench_seed_all,0.5380767092829349,
Remove Multilingual Data,14000,textvqa_val_exact_match,0.5460799999999999,0.006754587449305995
Remove Multilingual Data,15000,ai2d_exact_match,0.42908031088082904,0.00890816984689523
Remove Multilingual Data,15000,average,0.4720258172705174,
Remove Multilingual Data,15000,average_rank,1.8,
Remove Multilingual Data,15000,chartqa_relaxed_overall,0.626,0.009679208378267924
Remove Multilingual Data,15000,docvqa_val_anls,0.655881547989144,0.006058079036611966
Remove Multilingual Data,15000,infovqa_val_anls,0.2538472956751567,0.006929926842577286
Remove Multilingual Data,15000,mme_total_score,1283.2800120048018,
Remove Multilingual Data,15000,mmmu_val_mmmu_acc,0.29,
Remove Multilingual Data,15000,mmstar_average,0.3309383426349411,
Remove Multilingual Data,15000,ocrbench_ocrbench_accuracy,0.572,
Remove Multilingual Data,15000,seedbench_seed_all,0.5407448582545858,
Remove Multilingual Data,15000,textvqa_val_exact_match,0.54974,0.006738090742441116
Remove Multilingual Data,16000,ai2d_exact_match,0.42940414507772023,0.008909003051055714
Remove Multilingual Data,16000,average,0.476926180401357,
Remove Multilingual Data,16000,average_rank,1.5,
Remove Multilingual Data,16000,chartqa_relaxed_overall,0.626,0.009679208378267924
Remove Multilingual Data,16000,docvqa_val_anls,0.6622394005833824,0.006046858134280091
Remove Multilingual Data,16000,infovqa_val_anls,0.2633356312454137,0.007137388413784386
Remove Multilingual Data,16000,mme_total_score,1328.4599839935972,
Remove Multilingual Data,16000,mmmu_val_mmmu_acc,0.29556,
Remove Multilingual Data,16000,mmstar_average,0.33932578522709744,
Remove Multilingual Data,16000,ocrbench_ocrbench_accuracy,0.578,
Remove Multilingual Data,16000,seedbench_seed_all,0.5431906614785992,
Remove Multilingual Data,16000,textvqa_val_exact_match,0.55528,0.006733817132847886
Remove Multilingual Data,17000,ai2d_exact_match,0.42940414507772023,0.008909003051055712
Remove Multilingual Data,17000,average,0.4732087844936434,
Remove Multilingual Data,17000,average_rank,1.8,
Remove Multilingual Data,17000,chartqa_relaxed_overall,0.6264,0.009677121197436144
Remove Multilingual Data,17000,docvqa_val_anls,0.661817176575324,0.0060368801840957114
Remove Multilingual Data,17000,infovqa_val_anls,0.25584519300448166,0.007033162778192734
Remove Multilingual Data,17000,mme_total_score,1270.766606642657,
Remove Multilingual Data,17000,mmmu_val_mmmu_acc,0.28,
Remove Multilingual Data,17000,mmstar_average,0.3233592606268431,
Remove Multilingual Data,17000,ocrbench_ocrbench_accuracy,0.58,
Remove Multilingual Data,17000,seedbench_seed_all,0.5439132851584213,
Remove Multilingual Data,17000,textvqa_val_exact_match,0.5581400000000001,0.006731048171116916
Remove Multilingual Data,18000,ai2d_exact_match,0.4368523316062176,0.008927095061184944
Remove Multilingual Data,18000,average,0.4769341122300441,
Remove Multilingual Data,18000,average_rank,1.9,
Remove Multilingual Data,18000,chartqa_relaxed_overall,0.636,0.009624897685803465
Remove Multilingual Data,18000,docvqa_val_anls,0.671397164123935,0.006004837667492473
Remove Multilingual Data,18000,infovqa_val_anls,0.2570865428675732,0.007022334730795061
Remove Multilingual Data,18000,mme_total_score,1330.2323929571828,
Remove Multilingual Data,18000,mmmu_val_mmmu_acc,0.28444,
Remove Multilingual Data,18000,mmstar_average,0.3272633338962395,
Remove Multilingual Data,18000,ocrbench_ocrbench_accuracy,0.579,
Remove Multilingual Data,18000,seedbench_seed_all,0.5457476375764313,
Remove Multilingual Data,18000,textvqa_val_exact_match,0.55462,0.0067429981999808505