FineVision / app /src /content /assets /data /all_ratings_luis.csv
lusxvr's picture
new plots
a024e38
raw
history blame
63.3 kB
run,step,metric,value,stderr
Baseline,1000,ai2d_exact_match,0.2548575129533679,0.007843322436924496
Baseline,1000,average,0.27120689295763617,
Baseline,1000,average_rank,3.0,
Baseline,1000,chartqa_relaxed_overall,0.3308,0.009411906161401973
Baseline,1000,docvqa_val_anls,0.3528553494243383,0.005852289239342309
Baseline,1000,infovqa_val_anls,0.17320578642581314,0.006297063452679795
Baseline,1000,mme_total_score,977.4280712284914,
Baseline,1000,mmmu_val_mmmu_acc,0.25222,
Baseline,1000,mmstar_average,0.23215874078908072,
Baseline,1000,ocrbench_ocrbench_accuracy,0.286,
Baseline,1000,seedbench_seed_all,0.2563646470261256,
Baseline,1000,textvqa_val_exact_match,0.3024,0.00628900296642181
Baseline,2000,ai2d_exact_match,0.26295336787564766,0.007923526907377255
Baseline,2000,average,0.3202068275596269,
Baseline,2000,average_rank,2.8,
Baseline,2000,chartqa_relaxed_overall,0.4688,0.009982508912777261
Baseline,2000,docvqa_val_anls,0.4452261510942785,0.00614755494712251
Baseline,2000,infovqa_val_anls,0.1820547866557169,0.006217861455795791
Baseline,2000,mme_total_score,1049.3036214485794,
Baseline,2000,mmmu_val_mmmu_acc,0.24556,
Baseline,2000,mmstar_average,0.21305462434540698,
Baseline,2000,ocrbench_ocrbench_accuracy,0.395,
Baseline,2000,seedbench_seed_all,0.258532518065592,
Baseline,2000,textvqa_val_exact_match,0.41068000000000005,0.006697862330024289
Baseline,3000,ai2d_exact_match,0.25226683937823835,0.007816909588794397
Baseline,3000,average,0.3507423834414229,
Baseline,3000,average_rank,2.6,
Baseline,3000,chartqa_relaxed_overall,0.5028,0.010001843767601082
Baseline,3000,docvqa_val_anls,0.502653993831009,0.006267072346683124
Baseline,3000,infovqa_val_anls,0.21728617578189535,0.006796941784959762
Baseline,3000,mme_total_score,1170.2383953581434,
Baseline,3000,mmmu_val_mmmu_acc,0.27556,
Baseline,3000,mmstar_average,0.25432376938577683,
Baseline,3000,ocrbench_ocrbench_accuracy,0.436,
Baseline,3000,seedbench_seed_all,0.2792106725958866,
Baseline,3000,textvqa_val_exact_match,0.43658,0.006766885462882726
Baseline,4000,ai2d_exact_match,0.2645725388601036,0.007939149662089447
Baseline,4000,average,0.36961781722974835,
Baseline,4000,average_rank,2.8,
Baseline,4000,chartqa_relaxed_overall,0.5312,0.009982508912777261
Baseline,4000,docvqa_val_anls,0.5374434618615119,0.0062905728113059655
Baseline,4000,infovqa_val_anls,0.2287924838861707,0.006994568698639919
Baseline,4000,mme_total_score,1155.203781512605,
Baseline,4000,mmmu_val_mmmu_acc,0.25556,
Baseline,4000,mmstar_average,0.2575590188757354,
Baseline,4000,ocrbench_ocrbench_accuracy,0.453,
Baseline,4000,seedbench_seed_all,0.33913285158421347,
Baseline,4000,textvqa_val_exact_match,0.4593,0.006791695475025738
Baseline,5000,ai2d_exact_match,0.3125,0.008342439145556371
Baseline,5000,average,0.3974627910380972,
Baseline,5000,average_rank,2.3,
Baseline,5000,chartqa_relaxed_overall,0.5488,0.00995424828018316
Baseline,5000,docvqa_val_anls,0.552360266782429,0.006300308519952055
Baseline,5000,infovqa_val_anls,0.23425555286643698,0.007002254622066442
Baseline,5000,mme_total_score,1181.4653861544618,
Baseline,5000,mmmu_val_mmmu_acc,0.26667,
Baseline,5000,mmstar_average,0.29596648146165705,
Baseline,5000,ocrbench_ocrbench_accuracy,0.462,
Baseline,5000,seedbench_seed_all,0.43107281823235133,
Baseline,5000,textvqa_val_exact_match,0.47354000000000007,0.0068172185364497985
Baseline,6000,ai2d_exact_match,0.358160621761658,0.008629463221867162
Baseline,6000,average,0.4161227404571003,
Baseline,6000,average_rank,2.3,
Baseline,6000,chartqa_relaxed_overall,0.5628,0.00992279440175477
Baseline,6000,docvqa_val_anls,0.5747451497228876,0.00625495440870239
Baseline,6000,infovqa_val_anls,0.22152017368968838,0.006604546680525351
Baseline,6000,mme_total_score,1284.1648659463785,
Baseline,6000,mmmu_val_mmmu_acc,0.27111,
Baseline,6000,mmstar_average,0.2978489412854164,
Baseline,6000,ocrbench_ocrbench_accuracy,0.495,
Baseline,6000,seedbench_seed_all,0.4795997776542524,
Baseline,6000,textvqa_val_exact_match,0.48432,0.006800535050670284
Baseline,7000,ai2d_exact_match,0.3707901554404145,0.00869347755587734
Baseline,7000,average,0.4291083177345374,
Baseline,7000,average_rank,2.1,
Baseline,7000,chartqa_relaxed_overall,0.5656,0.009915542506251351
Baseline,7000,docvqa_val_anls,0.5940907049431567,0.006224236305767187
Baseline,7000,infovqa_val_anls,0.2515675215816963,0.007105097396092786
Baseline,7000,mme_total_score,1185.875650260104,
Baseline,7000,mmmu_val_mmmu_acc,0.26556,
Baseline,7000,mmstar_average,0.31372400960777047,
Baseline,7000,ocrbench_ocrbench_accuracy,0.504,
Baseline,7000,seedbench_seed_all,0.4964424680377988,
Baseline,7000,textvqa_val_exact_match,0.5002,0.006794794025220267
Baseline,8000,ai2d_exact_match,0.37759067357512954,0.008725299846043883
Baseline,8000,average,0.43846759477995995,
Baseline,8000,average_rank,1.9,
Baseline,8000,chartqa_relaxed_overall,0.5832,0.009862556058385773
Baseline,8000,docvqa_val_anls,0.6017336419437208,0.006231612198089698
Baseline,8000,infovqa_val_anls,0.2449256624147254,0.006992518502948913
Baseline,8000,mme_total_score,1199.2409963985594,
Baseline,8000,mmmu_val_mmmu_acc,0.28111,
Baseline,8000,mmstar_average,0.33512257186205047,
Baseline,8000,ocrbench_ocrbench_accuracy,0.51,
Baseline,8000,seedbench_seed_all,0.5024458032240133,
Baseline,8000,textvqa_val_exact_match,0.51008,0.006796301690135059
Baseline,9000,ai2d_exact_match,0.4067357512953368,0.008841214921078996
Baseline,9000,average,0.4422510732201056,
Baseline,9000,average_rank,2.5,
Baseline,9000,chartqa_relaxed_overall,0.5912,0.009834211136815875
Baseline,9000,docvqa_val_anls,0.6170968481662739,0.00617235763542544
Baseline,9000,infovqa_val_anls,0.23537031288570615,0.00670318154156447
Baseline,9000,mme_total_score,1231.5195078031213,
Baseline,9000,mmmu_val_mmmu_acc,0.25889,
Baseline,9000,mmstar_average,0.3216444898242951,
Baseline,9000,ocrbench_ocrbench_accuracy,0.515,
Baseline,9000,seedbench_seed_all,0.5120622568093385,
Baseline,9000,textvqa_val_exact_match,0.52226,0.006792711289708482
Baseline,10000,ai2d_exact_match,0.39993523316062174,0.008817096257082848
Baseline,10000,average,0.4523875703250908,
Baseline,10000,average_rank,1.9,
Baseline,10000,chartqa_relaxed_overall,0.5996,0.00980154906867574
Baseline,10000,docvqa_val_anls,0.6262613496433054,0.006147756371688175
Baseline,10000,infovqa_val_anls,0.263290074230132,0.007186788766942786
Baseline,10000,mme_total_score,1240.8218287314926,
Baseline,10000,mmmu_val_mmmu_acc,0.28778,
Baseline,10000,mmstar_average,0.32972717906018517,
Baseline,10000,ocrbench_ocrbench_accuracy,0.517,
Baseline,10000,seedbench_seed_all,0.5217342968315731,
Baseline,10000,textvqa_val_exact_match,0.5261600000000001,0.006785774843600811
Baseline,11000,ai2d_exact_match,0.422279792746114,0.008889771831066474
Baseline,11000,average,0.4561398159525099,
Baseline,11000,average_rank,2.3,
Baseline,11000,chartqa_relaxed_overall,0.6104,0.009755142291143075
Baseline,11000,docvqa_val_anls,0.6373130149166712,0.006128022584995044
Baseline,11000,infovqa_val_anls,0.24419378339723755,0.006897644885887063
Baseline,11000,mme_total_score,1322.9488795518205,
Baseline,11000,mmmu_val_mmmu_acc,0.27778,
Baseline,11000,mmstar_average,0.3298563439522548,
Baseline,11000,ocrbench_ocrbench_accuracy,0.521,
Baseline,11000,seedbench_seed_all,0.5237354085603113,
Baseline,11000,textvqa_val_exact_match,0.5387,0.006770851562852138
Baseline,12000,ai2d_exact_match,0.42001295336787564,0.008883255931688034
Baseline,12000,average,0.4582751140055433,
Baseline,12000,average_rank,2.4,
Baseline,12000,chartqa_relaxed_overall,0.618,0.009719474639861454
Baseline,12000,docvqa_val_anls,0.6393961983751871,0.0061228747388476674
Baseline,12000,infovqa_val_anls,0.24798874058574302,0.006855374548993139
Baseline,12000,mme_total_score,1225.6453581432572,
Baseline,12000,mmmu_val_mmmu_acc,0.27889,
Baseline,12000,mmstar_average,0.34010867846816534,
Baseline,12000,ocrbench_ocrbench_accuracy,0.512,
Baseline,12000,seedbench_seed_all,0.5350194552529183,
Baseline,12000,textvqa_val_exact_match,0.5330600000000001,0.006777713092109446
Baseline,13000,ai2d_exact_match,0.4375,0.008928571428571428
Baseline,13000,average,0.4692868662590049,
Baseline,13000,average_rank,1.7,
Baseline,13000,chartqa_relaxed_overall,0.6148,0.00973479791861169
Baseline,13000,docvqa_val_anls,0.6511374872549951,0.006086953065248391
Baseline,13000,infovqa_val_anls,0.24465055100441893,0.006808432538374664
Baseline,13000,mme_total_score,1281.7122849139657,
Baseline,13000,mmmu_val_mmmu_acc,0.28222,
Baseline,13000,mmstar_average,0.3453069542917521,
Baseline,13000,ocrbench_ocrbench_accuracy,0.549,
Baseline,13000,seedbench_seed_all,0.5442468037798777,
Baseline,13000,textvqa_val_exact_match,0.55472,0.0067416788982325
Baseline,14000,ai2d_exact_match,0.4572538860103627,0.00896620675297095
Baseline,14000,average,0.47352486841689195,
Baseline,14000,average_rank,1.9,
Baseline,14000,chartqa_relaxed_overall,0.6172,0.009723347231923635
Baseline,14000,docvqa_val_anls,0.6502269393708169,0.006057950730638126
Baseline,14000,infovqa_val_anls,0.25805460837190913,0.007037735231659539
Baseline,14000,mme_total_score,1309.1444577831132,
Baseline,14000,mmmu_val_mmmu_acc,0.28111,
Baseline,14000,mmstar_average,0.34575818188776586,
Baseline,14000,ocrbench_ocrbench_accuracy,0.551,
Baseline,14000,seedbench_seed_all,0.5483602001111729,
Baseline,14000,textvqa_val_exact_match,0.55276,0.006751206724612103
Baseline,15000,ai2d_exact_match,0.45045336787564766,0.008954861634252399
Baseline,15000,average,0.47878665012878824,
Baseline,15000,average_rank,1.4,
Baseline,15000,chartqa_relaxed_overall,0.612,0.009747841205275417
Baseline,15000,docvqa_val_anls,0.6621413031955148,0.006056838050222495
Baseline,15000,infovqa_val_anls,0.2706898598157733,0.007200315730154543
Baseline,15000,mme_total_score,1384.2171868747498,
Baseline,15000,mmmu_val_mmmu_acc,0.30222,
Baseline,15000,mmstar_average,0.35408135695920684,
Baseline,15000,ocrbench_ocrbench_accuracy,0.558,
Baseline,15000,seedbench_seed_all,0.5411339633129516,
Baseline,15000,textvqa_val_exact_match,0.5583600000000001,0.0067279027203879065
Baseline,16000,ai2d_exact_match,0.45077720207253885,0.008955440137395838
Baseline,16000,average,0.47665128022935843,
Baseline,16000,average_rank,2.1,
Baseline,16000,chartqa_relaxed_overall,0.632,0.00964715642305132
Baseline,16000,docvqa_val_anls,0.6709415729142987,0.005999818105621502
Baseline,16000,infovqa_val_anls,0.26050032542402035,0.006997451875879188
Baseline,16000,mme_total_score,1317.8491396558625,
Baseline,16000,mmmu_val_mmmu_acc,0.27556,
Baseline,16000,mmstar_average,0.33214333327093315,
Baseline,16000,ocrbench_ocrbench_accuracy,0.56,
Baseline,16000,seedbench_seed_all,0.5463590883824346,
Baseline,16000,textvqa_val_exact_match,0.56158,0.006723854754867398
Baseline,17000,ai2d_exact_match,0.45919689119170987,0.008969138793675545
Baseline,17000,average,0.4777141780162423,
Baseline,17000,average_rank,1.8,
Baseline,17000,chartqa_relaxed_overall,0.632,0.00964715642305132
Baseline,17000,docvqa_val_anls,0.6796338519136422,0.005948761388267941
Baseline,17000,infovqa_val_anls,0.28070956072505215,0.007298333094144192
Baseline,17000,mme_total_score,1381.9161664665867,
Baseline,17000,mmmu_val_mmmu_acc,0.27667,
Baseline,17000,mmstar_average,0.3370289492329521,
Baseline,17000,ocrbench_ocrbench_accuracy,0.519,
Baseline,17000,seedbench_seed_all,0.5510283490828238,
Baseline,17000,textvqa_val_exact_match,0.56416,0.006724830373229479
Baseline,18000,ai2d_exact_match,0.46567357512953367,0.008977921602780726
Baseline,18000,average,0.4819834595278701,
Baseline,18000,average_rank,1.6,
Baseline,18000,chartqa_relaxed_overall,0.6376,0.009615793331418735
Baseline,18000,docvqa_val_anls,0.6775884603912571,0.005972234236435759
Baseline,18000,infovqa_val_anls,0.27154318420389256,0.007164903131667027
Baseline,18000,mme_total_score,1336.922769107643,
Baseline,18000,mmmu_val_mmmu_acc,0.28667,
Baseline,18000,mmstar_average,0.34482796716566916,
Baseline,18000,ocrbench_ocrbench_accuracy,0.533,
Baseline,18000,seedbench_seed_all,0.5543079488604781,
Baseline,18000,textvqa_val_exact_match,0.5666399999999999,0.006713392287599574
Baseline,19000,ai2d_exact_match,0.4682642487046632,0.008981008686994101
Baseline,19000,average,0.4899006713916878,
Baseline,19000,average_rank,1.4,
Baseline,19000,chartqa_relaxed_overall,0.6444,0.009575809858898698
Baseline,19000,docvqa_val_anls,0.678226526479947,0.005970619221588814
Baseline,19000,infovqa_val_anls,0.26993847247278,0.0071348470764911525
Baseline,19000,mme_total_score,1406.6628651460583,
Baseline,19000,mmmu_val_mmmu_acc,0.28333,
Baseline,19000,mmstar_average,0.356220913822775,
Baseline,19000,ocrbench_ocrbench_accuracy,0.577,
Baseline,19000,seedbench_seed_all,0.554585881045025,
Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
Baseline,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662
Baseline,20000,average,0.4873169067639118,
Baseline,20000,average_rank,1.4,
Baseline,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618
Baseline,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767
Baseline,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538
Baseline,20000,mme_total_score,1324.6738695478193,
Baseline,20000,mmmu_val_mmmu_acc,0.30111,
Baseline,20000,mmstar_average,0.33806766134497995,
Baseline,20000,ocrbench_ocrbench_accuracy,0.555,
Baseline,20000,seedbench_seed_all,0.5587548638132296,
Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
2,1000,ai2d_exact_match,0.27331606217616583,0.008021157484423315
2,1000,average,0.2964817591841572,
2,1000,average_rank,2.0,
2,1000,chartqa_relaxed_overall,0.4016,0.009806398022560107
2,1000,docvqa_val_anls,0.38703197724603455,0.0059317827343935035
2,1000,infovqa_val_anls,0.17280000404070578,0.006201144732918485
2,1000,mme_total_score,961.9496798719488,
2,1000,mmmu_val_mmmu_acc,0.27556,
2,1000,mmstar_average,0.20051212493658782,
2,1000,ocrbench_ocrbench_accuracy,0.331,
2,1000,seedbench_seed_all,0.25219566425792106,
2,1000,textvqa_val_exact_match,0.37432,0.006614110432353112
2,2000,ai2d_exact_match,0.27428756476683935,0.008030027397236182
2,2000,average,0.3376151239444176,
2,2000,average_rank,1.8,
2,2000,chartqa_relaxed_overall,0.4984,0.010001949389825897
2,2000,docvqa_val_anls,0.47035044389194575,0.006171152822696564
2,2000,infovqa_val_anls,0.21264444578610614,0.006798221032077756
2,2000,mme_total_score,995.0442176870747,
2,2000,mmmu_val_mmmu_acc,0.26111,
2,2000,mmstar_average,0.2371410151404708,
2,2000,ocrbench_ocrbench_accuracy,0.386,
2,2000,seedbench_seed_all,0.27276264591439686,
2,2000,textvqa_val_exact_match,0.42583999999999994,0.006752390527477444
2,3000,ai2d_exact_match,0.28886010362694303,0.008157423105367313
2,3000,average,0.3650476191493284,
2,3000,average_rank,2.1,
2,3000,chartqa_relaxed_overall,0.5296,0.009984458511341809
2,3000,docvqa_val_anls,0.5084048093337913,0.006266409805144786
2,3000,infovqa_val_anls,0.226696840609911,0.0070183318907300766
2,3000,mme_total_score,966.6394557823129,
2,3000,mmmu_val_mmmu_acc,0.27556,
2,3000,mmstar_average,0.25798680765602255,
2,3000,ocrbench_ocrbench_accuracy,0.423,
2,3000,seedbench_seed_all,0.3360200111172874,
2,3000,textvqa_val_exact_match,0.4393,0.0067683280101374045
2,4000,ai2d_exact_match,0.3180051813471503,0.00838183912252989
2,4000,average,0.3939919625964655,
2,4000,average_rank,2.0,
2,4000,chartqa_relaxed_overall,0.5392,0.009971214271372281
2,4000,docvqa_val_anls,0.5318426170932731,0.006287567577266625
2,4000,infovqa_val_anls,0.24176968468370258,0.007226680233814427
2,4000,mme_total_score,1052.9128651460585,
2,4000,mmmu_val_mmmu_acc,0.27778,
2,4000,mmstar_average,0.30433696178936676,
2,4000,ocrbench_ocrbench_accuracy,0.447,
2,4000,seedbench_seed_all,0.42779321845469703,
2,4000,textvqa_val_exact_match,0.4581999999999999,0.006800867765254084
2,5000,ai2d_exact_match,0.3448834196891192,0.008555140353607655
2,5000,average,0.40963271881608265,
2,5000,average_rank,2.1,
2,5000,chartqa_relaxed_overall,0.548,0.009955804699716018
2,5000,docvqa_val_anls,0.575799913178854,0.006211088978189562
2,5000,infovqa_val_anls,0.25711323262099633,0.0073775881337487925
2,5000,mme_total_score,1010.4850940376151,
2,5000,mmmu_val_mmmu_acc,0.27667,
2,5000,mmstar_average,0.2871021117490485,
2,5000,ocrbench_ocrbench_accuracy,0.455,
2,5000,seedbench_seed_all,0.46642579210672597,
2,5000,textvqa_val_exact_match,0.4757,0.006785477915527278
2,6000,ai2d_exact_match,0.3795336787564767,0.008734055590837087
2,6000,average,0.423161039572533,
2,6000,average_rank,1.4,
2,6000,chartqa_relaxed_overall,0.5668,0.009912336039617753
2,6000,docvqa_val_anls,0.5827000147792567,0.006217654063020532
2,6000,infovqa_val_anls,0.24558020684647988,0.0071473774205313935
2,6000,mme_total_score,1096.4623849539817,
2,6000,mmmu_val_mmmu_acc,0.27222,
2,6000,mmstar_average,0.3026938215293386,
2,6000,ocrbench_ocrbench_accuracy,0.475,
2,6000,seedbench_seed_all,0.49494163424124515,
2,6000,textvqa_val_exact_match,0.4889799999999999,0.006798040496416463
2,7000,ai2d_exact_match,0.3863341968911917,0.00876353292332671
2,7000,average,0.43260201849012403,
2,7000,average_rank,2.1,
2,7000,chartqa_relaxed_overall,0.572,0.009897756626351943
2,7000,docvqa_val_anls,0.5958889673096114,0.006197986096231253
2,7000,infovqa_val_anls,0.24831461076228495,0.0071830066608344805
2,7000,mme_total_score,1098.0422168867549,
2,7000,mmmu_val_mmmu_acc,0.28333,
2,7000,mmstar_average,0.31254705626181345,
2,7000,ocrbench_ocrbench_accuracy,0.493,
2,7000,seedbench_seed_all,0.5060033351862145,
2,7000,textvqa_val_exact_match,0.496,0.006798444216786202
2,8000,ai2d_exact_match,0.4025259067357513,0.00882649222855129
2,8000,average,0.4423608272909927,
2,8000,average_rank,2.1,
2,8000,chartqa_relaxed_overall,0.5832,0.009862556058385773
2,8000,docvqa_val_anls,0.6081292058298197,0.006190473638311687
2,8000,infovqa_val_anls,0.25707448915865344,0.007179410853014501
2,8000,mme_total_score,1100.4132653061224,
2,8000,mmmu_val_mmmu_acc,0.28,
2,8000,mmstar_average,0.3170263263849818,
2,8000,ocrbench_ocrbench_accuracy,0.504,
2,8000,seedbench_seed_all,0.5167315175097277,
2,8000,textvqa_val_exact_match,0.5125600000000001,0.006790351320381798
2,9000,ai2d_exact_match,0.4106217616580311,0.008854207883828036
2,9000,average,0.4477239927349069,
2,9000,average_rank,1.8,
2,9000,chartqa_relaxed_overall,0.5884,0.009844437067525526
2,9000,docvqa_val_anls,0.6233981201771228,0.006152789393932141
2,9000,infovqa_val_anls,0.25099979430746866,0.006997337550850154
2,9000,mme_total_score,1100.9423769507803,
2,9000,mmmu_val_mmmu_acc,0.27778,
2,9000,mmstar_average,0.3172130122236236,
2,9000,ocrbench_ocrbench_accuracy,0.518,
2,9000,seedbench_seed_all,0.5178432462479156,
2,9000,textvqa_val_exact_match,0.5252600000000001,0.006790435073078627
2,10000,ai2d_exact_match,0.41904145077720206,0.008880404559123598
2,10000,average,0.450650749528602,
2,10000,average_rank,2.2,
2,10000,chartqa_relaxed_overall,0.5956,0.009817474681589429
2,10000,docvqa_val_anls,0.6254308760372823,0.006142114135609194
2,10000,infovqa_val_anls,0.23792853517114784,0.006776022015067822
2,10000,mme_total_score,1157.0735294117646,
2,10000,mmmu_val_mmmu_acc,0.27667,
2,10000,mmstar_average,0.31479930233765546,
2,10000,ocrbench_ocrbench_accuracy,0.53,
2,10000,seedbench_seed_all,0.5238465814341301,
2,10000,textvqa_val_exact_match,0.53254,0.006777862315178193
2,11000,ai2d_exact_match,0.43555699481865284,0.008924095913829727
2,11000,average,0.4613124059808435,
2,11000,average_rank,1.8,
2,11000,chartqa_relaxed_overall,0.5984,0.009806398022560106
2,11000,docvqa_val_anls,0.6453200065413649,0.0060722869307158955
2,11000,infovqa_val_anls,0.24059820801450565,0.006814633527776416
2,11000,mme_total_score,1262.6299519807922,
2,11000,mmmu_val_mmmu_acc,0.3,
2,11000,mmstar_average,0.33559717819403534,
2,11000,ocrbench_ocrbench_accuracy,0.527,
2,11000,seedbench_seed_all,0.5226792662590328,
2,11000,textvqa_val_exact_match,0.54666,0.0067526356704400645
2,12000,ai2d_exact_match,0.44073834196891193,0.008935721506916777
2,12000,average,0.46516707040731664,
2,12000,average_rank,1.9,
2,12000,chartqa_relaxed_overall,0.598,0.009808000752013664
2,12000,docvqa_val_anls,0.6402481933825662,0.006107198073878916
2,12000,infovqa_val_anls,0.2601009880983462,0.0070991293032872695
2,12000,mme_total_score,1112.7142857142858,
2,12000,mmmu_val_mmmu_acc,0.31,
2,12000,mmstar_average,0.32603422027717016,
2,12000,ocrbench_ocrbench_accuracy,0.547,
2,12000,seedbench_seed_all,0.523401889938855,
2,12000,textvqa_val_exact_match,0.54098,0.006767635340177507
2,13000,ai2d_exact_match,0.44041450777202074,0.008935023865613881
2,13000,average,0.46553651974650545,
2,13000,average_rank,2.2,
2,13000,chartqa_relaxed_overall,0.6092,0.009760545645634788
2,13000,docvqa_val_anls,0.6433035796450283,0.006095519860378371
2,13000,infovqa_val_anls,0.2594356954563223,0.007105630672634776
2,13000,mme_total_score,1207.9944977991197,
2,13000,mmmu_val_mmmu_acc,0.28111,
2,13000,mmstar_average,0.3383640832831994,
2,13000,ocrbench_ocrbench_accuracy,0.539,
2,13000,seedbench_seed_all,0.5294608115619789,
2,13000,textvqa_val_exact_match,0.5495399999999999,0.006753508692222968
2,14000,ai2d_exact_match,0.44462435233160624,0.008943792697097361
2,14000,average,0.46921726913331274,
2,14000,average_rank,2.1,
2,14000,chartqa_relaxed_overall,0.612,0.009747841205275417
2,14000,docvqa_val_anls,0.65515509916543,0.006051151525703575
2,14000,infovqa_val_anls,0.2677755343748415,0.007100955702899581
2,14000,mme_total_score,1163.8374349739895,
2,14000,mmmu_val_mmmu_acc,0.28556,
2,14000,mmstar_average,0.32353974705611904,
2,14000,ocrbench_ocrbench_accuracy,0.543,
2,14000,seedbench_seed_all,0.5332406892718177,
2,14000,textvqa_val_exact_match,0.55806,0.006725656411892758
2,15000,ai2d_exact_match,0.44624352331606215,0.008946992176353901
2,15000,average,0.4737967933693773,
2,15000,average_rank,2.2,
2,15000,chartqa_relaxed_overall,0.618,0.009719474639861454
2,15000,docvqa_val_anls,0.6614354910767699,0.006013191753461033
2,15000,infovqa_val_anls,0.26176573129112124,0.007093151287118967
2,15000,mme_total_score,1229.438475390156,
2,15000,mmmu_val_mmmu_acc,0.29556,
2,15000,mmstar_average,0.32387576651370553,
2,15000,ocrbench_ocrbench_accuracy,0.561,
2,15000,seedbench_seed_all,0.5351306281267371,
2,15000,textvqa_val_exact_match,0.56116,0.006722390124486763
2,16000,ai2d_exact_match,0.4478626943005181,0.0089500956222288
2,16000,average,0.4748174802839308,
2,16000,average_rank,2.2,
2,16000,chartqa_relaxed_overall,0.6192,0.009713613422114641
2,16000,docvqa_val_anls,0.6585392720477772,0.0060616936904167125
2,16000,infovqa_val_anls,0.2653830027853819,0.007108417358601188
2,16000,mme_total_score,1157.8782513005203,
2,16000,mmmu_val_mmmu_acc,0.29889,
2,16000,mmstar_average,0.3217940710425999,
2,16000,ocrbench_ocrbench_accuracy,0.561,
2,16000,seedbench_seed_all,0.5349082823790995,
2,16000,textvqa_val_exact_match,0.5657800000000001,0.006716429140851619
2,17000,ai2d_exact_match,0.4540155440414508,0.00896101461327443
2,17000,average,0.4765363782507968,
2,17000,average_rank,2.2,
2,17000,chartqa_relaxed_overall,0.6184,0.009717527882093043
2,17000,docvqa_val_anls,0.6605538305641464,0.006048170352990264
2,17000,infovqa_val_anls,0.27438351817158263,0.007183740557624646
2,17000,mme_total_score,1231.31512605042,
2,17000,mmmu_val_mmmu_acc,0.30111,
2,17000,mmstar_average,0.3273406426639828,
2,17000,ocrbench_ocrbench_accuracy,0.555,
2,17000,seedbench_seed_all,0.5349638688160089,
2,17000,textvqa_val_exact_match,0.5630599999999999,0.006726822229512349
2,18000,ai2d_exact_match,0.4540155440414508,0.008961014613274428
2,18000,average,0.4749977548559891,
2,18000,average_rank,2.3,
2,18000,chartqa_relaxed_overall,0.614,0.009738559226822298
2,18000,docvqa_val_anls,0.6647865229953943,0.00602531683337989
2,18000,infovqa_val_anls,0.26486387970800995,0.006977819681460442
2,18000,mme_total_score,1245.188775510204,
2,18000,mmmu_val_mmmu_acc,0.29222,
2,18000,mmstar_average,0.32473355790957514,
2,18000,ocrbench_ocrbench_accuracy,0.555,
2,18000,seedbench_seed_all,0.5365202890494719,
2,18000,textvqa_val_exact_match,0.56884,0.006699820027260398
2,19000,ai2d_exact_match,0.45466321243523317,0.00896208360613934
2,19000,average,0.4768734192584572,
2,19000,average_rank,2.6,
2,19000,chartqa_relaxed_overall,0.62,0.009709671008043154
2,19000,docvqa_val_anls,0.6628357233664792,0.006042075311037487
2,19000,infovqa_val_anls,0.2657171063652747,0.007078002720459511
2,19000,mme_total_score,1248.7323929571828,
2,19000,mmmu_val_mmmu_acc,0.28889,
2,19000,mmstar_average,0.32802808302127334,
2,19000,ocrbench_ocrbench_accuracy,0.565,
2,19000,seedbench_seed_all,0.5399666481378543,
2,19000,textvqa_val_exact_match,0.5667599999999999,0.00671422643700147
2,20000,ai2d_exact_match,0.46178756476683935,0.008972834678172942
2,20000,average,0.47802392695549656,
2,20000,average_rank,2.3,
2,20000,chartqa_relaxed_overall,0.618,0.009719474639861454
2,20000,docvqa_val_anls,0.666568303416173,0.0059980334517589756
2,20000,infovqa_val_anls,0.2651324480102521,0.0070565217028431
2,20000,mme_total_score,1233.6009403761504,
2,20000,mmmu_val_mmmu_acc,0.28,
2,20000,mmstar_average,0.33277914424945065,
2,20000,ocrbench_ocrbench_accuracy,0.562,
2,20000,seedbench_seed_all,0.5381878821567537,
2,20000,textvqa_val_exact_match,0.5777599999999999,0.00668799090343766
3,1000,ai2d_exact_match,0.2661917098445596,0.007954634970279362
3,1000,average,0.2680725844272073,
3,1000,average_rank,3.2,
3,1000,chartqa_relaxed_overall,0.3476,0.009526069199715017
3,1000,docvqa_val_anls,0.3752729856163278,0.005939283617489936
3,1000,infovqa_val_anls,0.17325429231808173,0.0062340220795234725
3,1000,mme_total_score,707.53231292517,
3,1000,mmmu_val_mmmu_acc,0.23889,
3,1000,mmstar_average,0.19784737378907616,
3,1000,ocrbench_ocrbench_accuracy,0.288,
3,1000,seedbench_seed_all,0.25041689827682045,
3,1000,textvqa_val_exact_match,0.27518000000000004,0.006128613668775364
3,2000,ai2d_exact_match,0.27266839378238344,0.008015217564479073
3,2000,average,0.31253656058741547,
3,2000,average_rank,3.4,
3,2000,chartqa_relaxed_overall,0.4308,0.00990574548014469
3,2000,docvqa_val_anls,0.4481749259885666,0.00619992092326252
3,2000,infovqa_val_anls,0.19674507942801486,0.006580664003046453
3,2000,mme_total_score,786.0510204081633,
3,2000,mmmu_val_mmmu_acc,0.23556,
3,2000,mmstar_average,0.19768658271923586,
3,2000,ocrbench_ocrbench_accuracy,0.377,
3,2000,seedbench_seed_all,0.2653140633685381,
3,2000,textvqa_val_exact_match,0.38888,0.006660461055234364
3,3000,ai2d_exact_match,0.28270725388601037,0.008104913435481193
3,3000,average,0.34936609328629703,
3,3000,average_rank,3.2,
3,3000,chartqa_relaxed_overall,0.4844,0.009997131241172205
3,3000,docvqa_val_anls,0.49044354643512195,0.0062294371457984315
3,3000,infovqa_val_anls,0.21295743099446893,0.006855571779287104
3,3000,mme_total_score,861.8877551020407,
3,3000,mmmu_val_mmmu_acc,0.24889,
3,3000,mmstar_average,0.258368014597926,
3,3000,ocrbench_ocrbench_accuracy,0.394,
3,3000,seedbench_seed_all,0.3434685936631462,
3,3000,textvqa_val_exact_match,0.42906000000000005,0.0067494454796565755
3,4000,ai2d_exact_match,0.3325777202072539,0.00847966336079129
3,4000,average,0.3855383645559374,
3,4000,average_rank,2.8,
3,4000,chartqa_relaxed_overall,0.508,0.010000720262176365
3,4000,docvqa_val_anls,0.5226854794419781,0.006293466169647169
3,4000,infovqa_val_anls,0.2322658206586996,0.007103396837310004
3,4000,mme_total_score,912.9521808723489,
3,4000,mmmu_val_mmmu_acc,0.26667,
3,4000,mmstar_average,0.3070035703119584,
3,4000,ocrbench_ocrbench_accuracy,0.438,
3,4000,seedbench_seed_all,0.41684269038354643,
3,4000,textvqa_val_exact_match,0.4458,0.006781745381100857
3,5000,ai2d_exact_match,0.34520725388601037,0.008557040186364025
3,5000,average,0.39676974212184324,
3,5000,average_rank,2.7,
3,5000,chartqa_relaxed_overall,0.51,0.01
3,5000,docvqa_val_anls,0.5420071464866951,0.006256421242173299
3,5000,infovqa_val_anls,0.21485812900527704,0.0066319183580626885
3,5000,mme_total_score,957.2279911964786,
3,5000,mmmu_val_mmmu_acc,0.26111,
3,5000,mmstar_average,0.30632830702822333,
3,5000,ocrbench_ocrbench_accuracy,0.44,
3,5000,seedbench_seed_all,0.47031684269038354,
3,5000,textvqa_val_exact_match,0.4811,0.00681344572213808
3,6000,ai2d_exact_match,0.37629533678756477,0.008719379877890884
3,6000,average,0.40447034433869705,
3,6000,average_rank,3.3,
3,6000,chartqa_relaxed_overall,0.5084,0.010000589018267121
3,6000,docvqa_val_anls,0.5540669563018141,0.006258072329892215
3,6000,infovqa_val_anls,0.216535214445592,0.00668611609159469
3,6000,mme_total_score,864.5272108843537,
3,6000,mmmu_val_mmmu_acc,0.26889,
3,6000,mmstar_average,0.2932406887895669,
3,6000,ocrbench_ocrbench_accuracy,0.454,
3,6000,seedbench_seed_all,0.4848249027237354,
3,6000,textvqa_val_exact_match,0.48398,0.006803464510517356
3,7000,ai2d_exact_match,0.3947538860103627,0.008797532848529207
3,7000,average,0.42355543935120793,
3,7000,average_rank,2.7,
3,7000,chartqa_relaxed_overall,0.5488,0.00995424828018316
3,7000,docvqa_val_anls,0.5797391833660968,0.006220930330963092
3,7000,infovqa_val_anls,0.2221619185818123,0.00673372198453672
3,7000,mme_total_score,866.3928571428571,
3,7000,mmmu_val_mmmu_acc,0.28667,
3,7000,mmstar_average,0.3209799250686363,
3,7000,ocrbench_ocrbench_accuracy,0.476,
3,7000,seedbench_seed_all,0.49327404113396334,
3,7000,textvqa_val_exact_match,0.48962000000000006,0.006807769110659733
3,8000,ai2d_exact_match,0.4102979274611399,0.008853146969712133
3,8000,average,0.42791468613133354,
3,8000,average_rank,3.4,
3,8000,chartqa_relaxed_overall,0.5456,0.00996031822662661
3,8000,docvqa_val_anls,0.5824594046059755,0.006268157085435711
3,8000,infovqa_val_anls,0.22074277862778585,0.006618518997755148
3,8000,mme_total_score,788.9880952380953,
3,8000,mmmu_val_mmmu_acc,0.27556,
3,8000,mmstar_average,0.32537357643818443,
3,8000,ocrbench_ocrbench_accuracy,0.5,
3,8000,seedbench_seed_all,0.5012784880489161,
3,8000,textvqa_val_exact_match,0.48991999999999997,0.006810591424473371
3,9000,ai2d_exact_match,0.4251943005181347,0.0088978675214111
3,9000,average,0.4411468725875502,
3,9000,average_rank,2.9,
3,9000,chartqa_relaxed_overall,0.5648,0.009917647296166388
3,9000,docvqa_val_anls,0.6050413765127355,0.006187758928771102
3,9000,infovqa_val_anls,0.23301995192200392,0.00676964747288323
3,9000,mme_total_score,825.0221088435375,
3,9000,mmmu_val_mmmu_acc,0.27556,
3,9000,mmstar_average,0.33219983189483276,
3,9000,ocrbench_ocrbench_accuracy,0.504,
3,9000,seedbench_seed_all,0.5115063924402445,
3,9000,textvqa_val_exact_match,0.519,0.006787356896666665
3,10000,ai2d_exact_match,0.4258419689119171,0.00889962357526378
3,10000,average,0.44419201562479543,
3,10000,average_rank,2.7,
3,10000,chartqa_relaxed_overall,0.576,0.009885782289560632
3,10000,docvqa_val_anls,0.6087522279355707,0.006173079977045839
3,10000,infovqa_val_anls,0.24383042893389267,0.0069221731872859795
3,10000,mme_total_score,915.8061224489795,
3,10000,mmmu_val_mmmu_acc,0.27333,
3,10000,mmstar_average,0.3351679228462254,
3,10000,ocrbench_ocrbench_accuracy,0.489,
3,10000,seedbench_seed_all,0.5180655919955531,
3,10000,textvqa_val_exact_match,0.5277400000000001,0.006769908774345677
3,11000,ai2d_exact_match,0.43426165803108807,0.008921034830887029
3,11000,average,0.45138194167282136,
3,11000,average_rank,2.9,
3,11000,chartqa_relaxed_overall,0.5784,0.009878279615563902
3,11000,docvqa_val_anls,0.6240570866567314,0.006144737191710238
3,11000,infovqa_val_anls,0.2562175057951717,0.0071028888697453095
3,11000,mme_total_score,852.3894557823129,
3,11000,mmmu_val_mmmu_acc,0.28778,
3,11000,mmstar_average,0.3331474836051967,
3,11000,ocrbench_ocrbench_accuracy,0.5,
3,11000,seedbench_seed_all,0.520733740967204,
3,11000,textvqa_val_exact_match,0.5278400000000001,0.00678178334931745
3,12000,ai2d_exact_match,0.4381476683937824,0.008930032335354965
3,12000,average,0.45691171338244096,
3,12000,average_rank,2.5,
3,12000,chartqa_relaxed_overall,0.572,0.009897756626351943
3,12000,docvqa_val_anls,0.6273497290110698,0.006129247411332687
3,12000,infovqa_val_anls,0.268135358118058,0.007380056393275344
3,12000,mme_total_score,893.8265306122448,
3,12000,mmmu_val_mmmu_acc,0.29556,
3,12000,mmstar_average,0.34474290394073753,
3,12000,ocrbench_ocrbench_accuracy,0.508,
3,12000,seedbench_seed_all,0.5255697609783213,
3,12000,textvqa_val_exact_match,0.5327,0.006782133990735781
3,13000,ai2d_exact_match,0.43458549222797926,0.008921805911548512
3,13000,average,0.4607824778908788,
3,13000,average_rank,2.7,
3,13000,chartqa_relaxed_overall,0.5876,0.009847298295140926
3,13000,docvqa_val_anls,0.6386402725745638,0.006069984676680257
3,13000,infovqa_val_anls,0.2536816276782758,0.00704241123014852
3,13000,mme_total_score,941.5953381352541,
3,13000,mmmu_val_mmmu_acc,0.29667,
3,13000,mmstar_average,0.34638755445148733,
3,13000,ocrbench_ocrbench_accuracy,0.53,
3,13000,seedbench_seed_all,0.5272373540856031,
3,13000,textvqa_val_exact_match,0.53224,0.00678673179267349
3,14000,ai2d_exact_match,0.43490932642487046,0.008922573118260885
3,14000,average,0.4621098839598732,
3,14000,average_rank,2.8,
3,14000,chartqa_relaxed_overall,0.5936,0.009825183443166683
3,14000,docvqa_val_anls,0.6373184890679852,0.006105256249191251
3,14000,infovqa_val_anls,0.2624975120280117,0.007131056805776271
3,14000,mme_total_score,901.2585034013605,
3,14000,mmmu_val_mmmu_acc,0.28444,
3,14000,mmstar_average,0.34930389493288816,
3,14000,ocrbench_ocrbench_accuracy,0.517,
3,14000,seedbench_seed_all,0.5355197331851028,
3,14000,textvqa_val_exact_match,0.5444,0.006752217894092123
3,15000,ai2d_exact_match,0.44527202072538863,0.008945084019331405
3,15000,average,0.46643076140543904,
3,15000,average_rank,3.0,
3,15000,chartqa_relaxed_overall,0.5848,0.00985710144918839
3,15000,docvqa_val_anls,0.642316016710227,0.006100312721783546
3,15000,infovqa_val_anls,0.2596632231498878,0.007146587424008848
3,15000,mme_total_score,891.8367346938775,
3,15000,mmmu_val_mmmu_acc,0.29778,
3,15000,mmstar_average,0.34413882163543197,
3,15000,ocrbench_ocrbench_accuracy,0.538,
3,15000,seedbench_seed_all,0.5361867704280155,
3,15000,textvqa_val_exact_match,0.54972,0.006745330549116431
3,16000,ai2d_exact_match,0.4494818652849741,0.008953103134587205
3,16000,average,0.46786516199576034,
3,16000,average_rank,2.7,
3,16000,chartqa_relaxed_overall,0.5976,0.009809596692775395
3,16000,docvqa_val_anls,0.6432815750341822,0.006081847680686157
3,16000,infovqa_val_anls,0.2702450654855036,0.007372825383364985
3,16000,mme_total_score,919.3826530612245,
3,16000,mmmu_val_mmmu_acc,0.28333,
3,16000,mmstar_average,0.3386692973489569,
3,16000,ocrbench_ocrbench_accuracy,0.534,
3,16000,seedbench_seed_all,0.5415786548082268,
3,16000,textvqa_val_exact_match,0.5526,0.006745409410081935
3,17000,ai2d_exact_match,0.4494818652849741,0.008953103134587206
3,17000,average,0.4694732091424512,
3,17000,average_rank,2.8,
3,17000,chartqa_relaxed_overall,0.596,0.009815912634917984
3,17000,docvqa_val_anls,0.6468732282054332,0.006069886071041202
3,17000,infovqa_val_anls,0.2650584835459577,0.0072427928867972455
3,17000,mme_total_score,889.5646258503401,
3,17000,mmmu_val_mmmu_acc,0.29333,
3,17000,mmstar_average,0.342978718252922,
3,17000,ocrbench_ocrbench_accuracy,0.53,
3,17000,seedbench_seed_all,0.5418565869927737,
3,17000,textvqa_val_exact_match,0.5596800000000001,0.006734324743131207
3,18000,ai2d_exact_match,0.45531088082901555,0.008963137311190377
3,18000,average,0.46991408851845295,
3,18000,average_rank,2.7,
3,18000,chartqa_relaxed_overall,0.6036,0.009784943231599163
3,18000,docvqa_val_anls,0.6501128555487647,0.006068985343727089
3,18000,infovqa_val_anls,0.26796275265157754,0.007202201134473747
3,18000,mme_total_score,894.1054421768707,
3,18000,mmmu_val_mmmu_acc,0.28333,
3,18000,mmstar_average,0.33590517144994875,
3,18000,ocrbench_ocrbench_accuracy,0.534,
3,18000,seedbench_seed_all,0.5412451361867704,
3,18000,textvqa_val_exact_match,0.5577599999999999,0.0067408786051132655
3,19000,ai2d_exact_match,0.4498056994818653,0.008953693133598168
3,19000,average,0.47011136523574254,
3,19000,average_rank,3.0,
3,19000,chartqa_relaxed_overall,0.6096,0.009758751420735989
3,19000,docvqa_val_anls,0.6538834113203496,0.006040538366936906
3,19000,infovqa_val_anls,0.2705360277052952,0.007291872911349649
3,19000,mme_total_score,906.3231292517007,
3,19000,mmmu_val_mmmu_acc,0.27556,
3,19000,mmstar_average,0.3356215177081144,
3,19000,ocrbench_ocrbench_accuracy,0.539,
3,19000,seedbench_seed_all,0.5441356309060589,
3,19000,textvqa_val_exact_match,0.5528599999999999,0.006753272200724876
3,20000,ai2d_exact_match,0.44656735751295334,0.008947620544957215
3,20000,average,0.4679556547685855,
3,20000,average_rank,2.9,
3,20000,chartqa_relaxed_overall,0.5976,0.009809596692775395
3,20000,docvqa_val_anls,0.6493769742508846,0.006072933213063366
3,20000,infovqa_val_anls,0.26540905854876357,0.007209592372844281
3,20000,mme_total_score,926.0901360544218,
3,20000,mmmu_val_mmmu_acc,0.27333,
3,20000,mmstar_average,0.34157097675697473,
3,20000,ocrbench_ocrbench_accuracy,0.539,
3,20000,seedbench_seed_all,0.5437465258476931,
3,20000,textvqa_val_exact_match,0.555,0.0067346322137300735
4,1000,ai2d_exact_match,0.25874352331606215,0.00788225861008497
4,1000,average,0.27914578527127093,
4,1000,average_rank,2.6,
4,1000,chartqa_relaxed_overall,0.3512,0.009548816468986268
4,1000,docvqa_val_anls,0.36858592315444033,0.005921151680127505
4,1000,infovqa_val_anls,0.17699311795329079,0.006346227986201575
4,1000,mme_total_score,671.343537414966,
4,1000,mmmu_val_mmmu_acc,0.27111,
4,1000,mmstar_average,0.2086858732233149,
4,1000,ocrbench_ocrbench_accuracy,0.261,
4,1000,seedbench_seed_all,0.2605336297943302,
4,1000,textvqa_val_exact_match,0.35546000000000005,0.006549153835664011
4,2000,ai2d_exact_match,0.280440414507772,0.008085099461783339
4,2000,average,0.320025358717614,
4,2000,average_rank,2.8,
4,2000,chartqa_relaxed_overall,0.4488,0.009949423119365426
4,2000,docvqa_val_anls,0.43140645952438456,0.006042366638541379
4,2000,infovqa_val_anls,0.16528808420419083,0.005907032628809945
4,2000,mme_total_score,705.5901360544218,
4,2000,mmmu_val_mmmu_acc,0.27222,
4,2000,mmstar_average,0.24877125799316246,
4,2000,ocrbench_ocrbench_accuracy,0.329,
4,2000,seedbench_seed_all,0.3196220122290161,
4,2000,textvqa_val_exact_match,0.38468,0.006645983248449226
4,3000,ai2d_exact_match,0.34617875647668395,0.008562713351618977
4,3000,average,0.3596236953408542,
4,3000,average_rank,2.8,
4,3000,chartqa_relaxed_overall,0.468,0.009981495484186743
4,3000,docvqa_val_anls,0.464923009199496,0.006156900593094097
4,3000,infovqa_val_anls,0.18011502045718095,0.0061004080312330325
4,3000,mme_total_score,709.3333333333333,
4,3000,mmmu_val_mmmu_acc,0.27778,
4,3000,mmstar_average,0.30716380934399923,
4,3000,ocrbench_ocrbench_accuracy,0.351,
4,3000,seedbench_seed_all,0.42679266259032794,
4,3000,textvqa_val_exact_match,0.41466,0.006725300202411972
4,4000,ai2d_exact_match,0.36593264248704666,0.008669617940526182
4,4000,average,0.3829150140884673,
4,4000,average_rank,2.7,
4,4000,chartqa_relaxed_overall,0.5136,0.009998299975543861
4,4000,docvqa_val_anls,0.5002844765367886,0.0062258433013991955
4,4000,infovqa_val_anls,0.18808280764611432,0.006209185081756124
4,4000,mme_total_score,700.1989795918367,
4,4000,mmmu_val_mmmu_acc,0.28889,
4,4000,mmstar_average,0.3128795636615537,
4,4000,ocrbench_ocrbench_accuracy,0.379,
4,4000,seedbench_seed_all,0.46214563646470264,
4,4000,textvqa_val_exact_match,0.4354199999999999,0.006770365742739316
4,5000,ai2d_exact_match,0.39702072538860106,0.008806218703419164
4,5000,average,0.3990130243200321,
4,5000,average_rank,3.0,
4,5000,chartqa_relaxed_overall,0.5432,0.009964598400764347
4,5000,docvqa_val_anls,0.5330701388059006,0.006244542429703876
4,5000,infovqa_val_anls,0.20064814149562474,0.006400433745304747
4,5000,mme_total_score,687.6802721088436,
4,5000,mmmu_val_mmmu_acc,0.26333,
4,5000,mmstar_average,0.30889646221740025,
4,5000,ocrbench_ocrbench_accuracy,0.412,
4,5000,seedbench_seed_all,0.47315175097276263,
4,5000,textvqa_val_exact_match,0.4598,0.006799443983716428
4,6000,ai2d_exact_match,0.41224093264248707,0.00885945303235887
4,6000,average,0.4037939250305515,
4,6000,average_rank,3.2,
4,6000,chartqa_relaxed_overall,0.5312,0.009982508912777261
4,6000,docvqa_val_anls,0.5259911309932884,0.006272635836910295
4,6000,infovqa_val_anls,0.22056731437063212,0.00674209963892894
4,6000,mme_total_score,717.0051020408164,
4,6000,mmmu_val_mmmu_acc,0.26667,
4,6000,mmstar_average,0.3316518783413741,
4,6000,ocrbench_ocrbench_accuracy,0.408,
4,6000,seedbench_seed_all,0.48332406892718177,
4,6000,textvqa_val_exact_match,0.4544999999999999,0.006790726970992053
4,7000,ai2d_exact_match,0.4102979274611399,0.008853146969712133
4,7000,average,0.41740315045514464,
4,7000,average_rank,3.1,
4,7000,chartqa_relaxed_overall,0.5588,0.009932597172675325
4,7000,docvqa_val_anls,0.5597972576652357,0.0062571833970283125
4,7000,infovqa_val_anls,0.21665617889681224,0.006562362156515704
4,7000,mme_total_score,716.7908163265306,
4,7000,mmmu_val_mmmu_acc,0.28556,
4,7000,mmstar_average,0.32150517239662685,
4,7000,ocrbench_ocrbench_accuracy,0.431,
4,7000,seedbench_seed_all,0.4892718176764869,
4,7000,textvqa_val_exact_match,0.48374000000000006,0.006820617761268334
4,8000,ai2d_exact_match,0.4213082901554404,0.00888700282309854
4,8000,average,0.4251708917847074,
4,8000,average_rank,2.9,
4,8000,chartqa_relaxed_overall,0.564,0.009919725822025206
4,8000,docvqa_val_anls,0.5702706873242411,0.006237250618852069
4,8000,infovqa_val_anls,0.24000454829818865,0.006935520157929643
4,8000,mme_total_score,705.8180272108843,
4,8000,mmmu_val_mmmu_acc,0.28778,
4,8000,mmstar_average,0.3384645614295773,
4,8000,ocrbench_ocrbench_accuracy,0.42,
4,8000,seedbench_seed_all,0.5018899388549194,
4,8000,textvqa_val_exact_match,0.48281999999999997,0.006811185503977551
4,9000,ai2d_exact_match,0.4319948186528497,0.008915528710615487
4,9000,average,0.4318231930659084,
4,9000,average_rank,2.9,
4,9000,chartqa_relaxed_overall,0.5676,0.009910165515884228
4,9000,docvqa_val_anls,0.5846178021051754,0.006187149390116838
4,9000,infovqa_val_anls,0.2228617948699063,0.0066001763459020155
4,9000,mme_total_score,733.3503401360545,
4,9000,mmmu_val_mmmu_acc,0.28444,
4,9000,mmstar_average,0.3307124653782511,
4,9000,ocrbench_ocrbench_accuracy,0.463,
4,9000,seedbench_seed_all,0.5153418565869927,
4,9000,textvqa_val_exact_match,0.48583999999999994,0.0068269187957708125
4,10000,ai2d_exact_match,0.4410621761658031,0.0089364152923413
4,10000,average,0.436822457787989,
4,10000,average_rank,3.4,
4,10000,chartqa_relaxed_overall,0.5756,0.009887009516677585
4,10000,docvqa_val_anls,0.591441723638793,0.0062031994384821754
4,10000,infovqa_val_anls,0.22327754225685992,0.00649750251357461
4,10000,mme_total_score,695.8112244897959,
4,10000,mmmu_val_mmmu_acc,0.28778,
4,10000,mmstar_average,0.33369690927002266,
4,10000,ocrbench_ocrbench_accuracy,0.472,
4,10000,seedbench_seed_all,0.5107837687604224,
4,10000,textvqa_val_exact_match,0.49576000000000003,0.006808118284439173
4,11000,ai2d_exact_match,0.44332901554404147,0.008941163900483134
4,11000,average,0.44624717945755144,
4,11000,average_rank,3.0,
4,11000,chartqa_relaxed_overall,0.5868,0.009850132691777215
4,11000,docvqa_val_anls,0.60625861922937,0.006159202385167996
4,11000,infovqa_val_anls,0.2435454505191485,0.006860039872881237
4,11000,mme_total_score,751.1462585034014,
4,11000,mmmu_val_mmmu_acc,0.29222,
4,11000,mmstar_average,0.3470954764624236,
4,11000,ocrbench_ocrbench_accuracy,0.486,
4,11000,seedbench_seed_all,0.5128960533629794,
4,11000,textvqa_val_exact_match,0.49808,0.006799508024988012
4,12000,ai2d_exact_match,0.45142487046632124,0.008956585653027465
4,12000,average,0.44514971381341617,
4,12000,average_rank,3.3,
4,12000,chartqa_relaxed_overall,0.5868,0.009850132691777215
4,12000,docvqa_val_anls,0.6047188055272135,0.0061847009209673315
4,12000,infovqa_val_anls,0.2506217753279014,0.006972909032069362
4,12000,mme_total_score,742.969387755102,
4,12000,mmmu_val_mmmu_acc,0.28556,
4,12000,mmstar_average,0.33917912697374,
4,12000,ocrbench_ocrbench_accuracy,0.459,
4,12000,seedbench_seed_all,0.5211228460255698,
4,12000,textvqa_val_exact_match,0.5079199999999999,0.006798462954205747
4,13000,ai2d_exact_match,0.44689119170984454,0.00894824507304496
4,13000,average,0.4478540374461813,
4,13000,average_rank,3.5,
4,13000,chartqa_relaxed_overall,0.5936,0.009825183443166683
4,13000,docvqa_val_anls,0.6123877664020703,0.0061423212651813735
4,13000,infovqa_val_anls,0.23197941094655744,0.0066388766376455225
4,13000,mme_total_score,705.0068027210884,
4,13000,mmmu_val_mmmu_acc,0.29444,
4,13000,mmstar_average,0.3172158612312008,
4,13000,ocrbench_ocrbench_accuracy,0.5,
4,13000,seedbench_seed_all,0.5257921067259589,
4,13000,textvqa_val_exact_match,0.50838,0.006803735244897213
4,14000,ai2d_exact_match,0.45628238341968913,0.008964689215887884
4,14000,average,0.4541657280018954,
4,14000,average_rank,3.3,
4,14000,chartqa_relaxed_overall,0.5988,0.0098047885010856
4,14000,docvqa_val_anls,0.6230752215069362,0.006110772532320183
4,14000,infovqa_val_anls,0.23950752488444424,0.00673701613611272
4,14000,mme_total_score,693.2602040816327,
4,14000,mmmu_val_mmmu_acc,0.29,
4,14000,mmstar_average,0.3462132371031542,
4,14000,ocrbench_ocrbench_accuracy,0.492,
4,14000,seedbench_seed_all,0.519733185102835,
4,14000,textvqa_val_exact_match,0.52188,0.0067822601638824
4,15000,ai2d_exact_match,0.4536917098445596,0.008960474382205331
4,15000,average,0.4546421832173102,
4,15000,average_rank,3.5,
4,15000,chartqa_relaxed_overall,0.6012,0.0097949885513097
4,15000,docvqa_val_anls,0.6265798815467575,0.006118388682866076
4,15000,infovqa_val_anls,0.24253641235942872,0.006778846024017067
4,15000,mme_total_score,745.8826530612245,
4,15000,mmmu_val_mmmu_acc,0.28111,
4,15000,mmstar_average,0.3514223789460134,
4,15000,ocrbench_ocrbench_accuracy,0.493,
4,15000,seedbench_seed_all,0.5226792662590328,
4,15000,textvqa_val_exact_match,0.51956,0.006792518600768668
4,16000,ai2d_exact_match,0.4582253886010363,0.008967689939886603
4,16000,average,0.46307812033280477,
4,16000,average_rank,3.1,
4,16000,chartqa_relaxed_overall,0.6092,0.009760545645634788
4,16000,docvqa_val_anls,0.6397697311161549,0.006077931892063438
4,16000,infovqa_val_anls,0.2566929717899322,0.007049147355082826
4,16000,mme_total_score,769.8112244897959,
4,16000,mmmu_val_mmmu_acc,0.29,
4,16000,mmstar_average,0.3500855084419833,
4,16000,ocrbench_ocrbench_accuracy,0.512,
4,16000,seedbench_seed_all,0.5250694830461368,
4,16000,textvqa_val_exact_match,0.5266599999999999,0.006785297114451678
4,17000,ai2d_exact_match,0.46243523316062174,0.008973720555405783
4,17000,average,0.4637285100748874,
4,17000,average_rank,3.2,
4,17000,chartqa_relaxed_overall,0.6072,0.00976941352263433
4,17000,docvqa_val_anls,0.6316407990464801,0.006115829668357635
4,17000,infovqa_val_anls,0.26095289130380417,0.007179006033610968
4,17000,mme_total_score,772.2568027210884,
4,17000,mmmu_val_mmmu_acc,0.29222,
4,17000,mmstar_average,0.3487846654954876,
4,17000,ocrbench_ocrbench_accuracy,0.516,
4,17000,seedbench_seed_all,0.5254030016675931,
4,17000,textvqa_val_exact_match,0.52892,0.006777692390690844
4,18000,ai2d_exact_match,0.46729274611398963,0.00897987952745343
4,18000,average,0.46301237822364466,
4,18000,average_rank,3.5,
4,18000,chartqa_relaxed_overall,0.6024,0.009789996609470577
4,18000,docvqa_val_anls,0.6353229754668962,0.006102794809473289
4,18000,infovqa_val_anls,0.2566414572268362,0.006998597263140097
4,18000,mme_total_score,770.295918367347,
4,18000,mmmu_val_mmmu_acc,0.27778,
4,18000,mmstar_average,0.3522173046936848,
4,18000,ocrbench_ocrbench_accuracy,0.518,
4,18000,seedbench_seed_all,0.5224569205113953,
4,18000,textvqa_val_exact_match,0.535,0.006782934589123506
4,19000,ai2d_exact_match,0.4647020725388601,0.008976701230834869
4,19000,average,0.4657296959805982,
4,19000,average_rank,3.3,
4,19000,chartqa_relaxed_overall,0.6088,0.009762332982341016
4,19000,docvqa_val_anls,0.6386155506856869,0.006091782897731878
4,19000,infovqa_val_anls,0.2477875071753752,0.006879861435025137
4,19000,mme_total_score,772.204081632653,
4,19000,mmmu_val_mmmu_acc,0.30333,
4,19000,mmstar_average,0.3470027726694857,
4,19000,ocrbench_ocrbench_accuracy,0.512,
4,19000,seedbench_seed_all,0.5288493607559756,
4,19000,textvqa_val_exact_match,0.54048,0.006763536279536092
4,20000,ai2d_exact_match,0.4634067357512953,0.008975020819363737
4,20000,average,0.46162598712482705,
4,20000,average_rank,3.4,
4,20000,chartqa_relaxed_overall,0.61,0.009756950303844571
4,20000,docvqa_val_anls,0.6435026807424298,0.006070985460919362
4,20000,infovqa_val_anls,0.2543282868714285,0.006962743278022537
4,20000,mme_total_score,765.8690476190477,
4,20000,mmmu_val_mmmu_acc,0.27222,
4,20000,mmstar_average,0.34236379610014667,
4,20000,ocrbench_ocrbench_accuracy,0.509,
4,20000,seedbench_seed_all,0.5262923846581434,
4,20000,textvqa_val_exact_match,0.53352,0.006776464123213716
5,1000,ai2d_exact_match,0.24902849740932642,0.007783374690341817
5,1000,average,0.23561247048158757,
5,1000,average_rank,4.2,
5,1000,chartqa_relaxed_overall,0.2548,0.008716718216771047
5,1000,docvqa_val_anls,0.24096701334945672,0.004990683419188375
5,1000,infovqa_val_anls,0.12232054164836681,0.0051959928578510384
5,1000,mme_total_score,620.9336734693877,
5,1000,mmmu_val_mmmu_acc,0.23778,
5,1000,mmstar_average,0.26414819971479786,
5,1000,ocrbench_ocrbench_accuracy,0.216,
5,1000,seedbench_seed_all,0.2623679822123402,
5,1000,textvqa_val_exact_match,0.27310000000000006,0.0061250290771750005
5,2000,ai2d_exact_match,0.2344559585492228,0.007625132817591135
5,2000,average,0.2752283006434932,
5,2000,average_rank,4.2,
5,2000,chartqa_relaxed_overall,0.3732,0.009675026948726469
5,2000,docvqa_val_anls,0.331054267713041,0.005645142408620243
5,2000,infovqa_val_anls,0.1253737215538702,0.00524700917894423
5,2000,mme_total_score,678.2414965986395,
5,2000,mmmu_val_mmmu_acc,0.24,
5,2000,mmstar_average,0.24144442112149672,
5,2000,ocrbench_ocrbench_accuracy,0.264,
5,2000,seedbench_seed_all,0.33140633685380766,
5,2000,textvqa_val_exact_match,0.33612,0.006470505591414144
5,3000,ai2d_exact_match,0.22409326424870465,0.007505002611196186
5,3000,average,0.29997958942235364,
5,3000,average_rank,4.3,
5,3000,chartqa_relaxed_overall,0.392,0.00976588700628918
5,3000,docvqa_val_anls,0.37299390513630937,0.005683849773109756
5,3000,infovqa_val_anls,0.13605101039483827,0.005410567699808442
5,3000,mme_total_score,659.7210884353742,
5,3000,mmmu_val_mmmu_acc,0.27,
5,3000,mmstar_average,0.2682811266889234,
5,3000,ocrbench_ocrbench_accuracy,0.287,
5,3000,seedbench_seed_all,0.3745969983324069,
5,3000,textvqa_val_exact_match,0.3748,0.006628980364742018
5,4000,ai2d_exact_match,0.22733160621761658,0.007543244231635894
5,4000,average,0.3084813519082869,
5,4000,average_rank,4.7,
5,4000,chartqa_relaxed_overall,0.43,0.00990349593288537
5,4000,docvqa_val_anls,0.4066720118815712,0.006028824654560211
5,4000,infovqa_val_anls,0.14319025154556023,0.005617800071290847
5,4000,mme_total_score,656.1462585034013,
5,4000,mmmu_val_mmmu_acc,0.25667,
5,4000,mmstar_average,0.2585945343280555,
5,4000,ocrbench_ocrbench_accuracy,0.294,
5,4000,seedbench_seed_all,0.39277376320177876,
5,4000,textvqa_val_exact_match,0.3671,0.006592830278584186
5,5000,ai2d_exact_match,0.24028497409326424,0.007689893942245019
5,5000,average,0.3230129052623469,
5,5000,average_rank,4.9,
5,5000,chartqa_relaxed_overall,0.442,0.009934479228979264
5,5000,docvqa_val_anls,0.43465518326761016,0.006092084287625314
5,5000,infovqa_val_anls,0.16044569408280707,0.005985099003597859
5,5000,mme_total_score,700.9234693877552,
5,5000,mmmu_val_mmmu_acc,0.26,
5,5000,mmstar_average,0.27948727201527235,
5,5000,ocrbench_ocrbench_accuracy,0.309,
5,5000,seedbench_seed_all,0.39744302390216785,
5,5000,textvqa_val_exact_match,0.3838,0.006651041968883851
5,6000,ai2d_exact_match,0.21761658031088082,0.007426556596739526
5,6000,average,0.3285664644731758,
5,6000,average_rank,4.8,
5,6000,chartqa_relaxed_overall,0.4708,0.009984929820955767
5,6000,docvqa_val_anls,0.4274906773084525,0.005930539560380286
5,6000,infovqa_val_anls,0.15122815225662642,0.005687399721363878
5,6000,mme_total_score,692.2227891156463,
5,6000,mmmu_val_mmmu_acc,0.27,
5,6000,mmstar_average,0.27596736182231085,
5,6000,ocrbench_ocrbench_accuracy,0.341,
5,6000,seedbench_seed_all,0.4237354085603113,
5,6000,textvqa_val_exact_match,0.37926,0.006628782590470618
5,7000,ai2d_exact_match,0.22959844559585493,0.007569631399592313
5,7000,average,0.3397133831241853,
5,7000,average_rank,5.0,
5,7000,chartqa_relaxed_overall,0.4864,0.009998299975543861
5,7000,docvqa_val_anls,0.4538685197224749,0.00598758370400633
5,7000,infovqa_val_anls,0.15500462855057698,0.005842239614739797
5,7000,mme_total_score,662.3809523809523,
5,7000,mmmu_val_mmmu_acc,0.26444,
5,7000,mmstar_average,0.2946102327923966,
5,7000,ocrbench_ocrbench_accuracy,0.339,
5,7000,seedbench_seed_all,0.43351862145636466,
5,7000,textvqa_val_exact_match,0.40098,0.00668858395709213
5,8000,ai2d_exact_match,0.26878238341968913,0.007979127569354613
5,8000,average,0.3468669425903158,
5,8000,average_rank,4.7,
5,8000,chartqa_relaxed_overall,0.4644,0.009976616117083942
5,8000,docvqa_val_anls,0.43320064291973065,0.005825461000081097
5,8000,infovqa_val_anls,0.1525871677997588,0.0057380999639673955
5,8000,mme_total_score,714.7789115646258,
5,8000,mmmu_val_mmmu_acc,0.27667,
5,8000,mmstar_average,0.3189178311414238,
5,8000,ocrbench_ocrbench_accuracy,0.358,
5,8000,seedbench_seed_all,0.4440244580322401,
5,8000,textvqa_val_exact_match,0.40522,0.006705157876473132
5,9000,ai2d_exact_match,0.23834196891191708,0.007668527149232641
5,9000,average,0.34742834361066494,
5,9000,average_rank,4.9,
5,9000,chartqa_relaxed_overall,0.4832,0.009996353076494045
5,9000,docvqa_val_anls,0.44997891177952337,0.005999690608407377
5,9000,infovqa_val_anls,0.15249014258349003,0.005725765633377559
5,9000,mme_total_score,696.5544217687075,
5,9000,mmmu_val_mmmu_acc,0.26444,
5,9000,mmstar_average,0.3019547640515156,
5,9000,ocrbench_ocrbench_accuracy,0.384,
5,9000,seedbench_seed_all,0.44874930516953865,
5,9000,textvqa_val_exact_match,0.4037,0.006699928343494548
5,10000,ai2d_exact_match,0.2979274611398964,0.008231480357867917
5,10000,average,0.3538147252476138,
5,10000,average_rank,4.8,
5,10000,chartqa_relaxed_overall,0.48,0.009993995796516643
5,10000,docvqa_val_anls,0.45125781190343667,0.0059273100312449535
5,10000,infovqa_val_anls,0.15739085013451903,0.005776029267754871
5,10000,mme_total_score,718.7227891156463,
5,10000,mmmu_val_mmmu_acc,0.27556,
5,10000,mmstar_average,0.3004387942674594,
5,10000,ocrbench_ocrbench_accuracy,0.357,
5,10000,seedbench_seed_all,0.4556976097832129,
5,10000,textvqa_val_exact_match,0.40906000000000003,0.006714715240436636
5,11000,ai2d_exact_match,0.3167098445595855,0.008372690712254882
5,11000,average,0.36396020347184427,
5,11000,average_rank,5.0,
5,11000,chartqa_relaxed_overall,0.4924,0.010000845102345324
5,11000,docvqa_val_anls,0.4691277601070516,0.0060867637597330085
5,11000,infovqa_val_anls,0.15562897334070494,0.005768608804593679
5,11000,mme_total_score,680.7667066826731,
5,11000,mmmu_val_mmmu_acc,0.27667,
5,11000,mmstar_average,0.3111702671358657,
5,11000,ocrbench_ocrbench_accuracy,0.388,
5,11000,seedbench_seed_all,0.45497498610339077,
5,11000,textvqa_val_exact_match,0.41096000000000005,0.006715250896200365
5,12000,ai2d_exact_match,0.24838082901554404,0.007776597937116943
5,12000,average,0.35400963042471534,
5,12000,average_rank,4.9,
5,12000,chartqa_relaxed_overall,0.4624,0.00997367964766694
5,12000,docvqa_val_anls,0.46480289866811825,0.005910238300168798
5,12000,infovqa_val_anls,0.15657154481637633,0.0057842205757870115
5,12000,mme_total_score,742.4894957983194,
5,12000,mmmu_val_mmmu_acc,0.28444,
5,12000,mmstar_average,0.30237252416842486,
5,12000,ocrbench_ocrbench_accuracy,0.391,
5,12000,seedbench_seed_all,0.46197887715397445,
5,12000,textvqa_val_exact_match,0.41414000000000006,0.0067237975855013775
5,13000,ai2d_exact_match,0.27266839378238344,0.008015217564479081
5,13000,average,0.3605408154099655,
5,13000,average_rank,4.9,
5,13000,chartqa_relaxed_overall,0.4796,0.00999367226769808
5,13000,docvqa_val_anls,0.4888368998254502,0.006080092164054846
5,13000,infovqa_val_anls,0.1685412928680358,0.006153102666352037
5,13000,mme_total_score,715.9022609043617,
5,13000,mmmu_val_mmmu_acc,0.27,
5,13000,mmstar_average,0.30550310907874534,
5,13000,ocrbench_ocrbench_accuracy,0.39,
5,13000,seedbench_seed_all,0.46375764313507506,
5,13000,textvqa_val_exact_match,0.40596000000000004,0.006708225975557757
5,14000,ai2d_exact_match,0.27266839378238344,0.008015217564479094
5,14000,average,0.35876061642606916,
5,14000,average_rank,4.9,
5,14000,chartqa_relaxed_overall,0.4832,0.009996353076494045
5,14000,docvqa_val_anls,0.4686745608937551,0.005954780465596843
5,14000,infovqa_val_anls,0.16026985404926572,0.00587737555538511
5,14000,mme_total_score,694.7702080832332,
5,14000,mmmu_val_mmmu_acc,0.27778,
5,14000,mmstar_average,0.3065739842454048,
5,14000,ocrbench_ocrbench_accuracy,0.388,
5,14000,seedbench_seed_all,0.46575875486381324,
5,14000,textvqa_val_exact_match,0.40592,0.006717590038338499
5,15000,ai2d_exact_match,0.26295336787564766,0.007923526907377253
5,15000,average,0.3594508046372947,
5,15000,average_rank,4.9,
5,15000,chartqa_relaxed_overall,0.4904,0.010000156861514821
5,15000,docvqa_val_anls,0.47702085294845603,0.006014469495902542
5,15000,infovqa_val_anls,0.1709556715444569,0.006117350998294382
5,15000,mme_total_score,748.1163465386154,
5,15000,mmmu_val_mmmu_acc,0.25667,
5,15000,mmstar_average,0.2990729469212882,
5,15000,ocrbench_ocrbench_accuracy,0.404,
5,15000,seedbench_seed_all,0.46392440244580324,
5,15000,textvqa_val_exact_match,0.4100599999999999,0.0067243737790625615
5,16000,ai2d_exact_match,0.28950777202072536,0.00816284339533906
5,16000,average,0.3652803192394071,
5,16000,average_rank,4.9,
5,16000,chartqa_relaxed_overall,0.5004,0.010001997399559365
5,16000,docvqa_val_anls,0.4789319968433556,0.005936381904079473
5,16000,infovqa_val_anls,0.16818261112655605,0.006062058685336811
5,16000,mme_total_score,703.8838535414166,
5,16000,mmmu_val_mmmu_acc,0.28111,
5,16000,mmstar_average,0.30021933140749574,
5,16000,ocrbench_ocrbench_accuracy,0.392,
5,16000,seedbench_seed_all,0.4640911617565314,
5,16000,textvqa_val_exact_match,0.41308,0.006723304491442948
5,17000,ai2d_exact_match,0.28335492227979275,0.008110527983566212
5,17000,average,0.36065417779712866,
5,17000,average_rank,5.0,
5,17000,chartqa_relaxed_overall,0.4688,0.009982508912777261
5,17000,docvqa_val_anls,0.4676527518642357,0.00590362287731878
5,17000,infovqa_val_anls,0.16818540516392913,0.00605571000794457
5,17000,mme_total_score,754.0354141656662,
5,17000,mmmu_val_mmmu_acc,0.26222,
5,17000,mmstar_average,0.31626391497403816,
5,17000,ocrbench_ocrbench_accuracy,0.404,
5,17000,seedbench_seed_all,0.46309060589216233,
5,17000,textvqa_val_exact_match,0.41231999999999996,0.006722044383678169
5,18000,ai2d_exact_match,0.2911269430051813,0.00817630569100236
5,18000,average,0.3642489832139911,
5,18000,average_rank,4.9,
5,18000,chartqa_relaxed_overall,0.488,0.009999119609104738
5,18000,docvqa_val_anls,0.4852288069276555,0.006044640681137398
5,18000,infovqa_val_anls,0.1659765406298008,0.006009331694189444
5,18000,mme_total_score,748.4861944777911,
5,18000,mmmu_val_mmmu_acc,0.28111,
5,18000,mmstar_average,0.3014618713149217,
5,18000,ocrbench_ocrbench_accuracy,0.389,
5,18000,seedbench_seed_all,0.4660366870483602,
5,18000,textvqa_val_exact_match,0.4103,0.0067180509406887
5,19000,ai2d_exact_match,0.2817357512953368,0.008096452844781159
5,19000,average,0.35871512802442374,
5,19000,average_rank,4.7,
5,19000,chartqa_relaxed_overall,0.452,0.009955804699716018
5,19000,docvqa_val_anls,0.4693437417424619,0.005945802716190409
5,19000,infovqa_val_anls,0.17352672765291935,0.006108049035774969
5,19000,mme_total_score,757.4390756302521,
5,19000,mmmu_val_mmmu_acc,0.29556,
5,19000,mmstar_average,0.299605929305638,
5,19000,ocrbench_ocrbench_accuracy,0.382,
5,19000,seedbench_seed_all,0.4672040022234575,
5,19000,textvqa_val_exact_match,0.40746,0.006711235192985202
5,20000,ai2d_exact_match,0.28950777202072536,0.008162843395339051
5,20000,average,0.3571101844602158,
5,20000,average_rank,5.0,
5,20000,chartqa_relaxed_overall,0.452,0.009955804699716018
5,20000,docvqa_val_anls,0.4781541164812954,0.006040598891772297
5,20000,infovqa_val_anls,0.16871824680773087,0.00599943702354704
5,20000,mme_total_score,713.3514405762305,
5,20000,mmmu_val_mmmu_acc,0.26667,
5,20000,mmstar_average,0.30644375940695473,
5,20000,ocrbench_ocrbench_accuracy,0.398,
5,20000,seedbench_seed_all,0.4599777654252362,
5,20000,textvqa_val_exact_match,0.39452000000000004,0.006680937127692554