FineVision / app /src /content /assets /data /against_baselines.csv
lusxvr's picture
new plots
a024e38
raw
history blame
53.6 kB
run,step,metric,value,stderr
FineVision,1000,ai2d_exact_match,0.2548575129533679,0.007843322436924496
FineVision,1000,average,0.27120689295763617,
FineVision,1000,average_rank,2.8,
FineVision,1000,chartqa_relaxed_overall,0.3308,0.009411906161401973
FineVision,1000,docvqa_val_anls,0.3528553494243383,0.005852289239342309
FineVision,1000,infovqa_val_anls,0.17320578642581314,0.006297063452679795
FineVision,1000,mme_total_score,977.4280712284914,
FineVision,1000,mmmu_val_mmmu_acc,0.25222,
FineVision,1000,mmstar_average,0.23215874078908072,
FineVision,1000,ocrbench_ocrbench_accuracy,0.286,
FineVision,1000,seedbench_seed_all,0.2563646470261256,
FineVision,1000,textvqa_val_exact_match,0.3024,0.00628900296642181
FineVision,2000,ai2d_exact_match,0.26295336787564766,0.007923526907377255
FineVision,2000,average,0.3202068275596269,
FineVision,2000,average_rank,2.6,
FineVision,2000,chartqa_relaxed_overall,0.4688,0.009982508912777261
FineVision,2000,docvqa_val_anls,0.4452261510942785,0.00614755494712251
FineVision,2000,infovqa_val_anls,0.1820547866557169,0.006217861455795791
FineVision,2000,mme_total_score,1049.3036214485794,
FineVision,2000,mmmu_val_mmmu_acc,0.24556,
FineVision,2000,mmstar_average,0.21305462434540698,
FineVision,2000,ocrbench_ocrbench_accuracy,0.395,
FineVision,2000,seedbench_seed_all,0.258532518065592,
FineVision,2000,textvqa_val_exact_match,0.41068000000000005,0.006697862330024289
FineVision,3000,ai2d_exact_match,0.25226683937823835,0.007816909588794397
FineVision,3000,average,0.3507423834414229,
FineVision,3000,average_rank,2.6,
FineVision,3000,chartqa_relaxed_overall,0.5028,0.010001843767601082
FineVision,3000,docvqa_val_anls,0.502653993831009,0.006267072346683124
FineVision,3000,infovqa_val_anls,0.21728617578189535,0.006796941784959762
FineVision,3000,mme_total_score,1170.2383953581434,
FineVision,3000,mmmu_val_mmmu_acc,0.27556,
FineVision,3000,mmstar_average,0.25432376938577683,
FineVision,3000,ocrbench_ocrbench_accuracy,0.436,
FineVision,3000,seedbench_seed_all,0.2792106725958866,
FineVision,3000,textvqa_val_exact_match,0.43658,0.006766885462882726
FineVision,4000,ai2d_exact_match,0.2645725388601036,0.007939149662089447
FineVision,4000,average,0.36961781722974835,
FineVision,4000,average_rank,2.7,
FineVision,4000,chartqa_relaxed_overall,0.5312,0.009982508912777261
FineVision,4000,docvqa_val_anls,0.5374434618615119,0.0062905728113059655
FineVision,4000,infovqa_val_anls,0.2287924838861707,0.006994568698639919
FineVision,4000,mme_total_score,1155.203781512605,
FineVision,4000,mmmu_val_mmmu_acc,0.25556,
FineVision,4000,mmstar_average,0.2575590188757354,
FineVision,4000,ocrbench_ocrbench_accuracy,0.453,
FineVision,4000,seedbench_seed_all,0.33913285158421347,
FineVision,4000,textvqa_val_exact_match,0.4593,0.006791695475025738
FineVision,5000,ai2d_exact_match,0.3125,0.008342439145556371
FineVision,5000,average,0.3974627910380972,
FineVision,5000,average_rank,2.6,
FineVision,5000,chartqa_relaxed_overall,0.5488,0.00995424828018316
FineVision,5000,docvqa_val_anls,0.552360266782429,0.006300308519952055
FineVision,5000,infovqa_val_anls,0.23425555286643698,0.007002254622066442
FineVision,5000,mme_total_score,1181.4653861544618,
FineVision,5000,mmmu_val_mmmu_acc,0.26667,
FineVision,5000,mmstar_average,0.29596648146165705,
FineVision,5000,ocrbench_ocrbench_accuracy,0.462,
FineVision,5000,seedbench_seed_all,0.43107281823235133,
FineVision,5000,textvqa_val_exact_match,0.47354000000000007,0.0068172185364497985
FineVision,6000,ai2d_exact_match,0.358160621761658,0.008629463221867162
FineVision,6000,average,0.4161227404571003,
FineVision,6000,average_rank,2.1,
FineVision,6000,chartqa_relaxed_overall,0.5628,0.00992279440175477
FineVision,6000,docvqa_val_anls,0.5747451497228876,0.00625495440870239
FineVision,6000,infovqa_val_anls,0.22152017368968838,0.006604546680525351
FineVision,6000,mme_total_score,1284.1648659463785,
FineVision,6000,mmmu_val_mmmu_acc,0.27111,
FineVision,6000,mmstar_average,0.2978489412854164,
FineVision,6000,ocrbench_ocrbench_accuracy,0.495,
FineVision,6000,seedbench_seed_all,0.4795997776542524,
FineVision,6000,textvqa_val_exact_match,0.48432,0.006800535050670284
FineVision,7000,ai2d_exact_match,0.3707901554404145,0.00869347755587734
FineVision,7000,average,0.4291083177345374,
FineVision,7000,average_rank,2.4,
FineVision,7000,chartqa_relaxed_overall,0.5656,0.009915542506251351
FineVision,7000,docvqa_val_anls,0.5940907049431567,0.006224236305767187
FineVision,7000,infovqa_val_anls,0.2515675215816963,0.007105097396092786
FineVision,7000,mme_total_score,1185.875650260104,
FineVision,7000,mmmu_val_mmmu_acc,0.26556,
FineVision,7000,mmstar_average,0.31372400960777047,
FineVision,7000,ocrbench_ocrbench_accuracy,0.504,
FineVision,7000,seedbench_seed_all,0.4964424680377988,
FineVision,7000,textvqa_val_exact_match,0.5002,0.006794794025220267
FineVision,8000,ai2d_exact_match,0.37759067357512954,0.008725299846043883
FineVision,8000,average,0.43846759477995995,
FineVision,8000,average_rank,2.2,
FineVision,8000,chartqa_relaxed_overall,0.5832,0.009862556058385773
FineVision,8000,docvqa_val_anls,0.6017336419437208,0.006231612198089698
FineVision,8000,infovqa_val_anls,0.2449256624147254,0.006992518502948913
FineVision,8000,mme_total_score,1199.2409963985594,
FineVision,8000,mmmu_val_mmmu_acc,0.28111,
FineVision,8000,mmstar_average,0.33512257186205047,
FineVision,8000,ocrbench_ocrbench_accuracy,0.51,
FineVision,8000,seedbench_seed_all,0.5024458032240133,
FineVision,8000,textvqa_val_exact_match,0.51008,0.006796301690135059
FineVision,9000,ai2d_exact_match,0.4067357512953368,0.008841214921078996
FineVision,9000,average,0.4422510732201056,
FineVision,9000,average_rank,2.0,
FineVision,9000,chartqa_relaxed_overall,0.5912,0.009834211136815875
FineVision,9000,docvqa_val_anls,0.6170968481662739,0.00617235763542544
FineVision,9000,infovqa_val_anls,0.23537031288570615,0.00670318154156447
FineVision,9000,mme_total_score,1231.5195078031213,
FineVision,9000,mmmu_val_mmmu_acc,0.25889,
FineVision,9000,mmstar_average,0.3216444898242951,
FineVision,9000,ocrbench_ocrbench_accuracy,0.515,
FineVision,9000,seedbench_seed_all,0.5120622568093385,
FineVision,9000,textvqa_val_exact_match,0.52226,0.006792711289708482
FineVision,10000,ai2d_exact_match,0.39993523316062174,0.008817096257082848
FineVision,10000,average,0.4523875703250908,
FineVision,10000,average_rank,1.7,
FineVision,10000,chartqa_relaxed_overall,0.5996,0.00980154906867574
FineVision,10000,docvqa_val_anls,0.6262613496433054,0.006147756371688175
FineVision,10000,infovqa_val_anls,0.263290074230132,0.007186788766942786
FineVision,10000,mme_total_score,1240.8218287314926,
FineVision,10000,mmmu_val_mmmu_acc,0.28778,
FineVision,10000,mmstar_average,0.32972717906018517,
FineVision,10000,ocrbench_ocrbench_accuracy,0.517,
FineVision,10000,seedbench_seed_all,0.5217342968315731,
FineVision,10000,textvqa_val_exact_match,0.5261600000000001,0.006785774843600811
FineVision,11000,ai2d_exact_match,0.422279792746114,0.008889771831066474
FineVision,11000,average,0.4561398159525099,
FineVision,11000,average_rank,1.7,
FineVision,11000,chartqa_relaxed_overall,0.6104,0.009755142291143075
FineVision,11000,docvqa_val_anls,0.6373130149166712,0.006128022584995044
FineVision,11000,infovqa_val_anls,0.24419378339723755,0.006897644885887063
FineVision,11000,mme_total_score,1322.9488795518205,
FineVision,11000,mmmu_val_mmmu_acc,0.27778,
FineVision,11000,mmstar_average,0.3298563439522548,
FineVision,11000,ocrbench_ocrbench_accuracy,0.521,
FineVision,11000,seedbench_seed_all,0.5237354085603113,
FineVision,11000,textvqa_val_exact_match,0.5387,0.006770851562852138
FineVision,12000,ai2d_exact_match,0.42001295336787564,0.008883255931688034
FineVision,12000,average,0.4582751140055433,
FineVision,12000,average_rank,1.6,
FineVision,12000,chartqa_relaxed_overall,0.618,0.009719474639861454
FineVision,12000,docvqa_val_anls,0.6393961983751871,0.0061228747388476674
FineVision,12000,infovqa_val_anls,0.24798874058574302,0.006855374548993139
FineVision,12000,mme_total_score,1225.6453581432572,
FineVision,12000,mmmu_val_mmmu_acc,0.27889,
FineVision,12000,mmstar_average,0.34010867846816534,
FineVision,12000,ocrbench_ocrbench_accuracy,0.512,
FineVision,12000,seedbench_seed_all,0.5350194552529183,
FineVision,12000,textvqa_val_exact_match,0.5330600000000001,0.006777713092109446
FineVision,13000,ai2d_exact_match,0.4375,0.008928571428571428
FineVision,13000,average,0.4692868662590049,
FineVision,13000,average_rank,1.5,
FineVision,13000,chartqa_relaxed_overall,0.6148,0.00973479791861169
FineVision,13000,docvqa_val_anls,0.6511374872549951,0.006086953065248391
FineVision,13000,infovqa_val_anls,0.24465055100441893,0.006808432538374664
FineVision,13000,mme_total_score,1281.7122849139657,
FineVision,13000,mmmu_val_mmmu_acc,0.28222,
FineVision,13000,mmstar_average,0.3453069542917521,
FineVision,13000,ocrbench_ocrbench_accuracy,0.549,
FineVision,13000,seedbench_seed_all,0.5442468037798777,
FineVision,13000,textvqa_val_exact_match,0.55472,0.0067416788982325
FineVision,14000,ai2d_exact_match,0.4572538860103627,0.00896620675297095
FineVision,14000,average,0.47352486841689195,
FineVision,14000,average_rank,1.4,
FineVision,14000,chartqa_relaxed_overall,0.6172,0.009723347231923635
FineVision,14000,docvqa_val_anls,0.6502269393708169,0.006057950730638126
FineVision,14000,infovqa_val_anls,0.25805460837190913,0.007037735231659539
FineVision,14000,mme_total_score,1309.1444577831132,
FineVision,14000,mmmu_val_mmmu_acc,0.28111,
FineVision,14000,mmstar_average,0.34575818188776586,
FineVision,14000,ocrbench_ocrbench_accuracy,0.551,
FineVision,14000,seedbench_seed_all,0.5483602001111729,
FineVision,14000,textvqa_val_exact_match,0.55276,0.006751206724612103
FineVision,15000,ai2d_exact_match,0.45045336787564766,0.008954861634252399
FineVision,15000,average,0.47878665012878824,
FineVision,15000,average_rank,1.3,
FineVision,15000,chartqa_relaxed_overall,0.612,0.009747841205275417
FineVision,15000,docvqa_val_anls,0.6621413031955148,0.006056838050222495
FineVision,15000,infovqa_val_anls,0.2706898598157733,0.007200315730154543
FineVision,15000,mme_total_score,1384.2171868747498,
FineVision,15000,mmmu_val_mmmu_acc,0.30222,
FineVision,15000,mmstar_average,0.35408135695920684,
FineVision,15000,ocrbench_ocrbench_accuracy,0.558,
FineVision,15000,seedbench_seed_all,0.5411339633129516,
FineVision,15000,textvqa_val_exact_match,0.5583600000000001,0.0067279027203879065
FineVision,16000,ai2d_exact_match,0.45077720207253885,0.008955440137395838
FineVision,16000,average,0.47665128022935843,
FineVision,16000,average_rank,1.5,
FineVision,16000,chartqa_relaxed_overall,0.632,0.00964715642305132
FineVision,16000,docvqa_val_anls,0.6709415729142987,0.005999818105621502
FineVision,16000,infovqa_val_anls,0.26050032542402035,0.006997451875879188
FineVision,16000,mme_total_score,1317.8491396558625,
FineVision,16000,mmmu_val_mmmu_acc,0.27556,
FineVision,16000,mmstar_average,0.33214333327093315,
FineVision,16000,ocrbench_ocrbench_accuracy,0.56,
FineVision,16000,seedbench_seed_all,0.5463590883824346,
FineVision,16000,textvqa_val_exact_match,0.56158,0.006723854754867398
FineVision,17000,ai2d_exact_match,0.45919689119170987,0.008969138793675545
FineVision,17000,average,0.4777141780162423,
FineVision,17000,average_rank,1.3,
FineVision,17000,chartqa_relaxed_overall,0.632,0.00964715642305132
FineVision,17000,docvqa_val_anls,0.6796338519136422,0.005948761388267941
FineVision,17000,infovqa_val_anls,0.28070956072505215,0.007298333094144192
FineVision,17000,mme_total_score,1381.9161664665867,
FineVision,17000,mmmu_val_mmmu_acc,0.27667,
FineVision,17000,mmstar_average,0.3370289492329521,
FineVision,17000,ocrbench_ocrbench_accuracy,0.519,
FineVision,17000,seedbench_seed_all,0.5510283490828238,
FineVision,17000,textvqa_val_exact_match,0.56416,0.006724830373229479
FineVision,18000,ai2d_exact_match,0.46567357512953367,0.008977921602780726
FineVision,18000,average,0.4819834595278701,
FineVision,18000,average_rank,1.2,
FineVision,18000,chartqa_relaxed_overall,0.6376,0.009615793331418735
FineVision,18000,docvqa_val_anls,0.6775884603912571,0.005972234236435759
FineVision,18000,infovqa_val_anls,0.27154318420389256,0.007164903131667027
FineVision,18000,mme_total_score,1336.922769107643,
FineVision,18000,mmmu_val_mmmu_acc,0.28667,
FineVision,18000,mmstar_average,0.34482796716566916,
FineVision,18000,ocrbench_ocrbench_accuracy,0.533,
FineVision,18000,seedbench_seed_all,0.5543079488604781,
FineVision,18000,textvqa_val_exact_match,0.5666399999999999,0.006713392287599574
FineVision,19000,ai2d_exact_match,0.4682642487046632,0.008981008686994101
FineVision,19000,average,0.4899006713916878,
FineVision,19000,average_rank,1.2,
FineVision,19000,chartqa_relaxed_overall,0.6444,0.009575809858898698
FineVision,19000,docvqa_val_anls,0.678226526479947,0.005970619221588814
FineVision,19000,infovqa_val_anls,0.26993847247278,0.0071348470764911525
FineVision,19000,mme_total_score,1406.6628651460583,
FineVision,19000,mmmu_val_mmmu_acc,0.28333,
FineVision,19000,mmstar_average,0.356220913822775,
FineVision,19000,ocrbench_ocrbench_accuracy,0.577,
FineVision,19000,seedbench_seed_all,0.554585881045025,
FineVision,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
FineVision,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662
FineVision,20000,average,0.4873169067639118,
FineVision,20000,average_rank,1.2,
FineVision,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618
FineVision,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767
FineVision,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538
FineVision,20000,mme_total_score,1324.6738695478193,
FineVision,20000,mmmu_val_mmmu_acc,0.30111,
FineVision,20000,mmstar_average,0.33806766134497995,
FineVision,20000,ocrbench_ocrbench_accuracy,0.555,
FineVision,20000,seedbench_seed_all,0.5587548638132296,
FineVision,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
Cauldron,1000,ai2d_exact_match,0.28886010362694303,0.008157423105367313
Cauldron,1000,average,0.29904301214549334,
Cauldron,1000,average_rank,1.9,
Cauldron,1000,chartqa_relaxed_overall,0.1936,0.007903961351247664
Cauldron,1000,docvqa_val_anls,0.32153744261519257,0.005317068996930092
Cauldron,1000,infovqa_val_anls,0.1431990055083018,0.005424936025458022
Cauldron,1000,mme_total_score,1172.0779311724689,
Cauldron,1000,mmmu_val_mmmu_acc,0.27667,
Cauldron,1000,mmstar_average,0.2911329978035828,
Cauldron,1000,ocrbench_ocrbench_accuracy,0.337,
Cauldron,1000,seedbench_seed_all,0.39360755975541967,
Cauldron,1000,textvqa_val_exact_match,0.44578,0.0067711747933144
Cauldron,2000,ai2d_exact_match,0.41871761658031087,0.008879446246519871
Cauldron,2000,average,0.34894207663644056,
Cauldron,2000,average_rank,1.9,
Cauldron,2000,chartqa_relaxed_overall,0.2056,0.00808440468059435
Cauldron,2000,docvqa_val_anls,0.37496112947656884,0.005489559822643159
Cauldron,2000,infovqa_val_anls,0.14667060624395192,0.005473110880489631
Cauldron,2000,mme_total_score,1248.6002400960383,
Cauldron,2000,mmmu_val_mmmu_acc,0.28667,
Cauldron,2000,mmstar_average,0.34478967650439835,
Cauldron,2000,ocrbench_ocrbench_accuracy,0.368,
Cauldron,2000,seedbench_seed_all,0.5013896609227348,
Cauldron,2000,textvqa_val_exact_match,0.49368,0.0068081481840761415
Cauldron,3000,ai2d_exact_match,0.4653497409326425,0.00897751861457722
Cauldron,3000,average,0.3647655686453986,
Cauldron,3000,average_rank,2.4,
Cauldron,3000,chartqa_relaxed_overall,0.2192,0.008275744025504309
Cauldron,3000,docvqa_val_anls,0.3999560247980121,0.005545460541574292
Cauldron,3000,infovqa_val_anls,0.15452276899525894,0.005625373377223539
Cauldron,3000,mme_total_score,1164.4316726690677,
Cauldron,3000,mmmu_val_mmmu_acc,0.27667,
Cauldron,3000,mmstar_average,0.34444117730168444,
Cauldron,3000,ocrbench_ocrbench_accuracy,0.403,
Cauldron,3000,seedbench_seed_all,0.5147304057809894,
Cauldron,3000,textvqa_val_exact_match,0.50502,0.006802809387533405
Cauldron,4000,ai2d_exact_match,0.48121761658031087,0.008992802471886854
Cauldron,4000,average,0.3694904966669109,
Cauldron,4000,average_rank,2.3,
Cauldron,4000,chartqa_relaxed_overall,0.2184,0.008264859294607735
Cauldron,4000,docvqa_val_anls,0.40927640030259055,0.005557758057811595
Cauldron,4000,infovqa_val_anls,0.15259984907145144,0.005629341537638722
Cauldron,4000,mme_total_score,1238.5236094437776,
Cauldron,4000,mmmu_val_mmmu_acc,0.26667,
Cauldron,4000,mmstar_average,0.36056167686607765,
Cauldron,4000,ocrbench_ocrbench_accuracy,0.414,
Cauldron,4000,seedbench_seed_all,0.5240689271817677,
Cauldron,4000,textvqa_val_exact_match,0.49862,0.006804563140709856
Cauldron,5000,ai2d_exact_match,0.48607512953367876,0.008995663534025174
Cauldron,5000,average,0.3715613183242104,
Cauldron,5000,average_rank,2.3,
Cauldron,5000,chartqa_relaxed_overall,0.2236,0.008334806752495259
Cauldron,5000,docvqa_val_anls,0.42332206291362884,0.005573327842684563
Cauldron,5000,infovqa_val_anls,0.15868297927477548,0.005670852175948406
Cauldron,5000,mme_total_score,1159.8522408963586,
Cauldron,5000,mmmu_val_mmmu_acc,0.26889,
Cauldron,5000,mmstar_average,0.360337335219157,
Cauldron,5000,ocrbench_ocrbench_accuracy,0.401,
Cauldron,5000,seedbench_seed_all,0.5198443579766537,
Cauldron,5000,textvqa_val_exact_match,0.5023,0.0068036313744923
Cauldron,6000,ai2d_exact_match,0.5025906735751295,0.008999033321198393
Cauldron,6000,average,0.3678206000506273,
Cauldron,6000,average_rank,2.2,
Cauldron,6000,chartqa_relaxed_overall,0.2228,0.008324168469720259
Cauldron,6000,docvqa_val_anls,0.4147154618557465,0.005557478918091434
Cauldron,6000,infovqa_val_anls,0.14825798330117057,0.005517775162348899
Cauldron,6000,mme_total_score,1182.059923969588,
Cauldron,6000,mmmu_val_mmmu_acc,0.27111,
Cauldron,6000,mmstar_average,0.3484854117958612,
Cauldron,6000,ocrbench_ocrbench_accuracy,0.391,
Cauldron,6000,seedbench_seed_all,0.5185658699277377,
Cauldron,6000,textvqa_val_exact_match,0.49285999999999996,0.0068052528515312825
Cauldron,7000,ai2d_exact_match,0.49838082901554404,0.008999106932714641
Cauldron,7000,average,0.3749288136256422,
Cauldron,7000,average_rank,2.0,
Cauldron,7000,chartqa_relaxed_overall,0.2276,0.00838733777631434
Cauldron,7000,docvqa_val_anls,0.42525461500166023,0.005595478547875609
Cauldron,7000,infovqa_val_anls,0.14305767989732765,0.005444282186253047
Cauldron,7000,mme_total_score,1262.065426170468,
Cauldron,7000,mmmu_val_mmmu_acc,0.29333,
Cauldron,7000,mmstar_average,0.35012603751558075,
Cauldron,7000,ocrbench_ocrbench_accuracy,0.403,
Cauldron,7000,seedbench_seed_all,0.5222901612006671,
Cauldron,7000,textvqa_val_exact_match,0.51132,0.00682164778449453
Cauldron,8000,ai2d_exact_match,0.49028497409326427,0.008997455247470544
Cauldron,8000,average,0.3674367285685282,
Cauldron,8000,average_rank,2.8,
Cauldron,8000,chartqa_relaxed_overall,0.2256,0.008361209238380008
Cauldron,8000,docvqa_val_anls,0.40937518311359955,0.005568234588180622
Cauldron,8000,infovqa_val_anls,0.14953110986986237,0.005518589617885333
Cauldron,8000,mme_total_score,1210.7711084433772,
Cauldron,8000,mmmu_val_mmmu_acc,0.28889,
Cauldron,8000,mmstar_average,0.32742675529850473,
Cauldron,8000,ocrbench_ocrbench_accuracy,0.406,
Cauldron,8000,seedbench_seed_all,0.512562534741523,
Cauldron,8000,textvqa_val_exact_match,0.49726000000000004,0.006823680165585169
Cauldron,9000,ai2d_exact_match,0.49287564766839376,0.008998240543632314
Cauldron,9000,average,0.3635862393983371,
Cauldron,9000,average_rank,3.0,
Cauldron,9000,chartqa_relaxed_overall,0.2264,0.008371693383064148
Cauldron,9000,docvqa_val_anls,0.4019142603693516,0.005557969721056488
Cauldron,9000,infovqa_val_anls,0.15576345355793061,0.005631711679425604
Cauldron,9000,mme_total_score,1161.06112444978,
Cauldron,9000,mmmu_val_mmmu_acc,0.27,
Cauldron,9000,mmstar_average,0.33510800699714055,
Cauldron,9000,ocrbench_ocrbench_accuracy,0.401,
Cauldron,9000,seedbench_seed_all,0.5066147859922179,
Cauldron,9000,textvqa_val_exact_match,0.4825999999999999,0.006824717089570126
Cauldron,10000,ai2d_exact_match,0.4951424870466321,0.008998729431386465
Cauldron,10000,average,0.3613896970671388,
Cauldron,10000,average_rank,3.2,
Cauldron,10000,chartqa_relaxed_overall,0.2276,0.00838733777631434
Cauldron,10000,docvqa_val_anls,0.400968382089468,0.005551850287661274
Cauldron,10000,infovqa_val_anls,0.15155496077062244,0.0055346119867504375
Cauldron,10000,mme_total_score,1230.2276910764306,
Cauldron,10000,mmmu_val_mmmu_acc,0.26,
Cauldron,10000,mmstar_average,0.32908517910608676,
Cauldron,10000,ocrbench_ocrbench_accuracy,0.395,
Cauldron,10000,seedbench_seed_all,0.4972762645914397,
Cauldron,10000,textvqa_val_exact_match,0.49588000000000004,0.006836984276038533
Cauldron,11000,ai2d_exact_match,0.49676165803108807,0.008998965371572357
Cauldron,11000,average,0.36198497174992383,
Cauldron,11000,average_rank,3.0,
Cauldron,11000,chartqa_relaxed_overall,0.2284,0.008397713059747491
Cauldron,11000,docvqa_val_anls,0.4051111426655002,0.0055740680205303966
Cauldron,11000,infovqa_val_anls,0.14954437197310022,0.005537262124650125
Cauldron,11000,mme_total_score,1210.5605242096838,
Cauldron,11000,mmmu_val_mmmu_acc,0.27111,
Cauldron,11000,mmstar_average,0.33316183100069335,
Cauldron,11000,ocrbench_ocrbench_accuracy,0.383,
Cauldron,11000,seedbench_seed_all,0.5043357420789327,
Cauldron,11000,textvqa_val_exact_match,0.48644,0.006834542228525236
Cauldron,12000,ai2d_exact_match,0.5009715025906736,0.008999137132137068
Cauldron,12000,average,0.3661893496614986,
Cauldron,12000,average_rank,3.2,
Cauldron,12000,chartqa_relaxed_overall,0.2332,0.008459061785476934
Cauldron,12000,docvqa_val_anls,0.40826612382074784,0.0055749766883040515
Cauldron,12000,infovqa_val_anls,0.1451043668322714,0.0054346014264420334
Cauldron,12000,mme_total_score,1204.859843937575,
Cauldron,12000,mmmu_val_mmmu_acc,0.29222,
Cauldron,12000,mmstar_average,0.3322773065724958,
Cauldron,12000,ocrbench_ocrbench_accuracy,0.386,
Cauldron,12000,seedbench_seed_all,0.5047248471372985,
Cauldron,12000,textvqa_val_exact_match,0.49294000000000004,0.006824466715369768
Cauldron,13000,ai2d_exact_match,0.4880181347150259,0.00899656981935399
Cauldron,13000,average,0.3609903418270159,
Cauldron,13000,average_rank,3.2,
Cauldron,13000,chartqa_relaxed_overall,0.23,0.008418334000200726
Cauldron,13000,docvqa_val_anls,0.39428463826041577,0.005550710740937849
Cauldron,13000,infovqa_val_anls,0.15077272156398794,0.005555043265840396
Cauldron,13000,mme_total_score,1199.0380152060825,
Cauldron,13000,mmmu_val_mmmu_acc,0.27667,
Cauldron,13000,mmstar_average,0.3323119954668039,
Cauldron,13000,ocrbench_ocrbench_accuracy,0.39,
Cauldron,13000,seedbench_seed_all,0.5000555864369094,
Cauldron,13000,textvqa_val_exact_match,0.4868,0.006822203492428118
Cauldron,14000,ai2d_exact_match,0.49060880829015546,0.00899756662777987
Cauldron,14000,average,0.36202481121184005,
Cauldron,14000,average_rank,2.9,
Cauldron,14000,chartqa_relaxed_overall,0.2264,0.008371693383064148
Cauldron,14000,docvqa_val_anls,0.40917044569115923,0.0055666808292464285
Cauldron,14000,infovqa_val_anls,0.1424839907142797,0.0054301311838352165
Cauldron,14000,mme_total_score,1183.6356542617045,
Cauldron,14000,mmmu_val_mmmu_acc,0.29,
Cauldron,14000,mmstar_average,0.31528335804531843,
Cauldron,14000,ocrbench_ocrbench_accuracy,0.393,
Cauldron,14000,seedbench_seed_all,0.5020566981656476,
Cauldron,14000,textvqa_val_exact_match,0.48922,0.006837726904596613
Cauldron,15000,ai2d_exact_match,0.4896373056994819,0.008997221155546275
Cauldron,15000,average,0.3560155869130515,
Cauldron,15000,average_rank,3.2,
Cauldron,15000,chartqa_relaxed_overall,0.2264,0.008371693383064148
Cauldron,15000,docvqa_val_anls,0.39997251595677663,0.0055655493795707745
Cauldron,15000,infovqa_val_anls,0.13834600428667498,0.005423970029609658
Cauldron,15000,mme_total_score,1171.8512404961984,
Cauldron,15000,mmmu_val_mmmu_acc,0.27667,
Cauldron,15000,mmstar_average,0.31369390041016126,
Cauldron,15000,ocrbench_ocrbench_accuracy,0.385,
Cauldron,15000,seedbench_seed_all,0.5010005558643691,
Cauldron,15000,textvqa_val_exact_match,0.47342,0.006818885551175648
Cauldron,16000,ai2d_exact_match,0.4838082901554404,0.008994434238637765
Cauldron,16000,average,0.3566345947908368,
Cauldron,16000,average_rank,3.4,
Cauldron,16000,chartqa_relaxed_overall,0.22,0.008286583553358689
Cauldron,16000,docvqa_val_anls,0.40446794741098796,0.005565712054024941
Cauldron,16000,infovqa_val_anls,0.1414810779340465,0.005414255001486301
Cauldron,16000,mme_total_score,1163.921468587435,
Cauldron,16000,mmmu_val_mmmu_acc,0.26444,
Cauldron,16000,mmstar_average,0.3211159497904861,
Cauldron,16000,ocrbench_ocrbench_accuracy,0.392,
Cauldron,16000,seedbench_seed_all,0.5045580878265703,
Cauldron,16000,textvqa_val_exact_match,0.47784,0.0068411071493878735
Cauldron,17000,ai2d_exact_match,0.4795984455958549,0.008991659681159872
Cauldron,17000,average,0.35664663136828295,
Cauldron,17000,average_rank,3.3,
Cauldron,17000,chartqa_relaxed_overall,0.2232,0.008329493152795851
Cauldron,17000,docvqa_val_anls,0.39683521379075226,0.0055483771434975925
Cauldron,17000,infovqa_val_anls,0.14519383287788715,0.005493162839439223
Cauldron,17000,mme_total_score,1216.2439975990396,
Cauldron,17000,mmmu_val_mmmu_acc,0.27667,
Cauldron,17000,mmstar_average,0.3294722845469949,
Cauldron,17000,ocrbench_ocrbench_accuracy,0.386,
Cauldron,17000,seedbench_seed_all,0.4938299055030573,
Cauldron,17000,textvqa_val_exact_match,0.47902,0.006822615153700749
Cauldron,18000,ai2d_exact_match,0.48575129533678757,0.008995499260034972
Cauldron,18000,average,0.3559572601168983,
Cauldron,18000,average_rank,3.3,
Cauldron,18000,chartqa_relaxed_overall,0.22,0.008286583553358689
Cauldron,18000,docvqa_val_anls,0.39553075414155453,0.005560094600545488
Cauldron,18000,infovqa_val_anls,0.1441200977793978,0.005482620397489444
Cauldron,18000,mme_total_score,1146.935774309724,
Cauldron,18000,mmmu_val_mmmu_acc,0.28333,
Cauldron,18000,mmstar_average,0.31718334943636844,
Cauldron,18000,ocrbench_ocrbench_accuracy,0.393,
Cauldron,18000,seedbench_seed_all,0.49571984435797667,
Cauldron,18000,textvqa_val_exact_match,0.46897999999999995,0.006834829544251984
Cauldron,19000,ai2d_exact_match,0.47506476683937826,0.00898795641911507
Cauldron,19000,average,0.35389113555756785,
Cauldron,19000,average_rank,3.4,
Cauldron,19000,chartqa_relaxed_overall,0.2196,0.008281169428700436
Cauldron,19000,docvqa_val_anls,0.3927677091095705,0.005557918115613283
Cauldron,19000,infovqa_val_anls,0.14242963523056748,0.005420426599891758
Cauldron,19000,mme_total_score,1156.7713085234095,
Cauldron,19000,mmmu_val_mmmu_acc,0.26667,
Cauldron,19000,mmstar_average,0.3300183589775604,
Cauldron,19000,ocrbench_ocrbench_accuracy,0.393,
Cauldron,19000,seedbench_seed_all,0.4895497498610339,
Cauldron,19000,textvqa_val_exact_match,0.47591999999999995,0.0068329619195279245
Cauldron,20000,ai2d_exact_match,0.48218911917098445,0.008993442748995703
Cauldron,20000,average,0.35315414152261965,
Cauldron,20000,average_rank,3.1,
Cauldron,20000,chartqa_relaxed_overall,0.2228,0.008324168469720259
Cauldron,20000,docvqa_val_anls,0.3995019956467228,0.005554102577571356
Cauldron,20000,infovqa_val_anls,0.13561089161386572,0.005312619238987202
Cauldron,20000,mme_total_score,1205.715886354542,
Cauldron,20000,mmmu_val_mmmu_acc,0.27667,
Cauldron,20000,mmstar_average,0.3019064734976851,
Cauldron,20000,ocrbench_ocrbench_accuracy,0.392,
Cauldron,20000,seedbench_seed_all,0.49182879377431904,
Cauldron,20000,textvqa_val_exact_match,0.4758799999999999,0.0068345144112400185
Cambrian,1000,ai2d_exact_match,0.2969559585492228,0.00822373246069825
Cambrian,1000,average,0.2927820669039429,
Cambrian,1000,average_rank,2.3,
Cambrian,1000,chartqa_relaxed_overall,0.3652,0.009631650506356148
Cambrian,1000,docvqa_val_anls,0.3321611875422322,0.005779917542014128
Cambrian,1000,infovqa_val_anls,0.14245417507906105,0.005737797137238206
Cambrian,1000,mme_total_score,1199.468087234894,
Cambrian,1000,mmmu_val_mmmu_acc,0.24556,
Cambrian,1000,mmstar_average,0.25503356223234036,
Cambrian,1000,ocrbench_ocrbench_accuracy,0.257,
Cambrian,1000,seedbench_seed_all,0.3486937187326292,
Cambrian,1000,textvqa_val_exact_match,0.39198,0.0066503820519040295
Cambrian,2000,ai2d_exact_match,0.36204663212435234,0.008649846657326264
Cambrian,2000,average,0.34977426052091565,
Cambrian,2000,average_rank,2.3,
Cambrian,2000,chartqa_relaxed_overall,0.4272,0.009895414680177737
Cambrian,2000,docvqa_val_anls,0.4044005302893221,0.006099745172446295
Cambrian,2000,infovqa_val_anls,0.16067123444748188,0.005906486800204124
Cambrian,2000,mme_total_score,1191.6502601040415,
Cambrian,2000,mmmu_val_mmmu_acc,0.27,
Cambrian,2000,mmstar_average,0.3140124492167455,
Cambrian,2000,ocrbench_ocrbench_accuracy,0.293,
Cambrian,2000,seedbench_seed_all,0.4954974986103391,
Cambrian,2000,textvqa_val_exact_match,0.42113999999999996,0.006720777771268006
Cambrian,3000,ai2d_exact_match,0.3954015544041451,0.008800034697838395
Cambrian,3000,average,0.36894910100121225,
Cambrian,3000,average_rank,1.9,
Cambrian,3000,chartqa_relaxed_overall,0.4512,0.00995424828018316
Cambrian,3000,docvqa_val_anls,0.4317442116227413,0.006203480507897517
Cambrian,3000,infovqa_val_anls,0.17555075927653038,0.006227695613801885
Cambrian,3000,mme_total_score,1311.187975190076,
Cambrian,3000,mmmu_val_mmmu_acc,0.28222,
Cambrian,3000,mmstar_average,0.3241666733128301,
Cambrian,3000,ocrbench_ocrbench_accuracy,0.289,
Cambrian,3000,seedbench_seed_all,0.5216787103946637,
Cambrian,3000,textvqa_val_exact_match,0.4495799999999999,0.006762330259763156
Cambrian,4000,ai2d_exact_match,0.3960492227979275,0.00880252039912977
Cambrian,4000,average,0.38270567946732525,
Cambrian,4000,average_rank,2.2,
Cambrian,4000,chartqa_relaxed_overall,0.4764,0.009990852959439592
Cambrian,4000,docvqa_val_anls,0.46350742276594625,0.006276498296530657
Cambrian,4000,infovqa_val_anls,0.17819320935276328,0.006230849386066924
Cambrian,4000,mme_total_score,1239.0667266906762,
Cambrian,4000,mmmu_val_mmmu_acc,0.26778,
Cambrian,4000,mmstar_average,0.3298927333298682,
Cambrian,4000,ocrbench_ocrbench_accuracy,0.334,
Cambrian,4000,seedbench_seed_all,0.5273485269594219,
Cambrian,4000,textvqa_val_exact_match,0.47118000000000004,0.0067854764061200295
Cambrian,5000,ai2d_exact_match,0.40382124352331605,0.00883109414387431
Cambrian,5000,average,0.3896927239658996,
Cambrian,5000,average_rank,2.2,
Cambrian,5000,chartqa_relaxed_overall,0.4912,0.01000045137036546
Cambrian,5000,docvqa_val_anls,0.47067674424138894,0.006257580396259991
Cambrian,5000,infovqa_val_anls,0.19432385292037085,0.00653326869729313
Cambrian,5000,mme_total_score,1214.843337334934,
Cambrian,5000,mmmu_val_mmmu_acc,0.26556,
Cambrian,5000,mmstar_average,0.3255942091936794,
Cambrian,5000,ocrbench_ocrbench_accuracy,0.348,
Cambrian,5000,seedbench_seed_all,0.5292384658143413,
Cambrian,5000,textvqa_val_exact_match,0.47881999999999997,0.0067962283116337965
Cambrian,6000,ai2d_exact_match,0.4183937823834197,0.00887848400426025
Cambrian,6000,average,0.39990121640985093,
Cambrian,6000,average_rank,2.4,
Cambrian,6000,chartqa_relaxed_overall,0.5048,0.010001539697392967
Cambrian,6000,docvqa_val_anls,0.5016482570925722,0.006248476976439708
Cambrian,6000,infovqa_val_anls,0.19206925076752404,0.006399951499514914
Cambrian,6000,mme_total_score,1176.5368147258905,
Cambrian,6000,mmmu_val_mmmu_acc,0.26667,
Cambrian,6000,mmstar_average,0.33910121942401966,
Cambrian,6000,ocrbench_ocrbench_accuracy,0.349,
Cambrian,6000,seedbench_seed_all,0.5391884380211228,
Cambrian,6000,textvqa_val_exact_match,0.48823999999999995,0.006792935247288521
Cambrian,7000,ai2d_exact_match,0.4326424870466321,0.008917121282993509
Cambrian,7000,average,0.40874111160527243,
Cambrian,7000,average_rank,2.2,
Cambrian,7000,chartqa_relaxed_overall,0.5088,0.01000045137036546
Cambrian,7000,docvqa_val_anls,0.5036441729071615,0.006331057466984081
Cambrian,7000,infovqa_val_anls,0.21047690542452482,0.0067248622097179815
Cambrian,7000,mme_total_score,1226.7814125650261,
Cambrian,7000,mmmu_val_mmmu_acc,0.29,
Cambrian,7000,mmstar_average,0.338458434622219,
Cambrian,7000,ocrbench_ocrbench_accuracy,0.366,
Cambrian,7000,seedbench_seed_all,0.5344080044469149,
Cambrian,7000,textvqa_val_exact_match,0.49423999999999996,0.006789004536492761
Cambrian,8000,ai2d_exact_match,0.4375,0.008928571428571428
Cambrian,8000,average,0.4145399236017655,
Cambrian,8000,average_rank,2.2,
Cambrian,8000,chartqa_relaxed_overall,0.5312,0.009982508912777261
Cambrian,8000,docvqa_val_anls,0.5139425879433994,0.006316907313170543
Cambrian,8000,infovqa_val_anls,0.20402472511542052,0.00665285157736885
Cambrian,8000,mme_total_score,1243.7800120048018,
Cambrian,8000,mmmu_val_mmmu_acc,0.28222,
Cambrian,8000,mmstar_average,0.3300028831814166,
Cambrian,8000,ocrbench_ocrbench_accuracy,0.397,
Cambrian,8000,seedbench_seed_all,0.5364091161756531,
Cambrian,8000,textvqa_val_exact_match,0.49855999999999995,0.006793174127235705
Cambrian,9000,ai2d_exact_match,0.4251943005181347,0.008897867521411106
Cambrian,9000,average,0.41587431550154147,
Cambrian,9000,average_rank,2.0,
Cambrian,9000,chartqa_relaxed_overall,0.5316,0.009982005418395102
Cambrian,9000,docvqa_val_anls,0.524278096798472,0.006327817979288962
Cambrian,9000,infovqa_val_anls,0.2075069347958689,0.006574086714467312
Cambrian,9000,mme_total_score,1196.0997398959585,
Cambrian,9000,mmmu_val_mmmu_acc,0.28556,
Cambrian,9000,mmstar_average,0.33833745626187595,
Cambrian,9000,ocrbench_ocrbench_accuracy,0.381,
Cambrian,9000,seedbench_seed_all,0.5456920511395219,
Cambrian,9000,textvqa_val_exact_match,0.5036999999999999,0.006790970877355565
Cambrian,10000,ai2d_exact_match,0.44559585492227977,0.008945723914357835
Cambrian,10000,average,0.41659534392300923,
Cambrian,10000,average_rank,2.0,
Cambrian,10000,chartqa_relaxed_overall,0.5416,0.00996732235888869
Cambrian,10000,docvqa_val_anls,0.5215772912722147,0.006314944464077694
Cambrian,10000,infovqa_val_anls,0.18925972424188112,0.006302599390246784
Cambrian,10000,mme_total_score,1241.6579631852742,
Cambrian,10000,mmmu_val_mmmu_acc,0.27889,
Cambrian,10000,mmstar_average,0.34495128935097424,
Cambrian,10000,ocrbench_ocrbench_accuracy,0.373,
Cambrian,10000,seedbench_seed_all,0.5510839355197332,
Cambrian,10000,textvqa_val_exact_match,0.5034000000000001,0.0067932111363852585
Cambrian,11000,ai2d_exact_match,0.4481865284974093,0.008950704796242765
Cambrian,11000,average,0.42096531591252645,
Cambrian,11000,average_rank,2.0,
Cambrian,11000,chartqa_relaxed_overall,0.5388,0.0099718403035556
Cambrian,11000,docvqa_val_anls,0.5266496382012209,0.006315639724937912
Cambrian,11000,infovqa_val_anls,0.210453542763111,0.006757501751011823
Cambrian,11000,mme_total_score,1288.1182472989194,
Cambrian,11000,mmmu_val_mmmu_acc,0.28556,
Cambrian,11000,mmstar_average,0.33813173019346515,
Cambrian,11000,ocrbench_ocrbench_accuracy,0.372,
Cambrian,11000,seedbench_seed_all,0.547526403557532,
Cambrian,11000,textvqa_val_exact_match,0.5213800000000001,0.00677771101429669
Cambrian,12000,ai2d_exact_match,0.4566062176165803,0.008965198879336198
Cambrian,12000,average,0.42647137409223257,
Cambrian,12000,average_rank,2.1,
Cambrian,12000,chartqa_relaxed_overall,0.5488,0.00995424828018316
Cambrian,12000,docvqa_val_anls,0.5432685128640529,0.006286968775744768
Cambrian,12000,infovqa_val_anls,0.214068867667478,0.006728697021311144
Cambrian,12000,mme_total_score,1272.0885354141656,
Cambrian,12000,mmmu_val_mmmu_acc,0.27556,
Cambrian,12000,mmstar_average,0.3364706975313428,
Cambrian,12000,ocrbench_ocrbench_accuracy,0.396,
Cambrian,12000,seedbench_seed_all,0.5505280711506393,
Cambrian,12000,textvqa_val_exact_match,0.51694,0.00676817323313926
Cambrian,13000,ai2d_exact_match,0.44591968911917096,0.008946359966425538
Cambrian,13000,average,0.42595033048849396,
Cambrian,13000,average_rank,2.1,
Cambrian,13000,chartqa_relaxed_overall,0.5484,0.009955029736109216
Cambrian,13000,docvqa_val_anls,0.5438384263330651,0.006322105329987294
Cambrian,13000,infovqa_val_anls,0.2206834922799479,0.006931006985711701
Cambrian,13000,mme_total_score,1294.3567426970787,
Cambrian,13000,mmmu_val_mmmu_acc,0.27889,
Cambrian,13000,mmstar_average,0.3258043460972802,
Cambrian,13000,ocrbench_ocrbench_accuracy,0.404,
Cambrian,13000,seedbench_seed_all,0.5466370205669816,
Cambrian,13000,textvqa_val_exact_match,0.5193800000000001,0.006779976160381913
Cambrian,14000,ai2d_exact_match,0.452720207253886,0.00895883074213608
Cambrian,14000,average,0.4290628718702856,
Cambrian,14000,average_rank,2.2,
Cambrian,14000,chartqa_relaxed_overall,0.5624,0.009923804147377265
Cambrian,14000,docvqa_val_anls,0.5501582985035621,0.006289139790552158
Cambrian,14000,infovqa_val_anls,0.2108586833777777,0.006694603397438603
Cambrian,14000,mme_total_score,1258.3851540616247,
Cambrian,14000,mmmu_val_mmmu_acc,0.28444,
Cambrian,14000,mmstar_average,0.3392338272359765,
Cambrian,14000,ocrbench_ocrbench_accuracy,0.391,
Cambrian,14000,seedbench_seed_all,0.5506948304613675,
Cambrian,14000,textvqa_val_exact_match,0.5200600000000001,0.006762031077483937
Cambrian,15000,ai2d_exact_match,0.4575777202072539,0.008966704964444827
Cambrian,15000,average,0.4277300448618869,
Cambrian,15000,average_rank,2.2,
Cambrian,15000,chartqa_relaxed_overall,0.5572,0.009936335154498413
Cambrian,15000,docvqa_val_anls,0.550106577844955,0.006305789516584643
Cambrian,15000,infovqa_val_anls,0.2065365477570411,0.006585265308234506
Cambrian,15000,mme_total_score,1191.499399759904,
Cambrian,15000,mmmu_val_mmmu_acc,0.27667,
Cambrian,15000,mmstar_average,0.3287834934674655,
Cambrian,15000,ocrbench_ocrbench_accuracy,0.403,
Cambrian,15000,seedbench_seed_all,0.5489160644802669,
Cambrian,15000,textvqa_val_exact_match,0.52078,0.006761241098810132
Cambrian,16000,ai2d_exact_match,0.45174870466321243,0.008957152666985158
Cambrian,16000,average,0.4283932783055524,
Cambrian,16000,average_rank,2.0,
Cambrian,16000,chartqa_relaxed_overall,0.566,0.00991448025705367
Cambrian,16000,docvqa_val_anls,0.5507111549470696,0.006298722691255348
Cambrian,16000,infovqa_val_anls,0.21185403234992514,0.0065982885956266755
Cambrian,16000,mme_total_score,1242.7407963185274,
Cambrian,16000,mmmu_val_mmmu_acc,0.28111,
Cambrian,16000,mmstar_average,0.32560559611383355,
Cambrian,16000,ocrbench_ocrbench_accuracy,0.394,
Cambrian,16000,seedbench_seed_all,0.5540300166759311,
Cambrian,16000,textvqa_val_exact_match,0.5204799999999999,0.006783488561456611
Cambrian,17000,ai2d_exact_match,0.4585492227979275,0.008968176705111413
Cambrian,17000,average,0.43044446070382536,
Cambrian,17000,average_rank,2.4,
Cambrian,17000,chartqa_relaxed_overall,0.5656,0.009915542506251351
Cambrian,17000,docvqa_val_anls,0.5528747665552118,0.006300095973166064
Cambrian,17000,infovqa_val_anls,0.20960594545383252,0.0066643358201217045
Cambrian,17000,mme_total_score,1292.4750900360143,
Cambrian,17000,mmmu_val_mmmu_acc,0.27111,
Cambrian,17000,mmstar_average,0.3297184661133375,
Cambrian,17000,ocrbench_ocrbench_accuracy,0.409,
Cambrian,17000,seedbench_seed_all,0.555141745414119,
Cambrian,17000,textvqa_val_exact_match,0.5224,0.006774129151791618
Cambrian,18000,ai2d_exact_match,0.4523963730569948,0.008958275210820045
Cambrian,18000,average,0.43086034100304976,
Cambrian,18000,average_rank,2.4,
Cambrian,18000,chartqa_relaxed_overall,0.566,0.00991448025705367
Cambrian,18000,docvqa_val_anls,0.5527950768923724,0.006311862091164367
Cambrian,18000,infovqa_val_anls,0.21943552260393814,0.006848865968629337
Cambrian,18000,mme_total_score,1271.4629851940776,
Cambrian,18000,mmmu_val_mmmu_acc,0.28333,
Cambrian,18000,mmstar_average,0.3399009269355101,
Cambrian,18000,ocrbench_ocrbench_accuracy,0.403,
Cambrian,18000,seedbench_seed_all,0.5493051695386326,
Cambrian,18000,textvqa_val_exact_match,0.5115799999999999,0.0067870754820260944
Cambrian,19000,ai2d_exact_match,0.45012953367875647,0.008954279299902583
Cambrian,19000,average,0.43057935657557483,
Cambrian,19000,average_rank,2.2,
Cambrian,19000,chartqa_relaxed_overall,0.5704,0.009902361269085337
Cambrian,19000,docvqa_val_anls,0.5526262050544066,0.006310038331338026
Cambrian,19000,infovqa_val_anls,0.21937034023427093,0.006858602078113178
Cambrian,19000,mme_total_score,1269.9476790716285,
Cambrian,19000,mmmu_val_mmmu_acc,0.28556,
Cambrian,19000,mmstar_average,0.3314266960826673,
Cambrian,19000,ocrbench_ocrbench_accuracy,0.404,
Cambrian,19000,seedbench_seed_all,0.5465814341300722,
Cambrian,19000,textvqa_val_exact_match,0.51512,0.006773909823053313
Cambrian,20000,ai2d_exact_match,0.45531088082901555,0.008963137311190377
Cambrian,20000,average,0.42817340693945505,
Cambrian,20000,average_rank,2.4,
Cambrian,20000,chartqa_relaxed_overall,0.5684,0.009907968668564455
Cambrian,20000,docvqa_val_anls,0.549188563518089,0.006325944032596611
Cambrian,20000,infovqa_val_anls,0.21755406764942647,0.0068363256354831885
Cambrian,20000,mme_total_score,1290.6296518607442,
Cambrian,20000,mmmu_val_mmmu_acc,0.28444,
Cambrian,20000,mmstar_average,0.32485343172593534,
Cambrian,20000,ocrbench_ocrbench_accuracy,0.392,
Cambrian,20000,seedbench_seed_all,0.5486937187326293,
Cambrian,20000,textvqa_val_exact_match,0.51312,0.006789609184524225
LLaVa,1000,ai2d_exact_match,0.25777202072538863,0.007872600874396432
LLaVa,1000,average,0.2581360512843851,
LLaVa,1000,average_rank,3.0,
LLaVa,1000,chartqa_relaxed_overall,0.1576,0.007288768514542319
LLaVa,1000,docvqa_val_anls,0.2850280465017524,0.005237571860745478
LLaVa,1000,infovqa_val_anls,0.15291302898150733,0.005597827181699182
LLaVa,1000,mme_total_score,844.0894357743098,
LLaVa,1000,mmmu_val_mmmu_acc,0.25333,
LLaVa,1000,mmstar_average,0.22969486173769915,
LLaVa,1000,ocrbench_ocrbench_accuracy,0.35,
LLaVa,1000,seedbench_seed_all,0.2717065036131184,
LLaVa,1000,textvqa_val_exact_match,0.36518,0.006561838543046682
LLaVa,2000,ai2d_exact_match,0.24676165803108807,0.007759553547248649
LLaVa,2000,average,0.28023175511348764,
LLaVa,2000,average_rank,3.2,
LLaVa,2000,chartqa_relaxed_overall,0.19,0.007847587772910948
LLaVa,2000,docvqa_val_anls,0.31839133336930814,0.005353711170722305
LLaVa,2000,infovqa_val_anls,0.1625232406439703,0.005680709103352321
LLaVa,2000,mme_total_score,677.0834333733493,
LLaVa,2000,mmmu_val_mmmu_acc,0.25111,
LLaVa,2000,mmstar_average,0.2602226545829147,
LLaVa,2000,ocrbench_ocrbench_accuracy,0.389,
LLaVa,2000,seedbench_seed_all,0.2864369093941078,
LLaVa,2000,textvqa_val_exact_match,0.41764000000000007,0.006695635323587844
LLaVa,3000,ai2d_exact_match,0.31541450777202074,0.00836346730591157
LLaVa,3000,average,0.3241247472461608,
LLaVa,3000,average_rank,3.1,
LLaVa,3000,chartqa_relaxed_overall,0.2048,0.008072722684486087
LLaVa,3000,docvqa_val_anls,0.33927313841893186,0.005424261898744584
LLaVa,3000,infovqa_val_anls,0.17400826017663457,0.005878416771815313
LLaVa,3000,mme_total_score,674.5895358143258,
LLaVa,3000,mmmu_val_mmmu_acc,0.27778,
LLaVa,3000,mmstar_average,0.28839612401739867,
LLaVa,3000,ocrbench_ocrbench_accuracy,0.428,
LLaVa,3000,seedbench_seed_all,0.4512506948304614,
LLaVa,3000,textvqa_val_exact_match,0.4382,0.006743326070219196
LLaVa,4000,ai2d_exact_match,0.30667098445595853,0.008299228398743067
LLaVa,4000,average,0.34151562451124173,
LLaVa,4000,average_rank,2.8,
LLaVa,4000,chartqa_relaxed_overall,0.2168,0.00824295350666284
LLaVa,4000,docvqa_val_anls,0.36894439928615425,0.005583877165382837
LLaVa,4000,infovqa_val_anls,0.1815741433661475,0.005975096001960774
LLaVa,4000,mme_total_score,660.3387354941976,
LLaVa,4000,mmmu_val_mmmu_acc,0.29444,
LLaVa,4000,mmstar_average,0.3089940618086463,
LLaVa,4000,ocrbench_ocrbench_accuracy,0.439,
LLaVa,4000,seedbench_seed_all,0.48265703168426904,
LLaVa,4000,textvqa_val_exact_match,0.4745599999999999,0.006778004835488831
LLaVa,5000,ai2d_exact_match,0.3176813471502591,0.00837955903737489
LLaVa,5000,average,0.3488971740226244,
LLaVa,5000,average_rank,2.9,
LLaVa,5000,chartqa_relaxed_overall,0.2076,0.008113397986710395
LLaVa,5000,docvqa_val_anls,0.37667351380566144,0.005504553709162657
LLaVa,5000,infovqa_val_anls,0.19157302816202296,0.006066754825254386
LLaVa,5000,mme_total_score,596.045218087235,
LLaVa,5000,mmmu_val_mmmu_acc,0.28889,
LLaVa,5000,mmstar_average,0.30911460927022283,
LLaVa,5000,ocrbench_ocrbench_accuracy,0.471,
LLaVa,5000,seedbench_seed_all,0.49972206781545303,
LLaVa,5000,textvqa_val_exact_match,0.47781999999999997,0.00678922884027701
LLaVa,6000,ai2d_exact_match,0.3626943005181347,0.00865318426683941
LLaVa,6000,average,0.35336013036474917,
LLaVa,6000,average_rank,3.3,
LLaVa,6000,chartqa_relaxed_overall,0.2164,0.00823744852629073
LLaVa,6000,docvqa_val_anls,0.3796381971300078,0.005512363416378596
LLaVa,6000,infovqa_val_anls,0.1911083172357537,0.00606756561226675
LLaVa,6000,mme_total_score,751.7179871948779,
LLaVa,6000,mmmu_val_mmmu_acc,0.27111,
LLaVa,6000,mmstar_average,0.3230226430014031,
LLaVa,6000,ocrbench_ocrbench_accuracy,0.471,
LLaVa,6000,seedbench_seed_all,0.49788771539744303,
LLaVa,6000,textvqa_val_exact_match,0.46738,0.006777431212101451
LLaVa,7000,ai2d_exact_match,0.3636658031088083,0.008658158841882565
LLaVa,7000,average,0.36232264653787655,
LLaVa,7000,average_rank,3.4,
LLaVa,7000,chartqa_relaxed_overall,0.2276,0.00838733777631434
LLaVa,7000,docvqa_val_anls,0.38862032747814834,0.005554025202613156
LLaVa,7000,infovqa_val_anls,0.1987523491607365,0.006169459873730798
LLaVa,7000,mme_total_score,700.0341136454582,
LLaVa,7000,mmmu_val_mmmu_acc,0.28,
LLaVa,7000,mmstar_average,0.32238002502982693,
LLaVa,7000,ocrbench_ocrbench_accuracy,0.469,
LLaVa,7000,seedbench_seed_all,0.5175653140633686,
LLaVa,7000,textvqa_val_exact_match,0.49332,0.006784414578741135
LLaVa,8000,ai2d_exact_match,0.38244818652849744,0.008746910624026853
LLaVa,8000,average,0.36916094621046264,
LLaVa,8000,average_rank,2.8,
LLaVa,8000,chartqa_relaxed_overall,0.2276,0.00838733777631434
LLaVa,8000,docvqa_val_anls,0.4000384036155175,0.005647492303754258
LLaVa,8000,infovqa_val_anls,0.20267340215584623,0.006186451136703468
LLaVa,8000,mme_total_score,787.0998399359744,
LLaVa,8000,mmmu_val_mmmu_acc,0.28333,
LLaVa,8000,mmstar_average,0.33877512170436386,
LLaVa,8000,ocrbench_ocrbench_accuracy,0.47,
LLaVa,8000,seedbench_seed_all,0.5221234018899389,
LLaVa,8000,textvqa_val_exact_match,0.49546,0.006796875545678079
LLaVa,9000,ai2d_exact_match,0.3856865284974093,0.008760803506529557
LLaVa,9000,average,0.3660729124456708,
LLaVa,9000,average_rank,3.0,
LLaVa,9000,chartqa_relaxed_overall,0.2212,0.00830275847651416
LLaVa,9000,docvqa_val_anls,0.3961556104365206,0.005555787005997977
LLaVa,9000,infovqa_val_anls,0.20795411138332273,0.006302696156883479
LLaVa,9000,mme_total_score,697.6510604241697,
LLaVa,9000,mmmu_val_mmmu_acc,0.27444,
LLaVa,9000,mmstar_average,0.33019217959261743,
LLaVa,9000,ocrbench_ocrbench_accuracy,0.47,
LLaVa,9000,seedbench_seed_all,0.5140077821011673,
LLaVa,9000,textvqa_val_exact_match,0.49501999999999996,0.006795224421237829
LLaVa,10000,ai2d_exact_match,0.3636658031088083,0.008658158841882561
LLaVa,10000,average,0.36465272894871764,
LLaVa,10000,average_rank,3.1,
LLaVa,10000,chartqa_relaxed_overall,0.2216,0.008308127706914342
LLaVa,10000,docvqa_val_anls,0.3905169927438113,0.005559588309122447
LLaVa,10000,infovqa_val_anls,0.210842797817216,0.0062742161273205005
LLaVa,10000,mme_total_score,710.1757703081232,
LLaVa,10000,mmmu_val_mmmu_acc,0.25667,
LLaVa,10000,mmstar_average,0.33485115141559363,
LLaVa,10000,ocrbench_ocrbench_accuracy,0.484,
LLaVa,10000,seedbench_seed_all,0.5220678154530295,
LLaVa,10000,textvqa_val_exact_match,0.49766000000000005,0.0067820722630208075
LLaVa,11000,ai2d_exact_match,0.3539507772020725,0.008606685322379343
LLaVa,11000,average,0.3619647158138698,
LLaVa,11000,average_rank,3.3,
LLaVa,11000,chartqa_relaxed_overall,0.226,0.008366456779283321
LLaVa,11000,docvqa_val_anls,0.39615321520069524,0.0055548098783566
LLaVa,11000,infovqa_val_anls,0.20231707967850712,0.006189706400735626
LLaVa,11000,mme_total_score,620.8629451780713,
LLaVa,11000,mmmu_val_mmmu_acc,0.26778,
LLaVa,11000,mmstar_average,0.3504522318333254,
LLaVa,11000,ocrbench_ocrbench_accuracy,0.48,
LLaVa,11000,seedbench_seed_all,0.5084491384102279,
LLaVa,11000,textvqa_val_exact_match,0.47257999999999994,0.0067942373414689025
LLaVa,12000,ai2d_exact_match,0.3963730569948187,0.008803757198545707
LLaVa,12000,average,0.36835635606525785,
LLaVa,12000,average_rank,3.1,
LLaVa,12000,chartqa_relaxed_overall,0.234,0.008469137530835504
LLaVa,12000,docvqa_val_anls,0.3998087503562603,0.005606788206948343
LLaVa,12000,infovqa_val_anls,0.19486992137918643,0.006137557366661157
LLaVa,12000,mme_total_score,707.7871148459384,
LLaVa,12000,mmmu_val_mmmu_acc,0.26444,
LLaVa,12000,mmstar_average,0.34510216846405867,
LLaVa,12000,ocrbench_ocrbench_accuracy,0.466,
LLaVa,12000,seedbench_seed_all,0.5159533073929962,
LLaVa,12000,textvqa_val_exact_match,0.49866000000000005,0.006787787245571138
LLaVa,13000,ai2d_exact_match,0.37661917098445596,0.008720866089740391
LLaVa,13000,average,0.3660925061677603,
LLaVa,13000,average_rank,3.2,
LLaVa,13000,chartqa_relaxed_overall,0.23,0.008418334000200726
LLaVa,13000,docvqa_val_anls,0.39678037656395876,0.005562201990102385
LLaVa,13000,infovqa_val_anls,0.20007389352596994,0.006181717086032354
LLaVa,13000,mme_total_score,762.4510804321728,
LLaVa,13000,mmmu_val_mmmu_acc,0.26111,
LLaVa,13000,mmstar_average,0.3487764851969923,
LLaVa,13000,ocrbench_ocrbench_accuracy,0.487,
LLaVa,13000,seedbench_seed_all,0.5187326292384659,
LLaVa,13000,textvqa_val_exact_match,0.47573999999999994,0.006786037174972445
LLaVa,14000,ai2d_exact_match,0.40382124352331605,0.008831094143874325
LLaVa,14000,average,0.3665520961603681,
LLaVa,14000,average_rank,3.5,
LLaVa,14000,chartqa_relaxed_overall,0.224,0.0083401092900026
LLaVa,14000,docvqa_val_anls,0.39653795108545226,0.0055480083540036754
LLaVa,14000,infovqa_val_anls,0.1966338205713239,0.006145830112184984
LLaVa,14000,mme_total_score,648.8810524209684,
LLaVa,14000,mmmu_val_mmmu_acc,0.27222,
LLaVa,14000,mmstar_average,0.3348780070169728,
LLaVa,14000,ocrbench_ocrbench_accuracy,0.482,
LLaVa,14000,seedbench_seed_all,0.5121178432462479,
LLaVa,14000,textvqa_val_exact_match,0.47676,0.006784540255411228
LLaVa,15000,ai2d_exact_match,0.38374352331606215,0.008752516998880439
LLaVa,15000,average,0.3656314014070533,
LLaVa,15000,average_rank,3.3,
LLaVa,15000,chartqa_relaxed_overall,0.222,0.008313485768211027
LLaVa,15000,docvqa_val_anls,0.3956148602850384,0.005571289516040145
LLaVa,15000,infovqa_val_anls,0.2003939669503818,0.006205919365204143
LLaVa,15000,mme_total_score,744.8995598239295,
LLaVa,15000,mmmu_val_mmmu_acc,0.25111,
LLaVa,15000,mmstar_average,0.34431451447442113,
LLaVa,15000,ocrbench_ocrbench_accuracy,0.491,
LLaVa,15000,seedbench_seed_all,0.5223457476375765,
LLaVa,15000,textvqa_val_exact_match,0.48016000000000003,0.006780152577471598
LLaVa,16000,ai2d_exact_match,0.38244818652849744,0.008746910624026851
LLaVa,16000,average,0.3664952284054124,
LLaVa,16000,average_rank,3.1,
LLaVa,16000,chartqa_relaxed_overall,0.2272,0.008382133861209024
LLaVa,16000,docvqa_val_anls,0.3971604594021061,0.005596507964441207
LLaVa,16000,infovqa_val_anls,0.20130541865614268,0.006177273754737603
LLaVa,16000,mme_total_score,741.5084033613446,
LLaVa,16000,mmmu_val_mmmu_acc,0.25444,
LLaVa,16000,mmstar_average,0.34322789378570057,
LLaVa,16000,ocrbench_ocrbench_accuracy,0.488,
LLaVa,16000,seedbench_seed_all,0.5151750972762645,
LLaVa,16000,textvqa_val_exact_match,0.4895,0.0067890182024819105
LLaVa,17000,ai2d_exact_match,0.36852331606217614,0.008682460781863906
LLaVa,17000,average,0.3659850040618015,
LLaVa,17000,average_rank,3.0,
LLaVa,17000,chartqa_relaxed_overall,0.2264,0.008371693383064148
LLaVa,17000,docvqa_val_anls,0.3895535425900796,0.005559420230793686
LLaVa,17000,infovqa_val_anls,0.19870913061640477,0.0061833458200064835
LLaVa,17000,mme_total_score,738.0654261704681,
LLaVa,17000,mmmu_val_mmmu_acc,0.27667,
LLaVa,17000,mmstar_average,0.3488362957589257,
LLaVa,17000,ocrbench_ocrbench_accuracy,0.486,
LLaVa,17000,seedbench_seed_all,0.514952751528627,
LLaVa,17000,textvqa_val_exact_match,0.48422,0.006797929147037179
LLaVa,18000,ai2d_exact_match,0.3785621761658031,0.008729696327646351
LLaVa,18000,average,0.3667559662544118,
LLaVa,18000,average_rank,3.1,
LLaVa,18000,chartqa_relaxed_overall,0.2268,0.008376919070233621
LLaVa,18000,docvqa_val_anls,0.39054490192374947,0.005557124380968682
LLaVa,18000,infovqa_val_anls,0.19983100041999644,0.006171606410532323
LLaVa,18000,mme_total_score,746.5269107643057,
LLaVa,18000,mmmu_val_mmmu_acc,0.27,
LLaVa,18000,mmstar_average,0.3522401814266279,
LLaVa,18000,ocrbench_ocrbench_accuracy,0.497,
LLaVa,18000,seedbench_seed_all,0.5137854363535297,
LLaVa,18000,textvqa_val_exact_match,0.47203999999999996,0.006793178720998519
LLaVa,19000,ai2d_exact_match,0.3707901554404145,0.008693477555877339
LLaVa,19000,average,0.3627892845719615,
LLaVa,19000,average_rank,3.2,
LLaVa,19000,chartqa_relaxed_overall,0.2284,0.008397713059747491
LLaVa,19000,docvqa_val_anls,0.3886627325813464,0.005572189741680524
LLaVa,19000,infovqa_val_anls,0.18766806187395813,0.006047287494792444
LLaVa,19000,mme_total_score,735.0644257703082,
LLaVa,19000,mmmu_val_mmmu_acc,0.27556,
LLaVa,19000,mmstar_average,0.34617955399790473,
LLaVa,19000,ocrbench_ocrbench_accuracy,0.487,
LLaVa,19000,seedbench_seed_all,0.50550305725403,
LLaVa,19000,textvqa_val_exact_match,0.47534,0.00678734045691651
LLaVa,20000,ai2d_exact_match,0.3746761658031088,0.008711886524907501
LLaVa,20000,average,0.3636232406961286,
LLaVa,20000,average_rank,3.3,
LLaVa,20000,chartqa_relaxed_overall,0.2224,0.00831883268198588
LLaVa,20000,docvqa_val_anls,0.3865323770909091,0.005551659686181904
LLaVa,20000,infovqa_val_anls,0.1967140503390298,0.006138459642690392
LLaVa,20000,mme_total_score,688.5517206882753,
LLaVa,20000,mmmu_val_mmmu_acc,0.27556,
LLaVa,20000,mmstar_average,0.3525069399025931,
LLaVa,20000,ocrbench_ocrbench_accuracy,0.494,
LLaVa,20000,seedbench_seed_all,0.5113396331295164,
LLaVa,20000,textvqa_val_exact_match,0.45888,0.006775175991953595